revm_bytecode/
iter.rs

1use crate::{opcode, Bytecode, OpCode};
2
/// Iterator over opcodes in a bytecode, skipping immediates.
///
/// This allows you to iterate through the actual opcodes in the bytecode,
/// without dealing with the immediate values that follow instructions.
///
/// Each yielded item is the raw opcode byte (`u8`); use
/// [`peek_opcode`](Self::peek_opcode) to look at the upcoming byte as an `OpCode`.
#[derive(Debug, Clone)]
pub struct BytecodeIterator<'a> {
    /// Start pointer of the bytecode. Only used to calculate [`position`](Self::position).
    ///
    /// Within this file the pointer is only ever converted to an address, never dereferenced.
    // NOTE(review): a raw-pointer field opts the type out of `Send`/`Sync` unless an explicit
    // impl exists elsewhere — confirm that is intended.
    start: *const u8,
    /// Iterator over the bytecode bytes.
    ///
    /// Its current slice always begins at the next unread byte.
    bytes: core::slice::Iter<'a, u8>,
}
14
15impl<'a> BytecodeIterator<'a> {
16    /// Creates a new iterator from a bytecode reference.
17    #[inline]
18    pub fn new(bytecode: &'a Bytecode) -> Self {
19        let bytes = match bytecode {
20            Bytecode::LegacyAnalyzed(_) | Bytecode::Eof(_) => &bytecode.bytecode()[..],
21            Bytecode::Eip7702(_) => &[],
22        };
23        Self {
24            start: bytes.as_ptr(),
25            bytes: bytes.iter(),
26        }
27    }
28
29    /// Skips to the next opcode, taking into account PUSH instructions.
30    pub fn skip_to_next_opcode(&mut self) {
31        self.next();
32    }
33
34    /// Returns the remaining bytes in the bytecode as a slice.
35    #[inline]
36    pub fn as_slice(&self) -> &[u8] {
37        self.bytes.as_slice()
38    }
39
40    /// Returns the current position in the bytecode.
41    #[inline]
42    pub fn position(&self) -> usize {
43        (self.bytes.as_slice().as_ptr() as usize) - (self.start as usize)
44        // TODO: Use the following on 1.87
45        // SAFETY: `start` always points to the start of the bytecode.
46        // unsafe {
47        //     self.bytes
48        //         .as_slice()
49        //         .as_ptr()
50        //         .offset_from_unsigned(self.start)
51        // }
52    }
53
54    #[inline]
55    fn skip_immediate(&mut self, opcode: u8) {
56        // Get base immediate size from opcode info
57        let mut immediate_size = opcode::OPCODE_INFO[opcode as usize]
58            .map(|info| info.immediate_size() as usize)
59            .unwrap_or_default();
60
61        // Special handling for RJUMPV which has variable immediates
62        if opcode == opcode::RJUMPV {
63            if let Some(max_index) = self.peek() {
64                immediate_size += (max_index as usize + 1) * 2;
65            }
66        }
67
68        // Advance the iterator by the immediate size
69        if immediate_size > 0 {
70            self.bytes = self
71                .bytes
72                .as_slice()
73                .get(immediate_size..)
74                .unwrap_or_default()
75                .iter();
76        }
77    }
78
79    /// Returns the current opcode without advancing the iterator.
80    #[inline]
81    pub fn peek(&self) -> Option<u8> {
82        self.bytes.as_slice().first().copied()
83    }
84
85    /// Returns the current opcode wrapped in OpCode without advancing the iterator.
86    #[inline]
87    pub fn peek_opcode(&self) -> Option<OpCode> {
88        self.peek().and_then(OpCode::new)
89    }
90}
91
92impl Iterator for BytecodeIterator<'_> {
93    type Item = u8;
94
95    #[inline]
96    fn next(&mut self) -> Option<Self::Item> {
97        self.bytes
98            .next()
99            .copied()
100            .inspect(|&current| self.skip_immediate(current))
101    }
102
103    #[inline]
104    fn size_hint(&self) -> (usize, Option<usize>) {
105        // Lower bound is 0 if empty, 1 if not empty as it depends on the bytes.
106        let byte_len = self.bytes.len();
107        (byte_len.min(1), Some(byte_len))
108    }
109}
110
// Fused: `next` returns `None` only when the inner slice iterator is exhausted, and
// `skip_immediate` can only shrink that iterator, so every later call returns `None` too.
impl core::iter::FusedIterator for BytecodeIterator<'_> {}
112
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LegacyRawBytecode;
    use primitives::Bytes;

    /// Wraps raw bytes into analyzed legacy bytecode.
    fn analyzed(code: Vec<u8>) -> Bytecode {
        Bytecode::LegacyAnalyzed(LegacyRawBytecode(Bytes::from(code)).into_analyzed())
    }

    #[test]
    fn test_simple_bytecode_iteration() {
        // PUSH1 0x01, PUSH1 0x02, ADD, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]);

        // Immediates must not show up among the yielded opcodes.
        let seen: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH1, opcode::ADD, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_with_various_push_sizes() {
        // PUSH1 0x01, PUSH2 0x0203, PUSH3 0x040506, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::PUSH3,
            0x04,
            0x05,
            0x06,
            opcode::STOP,
        ]);

        // Immediates of every width must be skipped.
        let seen: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH2, opcode::PUSH3, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_skips_immediates() {
        // PUSH1 0x01, PUSH2 0x0203, ADD, PUSH3 0x040506, ...
        let mut code = vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::ADD,
            opcode::PUSH3,
            0x04,
            0x05,
            0x06,
            opcode::PUSH32,
        ];
        // ... PUSH32 followed by its 32 immediate bytes 0x10..=0x2f ...
        code.extend(0x10..=0x2f_u8);
        // ... MUL, STOP
        code.extend([opcode::MUL, opcode::STOP]);
        let bytecode = analyzed(code);

        let expected = [
            opcode::PUSH1,
            opcode::PUSH2,
            opcode::ADD,
            opcode::PUSH3,
            opcode::PUSH32,
            opcode::MUL,
            opcode::STOP,
        ];

        // Constructing the iterator directly...
        let direct: Vec<u8> = BytecodeIterator::new(&bytecode).collect();
        assert_eq!(direct, expected);

        // ...and going through the method on the bytecode must agree.
        let via_method: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(via_method, expected);
    }

    #[test]
    fn test_position_tracking() {
        // PUSH1 0x01, PUSH1 0x02, ADD, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]);
        let mut iter = bytecode.iter_opcodes();

        // Nothing consumed yet.
        assert_eq!(iter.position(), 0);

        // Each step lists the opcode yielded and the position right after it; a PUSH1 moves
        // the position by two (opcode + immediate), the others by one.
        let steps = [
            (opcode::PUSH1, 2),
            (opcode::PUSH1, 4),
            (opcode::ADD, 5),
            (opcode::STOP, 6),
        ];
        for (expected_opcode, expected_position) in steps {
            assert_eq!(iter.next(), Some(expected_opcode));
            assert_eq!(iter.position(), expected_position);
        }

        // Exhausted: no further opcodes and the position stays put.
        assert_eq!(iter.next(), None);
        assert_eq!(iter.position(), 6);
    }

    #[test]
    fn test_empty_bytecode() {
        // Smallest case covered here: a lone STOP.
        let bytecode = analyzed(vec![opcode::STOP]);

        let seen: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(seen, [opcode::STOP]);
    }
}