revm_bytecode/
iter.rs

1use crate::{opcode, Bytecode, OpCode};
2
/// Opcode-level iterator over a [`Bytecode`].
///
/// Every step yields a single opcode byte and then moves past the immediate
/// bytes that belong to it, so consumers only ever observe instruction opcodes
/// and never the data embedded after them.
#[derive(Clone, Debug)]
pub struct BytecodeIterator<'a> {
    /// Bytes of the bytecode that have not been visited yet.
    bytes: core::slice::Iter<'a, u8>,
    /// Address of the first byte of the bytecode. It is never dereferenced; it is
    /// only the reference point from which [`position`](Self::position) measures
    /// its offset.
    start: *const u8,
}
14
15impl<'a> BytecodeIterator<'a> {
16    /// Creates a new iterator from a bytecode reference.
17    #[inline]
18    pub fn new(bytecode: &'a Bytecode) -> Self {
19        let bytes = match bytecode {
20            Bytecode::LegacyAnalyzed(_) => &bytecode.bytecode()[..],
21            Bytecode::Eip7702(_) => &[],
22        };
23        Self {
24            bytes: bytes.iter(),
25            start: bytes.as_ptr(),
26        }
27    }
28
29    /// Skips to the next opcode, taking into account PUSH instructions.
30    pub fn skip_to_next_opcode(&mut self) {
31        self.next();
32    }
33
34    /// Returns the remaining bytes in the bytecode as a slice.
35    #[inline]
36    pub fn as_slice(&self) -> &[u8] {
37        self.bytes.as_slice()
38    }
39
40    /// Returns the current position in the bytecode.
41    #[inline]
42    pub fn position(&self) -> usize {
43        // SAFETY: `start` always points to the start of the bytecode.
44        unsafe {
45            self.bytes
46                .as_slice()
47                .as_ptr()
48                .offset_from_unsigned(self.start)
49        }
50    }
51
52    #[inline]
53    fn skip_immediate(&mut self, opcode: u8) {
54        // Get base immediate size from opcode info
55        let immediate_size = opcode::OPCODE_INFO[opcode as usize]
56            .map(|info| info.immediate_size() as usize)
57            .unwrap_or_default();
58
59        // Advance the iterator by the immediate size
60        if immediate_size > 0 {
61            self.bytes = self
62                .bytes
63                .as_slice()
64                .get(immediate_size..)
65                .unwrap_or_default()
66                .iter();
67        }
68    }
69
70    /// Returns the current opcode without advancing the iterator.
71    #[inline]
72    pub fn peek(&self) -> Option<u8> {
73        self.bytes.as_slice().first().copied()
74    }
75
76    /// Returns the current opcode wrapped in OpCode without advancing the iterator.
77    #[inline]
78    pub fn peek_opcode(&self) -> Option<OpCode> {
79        self.peek().and_then(OpCode::new)
80    }
81}
82
83impl Iterator for BytecodeIterator<'_> {
84    type Item = u8;
85
86    #[inline]
87    fn next(&mut self) -> Option<Self::Item> {
88        self.bytes
89            .next()
90            .copied()
91            .inspect(|&current| self.skip_immediate(current))
92    }
93
94    #[inline]
95    fn size_hint(&self) -> (usize, Option<usize>) {
96        // Lower bound is 0 if empty, 1 if not empty as it depends on the bytes.
97        let byte_len = self.bytes.len();
98        (byte_len.min(1), Some(byte_len))
99    }
100}
101
102impl core::iter::FusedIterator for BytecodeIterator<'_> {}
103
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LegacyRawBytecode;
    use primitives::Bytes;

    /// Wraps raw bytes into an analyzed legacy [`Bytecode`].
    fn analyzed(code: Vec<u8>) -> Bytecode {
        let raw = LegacyRawBytecode(Bytes::from(code));
        Bytecode::LegacyAnalyzed(raw.into_analyzed())
    }

    #[test]
    fn test_simple_bytecode_iteration() {
        // PUSH1 0x01, PUSH1 0x02, ADD, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]);

        let seen: Vec<u8> = bytecode.iter_opcodes().collect();

        // Immediates must not show up, only the opcodes themselves.
        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH1, opcode::ADD, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_with_various_push_sizes() {
        // PUSH1 0x01, PUSH2 0x0203, PUSH3 0x040506, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::PUSH3,
            0x04,
            0x05,
            0x06,
            opcode::STOP,
        ]);

        let seen: Vec<u8> = bytecode.iter_opcodes().collect();

        // Immediates must not show up, only the opcodes themselves.
        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH2, opcode::PUSH3, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_skips_immediates() {
        // PUSH1 0x01, PUSH2 0x0203, ADD, PUSH3 0x040506, PUSH32 <32 bytes>, MUL, STOP
        let mut code = vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::ADD,
            opcode::PUSH3,
            0x04,
            0x05,
            0x06,
            opcode::PUSH32,
        ];
        // 32 bytes of immediate data for PUSH32: 0x10 through 0x2f.
        code.extend(0x10..=0x2f_u8);
        code.extend([opcode::MUL, opcode::STOP]);
        let bytecode = analyzed(code);

        let expected = [
            opcode::PUSH1,
            opcode::PUSH2,
            opcode::ADD,
            opcode::PUSH3,
            opcode::PUSH32,
            opcode::MUL,
            opcode::STOP,
        ];

        // Constructing the iterator directly...
        let direct: Vec<u8> = BytecodeIterator::new(&bytecode).collect();
        assert_eq!(direct, expected);

        // ...and going through the method on the bytecode must agree.
        let via_method: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(via_method, expected);
    }

    #[test]
    fn test_position_tracking() {
        // PUSH1 0x01, PUSH1 0x02, ADD, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]);

        let mut iter = bytecode.iter_opcodes();

        // Nothing consumed yet.
        assert_eq!(iter.position(), 0);

        // Each entry: the opcode expected next, and the position once that opcode
        // and its immediate bytes have been consumed.
        let steps: [(u8, usize); 4] = [
            (opcode::PUSH1, 2),
            (opcode::PUSH1, 4),
            (opcode::ADD, 5),
            (opcode::STOP, 6),
        ];
        for (expected_opcode, expected_position) in steps {
            assert_eq!(iter.next(), Some(expected_opcode));
            assert_eq!(iter.position(), expected_position);
        }

        // Exhausted: no further opcodes and the position stays put.
        assert_eq!(iter.next(), None);
        assert_eq!(iter.position(), 6);
    }

    #[test]
    fn test_empty_bytecode() {
        // Smallest program used here: a lone STOP.
        let bytecode = analyzed(vec![opcode::STOP]);

        let seen: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(seen, [opcode::STOP]);
    }
}