revm_bytecode/
iter.rs

1use crate::{opcode, Bytecode, OpCode};
2
/// Opcode-level cursor over a [`Bytecode`].
///
/// Every step yields a single opcode byte and jumps over the immediate
/// bytes that belong to it, so callers only ever observe opcodes and never
/// immediate data.
#[derive(Debug, Clone)]
pub struct BytecodeIterator<'a> {
    /// Address of the first bytecode byte. It is kept solely so that
    /// [`position`](Self::position) can report an offset from it.
    start: *const u8,
    /// Bytes that have not been consumed yet.
    bytes: core::slice::Iter<'a, u8>,
}
14
15impl<'a> BytecodeIterator<'a> {
16    /// Creates a new iterator from a bytecode reference.
17    #[inline]
18    pub fn new(bytecode: &'a Bytecode) -> Self {
19        let bytes = match bytecode {
20            Bytecode::LegacyAnalyzed(_) => &bytecode.bytecode()[..],
21            Bytecode::Eip7702(_) => &[],
22        };
23        Self {
24            start: bytes.as_ptr(),
25            bytes: bytes.iter(),
26        }
27    }
28
29    /// Skips to the next opcode, taking into account PUSH instructions.
30    pub fn skip_to_next_opcode(&mut self) {
31        self.next();
32    }
33
34    /// Returns the remaining bytes in the bytecode as a slice.
35    #[inline]
36    pub fn as_slice(&self) -> &[u8] {
37        self.bytes.as_slice()
38    }
39
40    /// Returns the current position in the bytecode.
41    #[inline]
42    pub fn position(&self) -> usize {
43        (self.bytes.as_slice().as_ptr() as usize) - (self.start as usize)
44        // TODO: Use the following on 1.87
45        // SAFETY: `start` always points to the start of the bytecode.
46        // unsafe {
47        //     self.bytes
48        //         .as_slice()
49        //         .as_ptr()
50        //         .offset_from_unsigned(self.start)
51        // }
52    }
53
54    #[inline]
55    fn skip_immediate(&mut self, opcode: u8) {
56        // Get base immediate size from opcode info
57        let immediate_size = opcode::OPCODE_INFO[opcode as usize]
58            .map(|info| info.immediate_size() as usize)
59            .unwrap_or_default();
60
61        // Advance the iterator by the immediate size
62        if immediate_size > 0 {
63            self.bytes = self
64                .bytes
65                .as_slice()
66                .get(immediate_size..)
67                .unwrap_or_default()
68                .iter();
69        }
70    }
71
72    /// Returns the current opcode without advancing the iterator.
73    #[inline]
74    pub fn peek(&self) -> Option<u8> {
75        self.bytes.as_slice().first().copied()
76    }
77
78    /// Returns the current opcode wrapped in OpCode without advancing the iterator.
79    #[inline]
80    pub fn peek_opcode(&self) -> Option<OpCode> {
81        self.peek().and_then(OpCode::new)
82    }
83}
84
85impl Iterator for BytecodeIterator<'_> {
86    type Item = u8;
87
88    #[inline]
89    fn next(&mut self) -> Option<Self::Item> {
90        self.bytes
91            .next()
92            .copied()
93            .inspect(|&current| self.skip_immediate(current))
94    }
95
96    #[inline]
97    fn size_hint(&self) -> (usize, Option<usize>) {
98        // Lower bound is 0 if empty, 1 if not empty as it depends on the bytes.
99        let byte_len = self.bytes.len();
100        (byte_len.min(1), Some(byte_len))
101    }
102}
103
104impl core::iter::FusedIterator for BytecodeIterator<'_> {}
105
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LegacyRawBytecode;
    use primitives::Bytes;

    /// Wraps raw bytes into an analyzed legacy [`Bytecode`].
    fn analyzed(code: Vec<u8>) -> Bytecode {
        Bytecode::LegacyAnalyzed(LegacyRawBytecode(Bytes::from(code)).into_analyzed())
    }

    #[test]
    fn test_simple_bytecode_iteration() {
        // PUSH1 0x01, PUSH1 0x02, ADD, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]);

        // Immediates must not show up, only the opcodes themselves.
        let seen: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH1, opcode::ADD, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_with_various_push_sizes() {
        // PUSH1 0x01, PUSH2 0x0203, PUSH3 0x040506, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::PUSH3,
            0x04,
            0x05,
            0x06,
            opcode::STOP,
        ]);

        // Immediates must not show up, only the opcodes themselves.
        let seen: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH2, opcode::PUSH3, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_skips_immediates() {
        // PUSH1 0x01, PUSH2 0x0203, ADD, PUSH3 0x040506, PUSH32 <32 bytes>, MUL, STOP
        let mut code = vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::ADD,
            opcode::PUSH3,
            0x04,
            0x05,
            0x06,
            opcode::PUSH32,
        ];
        // 32 bytes of immediate data for PUSH32: 0x10 through 0x2f.
        code.extend(0x10..=0x2f_u8);
        code.extend([opcode::MUL, opcode::STOP]);
        let bytecode = analyzed(code);

        let expected = [
            opcode::PUSH1,
            opcode::PUSH2,
            opcode::ADD,
            opcode::PUSH3,
            opcode::PUSH32,
            opcode::MUL,
            opcode::STOP,
        ];

        // Constructing the iterator directly...
        let direct: Vec<u8> = BytecodeIterator::new(&bytecode).collect();
        assert_eq!(direct, expected);

        // ...and going through the method on the bytecode must agree.
        let via_method: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(via_method, expected);
    }

    #[test]
    fn test_position_tracking() {
        // PUSH1 0x01, PUSH1 0x02, ADD, STOP
        let bytecode = analyzed(vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]);

        let mut iter = bytecode.iter_opcodes();

        // Nothing consumed yet.
        assert_eq!(iter.position(), 0);

        // (yielded opcode, position afterwards): each PUSH1 consumes two
        // bytes, ADD and STOP one byte each.
        let steps = [
            (opcode::PUSH1, 2),
            (opcode::PUSH1, 4),
            (opcode::ADD, 5),
            (opcode::STOP, 6),
        ];
        for (expected_opcode, expected_position) in steps {
            assert_eq!(iter.next(), Some(expected_opcode));
            assert_eq!(iter.position(), expected_position);
        }

        // Exhausted: no more opcodes and the position stays put.
        assert_eq!(iter.next(), None);
        assert_eq!(iter.position(), 6);
    }

    #[test]
    fn test_empty_bytecode() {
        // Minimal bytecode: a lone STOP.
        let bytecode = analyzed(vec![opcode::STOP]);

        let seen: Vec<u8> = bytecode.iter_opcodes().collect();
        assert_eq!(seen, [opcode::STOP]);
    }
}