Skip to main content

revm_bytecode/
iter.rs

1use crate::{opcode, Bytecode, OpCode};
2
/// Iterator over opcodes in a bytecode, skipping immediates.
///
/// This allows you to iterate through the actual opcodes in the bytecode,
/// without dealing with the immediate values that follow instructions.
///
/// Items are yielded as raw opcode bytes (`u8`).
#[derive(Debug, Clone)]
pub struct BytecodeIterator<'a> {
    /// Iterator over the bytecode bytes that have not been consumed yet.
    bytes: core::slice::Iter<'a, u8>,
    /// Start pointer of the bytecode. Only used to calculate [`position`](Self::position).
    ///
    /// It is never dereferenced; it only serves as the origin for pointer-offset arithmetic.
    start: *const u8,
}
14
15impl<'a> BytecodeIterator<'a> {
16    /// Creates a new iterator from a bytecode reference.
17    #[inline]
18    pub fn new(bytecode: &'a Bytecode) -> Self {
19        let bytes = if bytecode.is_legacy() {
20            &bytecode.bytecode()[..]
21        } else {
22            &[]
23        };
24        Self {
25            bytes: bytes.iter(),
26            start: bytes.as_ptr(),
27        }
28    }
29
30    /// Skips to the next opcode, taking into account PUSH instructions.
31    pub fn skip_to_next_opcode(&mut self) {
32        self.next();
33    }
34
35    /// Returns the remaining bytes in the bytecode as a slice.
36    #[inline]
37    pub fn as_slice(&self) -> &[u8] {
38        self.bytes.as_slice()
39    }
40
41    /// Returns the current position in the bytecode.
42    #[inline]
43    pub fn position(&self) -> usize {
44        // SAFETY: `start` always points to the start of the bytecode.
45        unsafe {
46            self.bytes
47                .as_slice()
48                .as_ptr()
49                .offset_from_unsigned(self.start)
50        }
51    }
52
53    #[inline]
54    fn skip_immediate(&mut self, opcode: u8) {
55        // Get base immediate size from opcode info
56        let immediate_size = opcode::OPCODE_INFO[opcode as usize]
57            .map(|info| info.immediate_size() as usize)
58            .unwrap_or_default();
59
60        // Advance the iterator by the immediate size
61        if immediate_size > 0 {
62            self.bytes = self
63                .bytes
64                .as_slice()
65                .get(immediate_size..)
66                .unwrap_or_default()
67                .iter();
68        }
69    }
70
71    /// Returns the current opcode without advancing the iterator.
72    #[inline]
73    pub fn peek(&self) -> Option<u8> {
74        self.bytes.as_slice().first().copied()
75    }
76
77    /// Returns the current opcode wrapped in OpCode without advancing the iterator.
78    #[inline]
79    pub fn peek_opcode(&self) -> Option<OpCode> {
80        self.peek().and_then(OpCode::new)
81    }
82}
83
84impl Iterator for BytecodeIterator<'_> {
85    type Item = u8;
86
87    #[inline]
88    fn next(&mut self) -> Option<Self::Item> {
89        self.bytes
90            .next()
91            .copied()
92            .inspect(|&current| self.skip_immediate(current))
93    }
94
95    #[inline]
96    fn size_hint(&self) -> (usize, Option<usize>) {
97        // Lower bound is 0 if empty, 1 if not empty as it depends on the bytes.
98        let byte_len = self.bytes.len();
99        (byte_len.min(1), Some(byte_len))
100    }
101}
102
// Once the underlying byte iterator is exhausted, `next` keeps returning `None`.
impl core::iter::FusedIterator for BytecodeIterator<'_> {}
104
#[cfg(test)]
mod tests {
    use super::*;
    use primitives::Bytes;

    /// Builds legacy bytecode from `code` and collects every opcode the iterator yields.
    fn collect_opcodes(code: &'static [u8]) -> Vec<u8> {
        let bytecode = Bytecode::new_legacy(Bytes::from_static(code));
        bytecode.iter_opcodes().collect()
    }

    #[test]
    fn test_simple_bytecode_iteration() {
        // Program under test: PUSH1 0x01 PUSH1 0x02 ADD STOP
        let seen = collect_opcodes(&[
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]);

        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH1, opcode::ADD, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_with_various_push_sizes() {
        let seen = collect_opcodes(&[
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::PUSH3,
            0x04,
            0x05,
            0x06,
            opcode::STOP,
        ]);

        // Only the opcodes may show up, never the immediate bytes.
        assert_eq!(
            seen,
            [opcode::PUSH1, opcode::PUSH2, opcode::PUSH3, opcode::STOP]
        );
    }

    #[test]
    fn test_bytecode_skips_immediates() {
        #[rustfmt::skip]
        let seen = collect_opcodes(&[
            opcode::PUSH1, 0x01,
            opcode::PUSH2, 0x02, 0x03,
            opcode::ADD,
            opcode::PUSH3, 0x04, 0x05, 0x06,
            // PUSH32 followed by its 32 immediate bytes.
            opcode::PUSH32,
            0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
            0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
            0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
            0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
            opcode::MUL,
            opcode::STOP,
        ]);

        assert_eq!(
            seen,
            [
                opcode::PUSH1,
                opcode::PUSH2,
                opcode::ADD,
                opcode::PUSH3,
                opcode::PUSH32,
                opcode::MUL,
                opcode::STOP,
            ]
        );
    }

    #[test]
    fn test_position_tracking() {
        let bytecode = Bytecode::new_legacy(Bytes::from_static(&[
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ]));
        let mut iter = bytecode.iter_opcodes();

        // Nothing consumed yet.
        assert_eq!(iter.position(), 0);

        // Each step: the item `next` must return, and the position right afterwards.
        let steps = [
            (Some(opcode::PUSH1), 2),
            (Some(opcode::PUSH1), 4),
            (Some(opcode::ADD), 5),
            (Some(opcode::STOP), 6),
            (None, 6),
        ];
        for (expected_item, expected_position) in steps {
            assert_eq!(iter.next(), expected_item);
            assert_eq!(iter.position(), expected_position);
        }
    }

    #[test]
    fn test_empty_bytecode() {
        // The program exercised here is a lone STOP.
        let seen = collect_opcodes(&[opcode::STOP]);
        assert_eq!(seen, [opcode::STOP]);
    }
}