revm_bytecode/
iterator.rs

1use crate::{opcode, Bytecode, OpCode};
2
/// Iterator over opcodes in a bytecode, skipping immediates.
///
/// This allows you to iterate through the actual opcodes in the bytecode,
/// without dealing with the immediate values that follow instructions.
///
/// Items are raw opcode bytes (`u8`); they are not validated against the
/// opcode table while iterating.
#[derive(Debug, Clone)]
pub struct BytecodeIterator<'a> {
    /// Reference to the underlying bytecode bytes
    bytes: &'a [u8],
    /// Current position in the bytecode (byte offset of the next opcode to yield)
    position: usize,
    /// End position in the bytecode (to handle original length for legacy bytecode).
    ///
    /// Exclusive bound: iteration stops once `position >= end`. It is chosen by
    /// the constructor and can be smaller than `bytes.len()`.
    end: usize,
}
16
17impl<'a> BytecodeIterator<'a> {
18    /// Creates a new iterator from a bytecode reference.
19    pub fn new(bytecode: &'a Bytecode) -> Self {
20        let bytes = bytecode.bytecode();
21        let end = match bytecode {
22            Bytecode::LegacyAnalyzed(analyzed) => analyzed.original_len(),
23            Bytecode::Eip7702(_) => 0,
24            _ => bytes.len(),
25        };
26
27        Self {
28            bytes: bytes.as_ref(),
29            position: 0,
30            end,
31        }
32    }
33
34    /// Returns the current position in the bytecode.
35    pub fn position(&self) -> usize {
36        self.position
37    }
38
39    /// Skips to the next opcode, taking into account PUSH instructions.
40    pub fn skip_to_next_opcode(&mut self) {
41        if self.position >= self.end {
42            return;
43        }
44
45        let opcode = self.bytes[self.position];
46        self.position += 1;
47
48        // Get base immediate size from opcode info
49        let mut immediate_size = opcode::OPCODE_INFO[opcode as usize]
50            .map(|info| info.immediate_size() as usize)
51            .unwrap_or_default();
52
53        // Special handling for RJUMPV which has variable immediates
54        if opcode == opcode::RJUMPV {
55            if let Some(&max_index) = self.bytes.get(self.position) {
56                immediate_size += (max_index as usize) * 2;
57            }
58        }
59
60        self.position += immediate_size;
61    }
62
63    /// Returns the current opcode without advancing the iterator.
64    pub fn peek(&self) -> Option<u8> {
65        self.bytes.get(self.position).copied()
66    }
67
68    /// Returns the current opcode wrapped in OpCode without advancing the iterator.
69    pub fn peek_opcode(&self) -> Option<OpCode> {
70        self.peek().and_then(OpCode::new)
71    }
72}
73
74impl Iterator for BytecodeIterator<'_> {
75    type Item = u8;
76
77    fn next(&mut self) -> Option<Self::Item> {
78        if self.position >= self.end {
79            return None;
80        }
81
82        // Get the opcode first with bounds check
83        let opcode = *self.bytes.get(self.position)?;
84        self.skip_to_next_opcode();
85        Some(opcode)
86    }
87}
88
/// Extension trait for Bytecode to provide iteration capabilities.
///
/// Bring this trait into scope to call `iter_opcodes()` directly on a value.
pub trait BytecodeIteratorExt {
    /// Returns an iterator over the opcodes in this bytecode, skipping immediates.
    ///
    /// The returned iterator borrows from `self`.
    fn iter_opcodes(&self) -> BytecodeIterator<'_>;
}
94
impl BytecodeIteratorExt for Bytecode {
    /// Returns an opcode iterator over this bytecode.
    ///
    /// Equivalent to calling `BytecodeIterator::new(self)`.
    fn iter_opcodes(&self) -> BytecodeIterator<'_> {
        BytecodeIterator::new(self)
    }
}
100
101#[cfg(test)]
102mod tests {
103    use super::*;
104    #[allow(unused_imports)]
105    use crate::{eof::Eof, LegacyRawBytecode};
106    #[allow(unused_imports)]
107    use primitives::{Address, Bytes};
108
109    #[test]
110    fn test_simple_bytecode_iteration() {
111        // Create a simple bytecode: PUSH1 0x01 PUSH1 0x02 ADD STOP
112        let bytecode_data = vec![
113            opcode::PUSH1,
114            0x01,
115            opcode::PUSH1,
116            0x02,
117            opcode::ADD,
118            opcode::STOP,
119        ];
120        let raw_bytecode = LegacyRawBytecode(Bytes::from(bytecode_data));
121        let bytecode = Bytecode::LegacyAnalyzed(raw_bytecode.into_analyzed());
122        let opcodes: Vec<u8> = bytecode.iter_opcodes().collect();
123        // We should only see the opcodes, not the immediates
124        assert_eq!(
125            opcodes,
126            vec![opcode::PUSH1, opcode::PUSH1, opcode::ADD, opcode::STOP]
127        );
128    }
129
130    #[test]
131    fn test_bytecode_with_various_push_sizes() {
132        // PUSH1 0x01, PUSH2 0x0203, PUSH3 0x040506, STOP
133        let bytecode_data = vec![
134            opcode::PUSH1,
135            0x01,
136            opcode::PUSH2,
137            0x02,
138            0x03,
139            opcode::PUSH3,
140            0x04,
141            0x05,
142            0x06,
143            opcode::STOP,
144        ];
145        let raw_bytecode = LegacyRawBytecode(Bytes::from(bytecode_data));
146        let bytecode = Bytecode::LegacyAnalyzed(raw_bytecode.into_analyzed());
147
148        let opcodes: Vec<u8> = bytecode.iter_opcodes().collect();
149
150        // We should only see the opcodes, not the immediates
151        assert_eq!(
152            opcodes,
153            vec![opcode::PUSH1, opcode::PUSH2, opcode::PUSH3, opcode::STOP]
154        );
155    }
156
157    #[test]
158    fn test_bytecode_skips_immediates() {
159        // Create a bytecode with various PUSH operations
160        let bytecode_data = vec![
161            opcode::PUSH1,
162            0x01, // PUSH1 0x01
163            opcode::PUSH2,
164            0x02,
165            0x03,        // PUSH2 0x0203
166            opcode::ADD, // ADD
167            opcode::PUSH3,
168            0x04,
169            0x05,
170            0x06, // PUSH3 0x040506
171            opcode::PUSH32,
172            0x10,
173            0x11,
174            0x12,
175            0x13, // PUSH32 with 32 bytes of immediate data
176            0x14,
177            0x15,
178            0x16,
179            0x17,
180            0x18,
181            0x19,
182            0x1a,
183            0x1b,
184            0x1c,
185            0x1d,
186            0x1e,
187            0x1f,
188            0x20,
189            0x21,
190            0x22,
191            0x23,
192            0x24,
193            0x25,
194            0x26,
195            0x27,
196            0x28,
197            0x29,
198            0x2a,
199            0x2b,
200            0x2c,
201            0x2d,
202            0x2e,
203            0x2f,
204            opcode::MUL,  // MUL
205            opcode::STOP, // STOP
206        ];
207
208        let raw_bytecode = LegacyRawBytecode(Bytes::from(bytecode_data));
209        let bytecode = Bytecode::LegacyAnalyzed(raw_bytecode.into_analyzed());
210
211        // Use the iterator directly
212        let iter = BytecodeIterator::new(&bytecode);
213        let opcodes: Vec<u8> = iter.collect();
214
215        // Should only include the opcodes, not the immediates
216        assert_eq!(
217            opcodes,
218            vec![
219                opcode::PUSH1,
220                opcode::PUSH2,
221                opcode::ADD,
222                opcode::PUSH3,
223                opcode::PUSH32,
224                opcode::MUL,
225                opcode::STOP,
226            ]
227        );
228
229        // Use the method on the bytecode struct
230        let opcodes: Vec<u8> = bytecode.iter_opcodes().collect();
231        assert_eq!(
232            opcodes,
233            vec![
234                opcode::PUSH1,
235                opcode::PUSH2,
236                opcode::ADD,
237                opcode::PUSH3,
238                opcode::PUSH32,
239                opcode::MUL,
240                opcode::STOP,
241            ]
242        );
243    }
244
245    #[test]
246    fn test_position_tracking() {
247        // PUSH1 0x01, PUSH1 0x02, ADD, STOP
248        let bytecode_data = vec![
249            opcode::PUSH1,
250            0x01,
251            opcode::PUSH1,
252            0x02,
253            opcode::ADD,
254            opcode::STOP,
255        ];
256        let raw_bytecode = LegacyRawBytecode(Bytes::from(bytecode_data));
257        let bytecode = Bytecode::LegacyAnalyzed(raw_bytecode.into_analyzed());
258
259        let mut iter = bytecode.iter_opcodes();
260
261        // Start at position 0
262        assert_eq!(iter.position(), 0);
263        assert_eq!(iter.next(), Some(opcode::PUSH1));
264        // After PUSH1, position should be 2 (PUSH1 + immediate)
265        assert_eq!(iter.position(), 2);
266
267        assert_eq!(iter.next(), Some(opcode::PUSH1));
268        // After second PUSH1, position should be 4 (2 + PUSH1 + immediate)
269        assert_eq!(iter.position(), 4);
270
271        assert_eq!(iter.next(), Some(opcode::ADD));
272        // After ADD, position should be 5 (4 + ADD)
273        assert_eq!(iter.position(), 5);
274
275        assert_eq!(iter.next(), Some(opcode::STOP));
276        // After STOP, position should be 6 (5 + STOP)
277        assert_eq!(iter.position(), 6);
278
279        // No more opcodes
280        assert_eq!(iter.next(), None);
281        assert_eq!(iter.position(), 6);
282    }
283
284    #[test]
285    fn test_empty_bytecode() {
286        // Empty bytecode (just STOP)
287        let bytecode_data = vec![opcode::STOP];
288        let raw_bytecode = LegacyRawBytecode(Bytes::from(bytecode_data));
289        let bytecode = Bytecode::LegacyAnalyzed(raw_bytecode.into_analyzed());
290
291        let opcodes: Vec<u8> = bytecode.iter_opcodes().collect();
292        assert_eq!(opcodes, vec![opcode::STOP]);
293    }
294}