revm_bytecode/legacy/
analysis.rs

1use super::JumpTable;
2use crate::opcode;
3use bitvec::{bitvec, order::Lsb0, vec::BitVec};
4use primitives::Bytes;
5use std::{sync::Arc, vec, vec::Vec};
6
7/// Analyze the bytecode to find the jumpdests. Used to create a jump table
8/// that is needed for [`crate::LegacyAnalyzedBytecode`].
9/// This function contains a hot loop and should be optimized as much as possible.
10///
11/// # Safety
12///
13/// The function uses unsafe pointer arithmetic, but maintains the following invariants:
14/// - The iterator never advances beyond the end of the bytecode
15/// - All pointer offsets are within bounds of the bytecode
16/// - The jump table is never accessed beyond its allocated size
17///
18/// Undefined behavior if the bytecode does not end with a valid STOP opcode. Please check
19/// [`crate::LegacyAnalyzedBytecode::new`] for details on how the bytecode is validated.
20pub fn analyze_legacy(bytecode: Bytes) -> (JumpTable, Bytes) {
21    if bytecode.is_empty() {
22        return (JumpTable::default(), Bytes::from_static(&[opcode::STOP]));
23    }
24
25    let mut jumps: BitVec<u8> = bitvec![u8, Lsb0; 0; bytecode.len()];
26    let range = bytecode.as_ptr_range();
27    let start = range.start;
28    let mut iterator = start;
29    let end = range.end;
30    let mut opcode = 0;
31
32    while iterator < end {
33        opcode = unsafe { *iterator };
34        if opcode::JUMPDEST == opcode {
35            // SAFETY: Jumps are max length of the code
36            unsafe { jumps.set_unchecked(iterator.offset_from(start) as usize, true) }
37            iterator = unsafe { iterator.offset(1) };
38        } else {
39            let push_offset = opcode.wrapping_sub(opcode::PUSH1);
40            if push_offset < 32 {
41                // SAFETY: Iterator access range is checked in the while loop
42                iterator = unsafe { iterator.offset((push_offset + 2) as isize) };
43            } else {
44                // SAFETY: Iterator access range is checked in the while loop
45                iterator = unsafe { iterator.offset(1) };
46            }
47        }
48    }
49
50    // Calculate padding needed to ensure bytecode ends with STOP
51    // If we're at the end and last opcode is not STOP, we need 1 more byte
52    let padding_size = (iterator as usize) - (end as usize) + (opcode != opcode::STOP) as usize;
53    if padding_size > 0 {
54        let mut padded_bytecode = Vec::with_capacity(bytecode.len() + padding_size);
55        padded_bytecode.extend_from_slice(&bytecode);
56        padded_bytecode.extend(vec![0; padding_size]);
57        (JumpTable(Arc::new(jumps)), Bytes::from(padded_bytecode))
58    } else {
59        (JumpTable(Arc::new(jumps)), bytecode)
60    }
61}
62
63mod tests {
64    #[allow(unused_imports)]
65    use crate::{legacy::analyze_legacy, opcode};
66
67    #[test]
68    fn test_bytecode_ends_with_stop_no_padding_needed() {
69        let bytecode = vec![
70            opcode::PUSH1,
71            0x01,
72            opcode::PUSH1,
73            0x02,
74            opcode::ADD,
75            opcode::STOP,
76        ];
77        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
78        assert_eq!(padded_bytecode.len(), bytecode.len());
79    }
80
81    #[test]
82    fn test_bytecode_ends_without_stop_requires_padding() {
83        let bytecode = vec![opcode::PUSH1, 0x01, opcode::PUSH1, 0x02, opcode::ADD];
84        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
85        assert_eq!(padded_bytecode.len(), bytecode.len() + 1);
86    }
87
88    #[test]
89    fn test_bytecode_ends_with_push16_requires_17_bytes_padding() {
90        let bytecode = vec![opcode::PUSH1, 0x01, opcode::PUSH16];
91        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
92        assert_eq!(padded_bytecode.len(), bytecode.len() + 17);
93    }
94
95    #[test]
96    fn test_bytecode_ends_with_push2_requires_2_bytes_padding() {
97        let bytecode = vec![opcode::PUSH1, 0x01, opcode::PUSH2, 0x02];
98        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
99        assert_eq!(padded_bytecode.len(), bytecode.len() + 2);
100    }
101
102    #[test]
103    fn test_empty_bytecode_requires_stop() {
104        let bytecode = vec![];
105        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
106        assert_eq!(padded_bytecode.len(), 1); // Just STOP
107    }
108
109    #[test]
110    fn test_bytecode_with_jumpdest_at_start() {
111        let bytecode = vec![opcode::JUMPDEST, opcode::PUSH1, 0x01, opcode::STOP];
112        let (jump_table, _) = analyze_legacy(bytecode.clone().into());
113        assert!(jump_table.0[0]); // First byte should be a valid jumpdest
114    }
115
116    #[test]
117    fn test_bytecode_with_jumpdest_after_push() {
118        let bytecode = vec![opcode::PUSH1, 0x01, opcode::JUMPDEST, opcode::STOP];
119        let (jump_table, _) = analyze_legacy(bytecode.clone().into());
120        assert!(jump_table.0[2]); // JUMPDEST should be at position 2
121    }
122
123    #[test]
124    fn test_bytecode_with_multiple_jumpdests() {
125        let bytecode = vec![
126            opcode::JUMPDEST,
127            opcode::PUSH1,
128            0x01,
129            opcode::JUMPDEST,
130            opcode::STOP,
131        ];
132        let (jump_table, _) = analyze_legacy(bytecode.clone().into());
133        assert!(jump_table.0[0]); // First JUMPDEST
134        assert!(jump_table.0[3]); // Second JUMPDEST
135    }
136
137    #[test]
138    fn test_bytecode_with_max_push32() {
139        let bytecode = vec![opcode::PUSH32];
140        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
141        assert_eq!(padded_bytecode.len(), bytecode.len() + 33); // PUSH32 + 32 bytes + STOP
142    }
143
144    #[test]
145    fn test_bytecode_with_invalid_opcode() {
146        let bytecode = vec![0xFF, opcode::STOP]; // 0xFF is an invalid opcode
147        let (jump_table, _) = analyze_legacy(bytecode.clone().into());
148        assert!(!jump_table.0[0]); // Invalid opcode should not be a jumpdest
149    }
150
151    #[test]
152    fn test_bytecode_with_sequential_pushes() {
153        let bytecode = vec![
154            opcode::PUSH1,
155            0x01,
156            opcode::PUSH2,
157            0x02,
158            0x03,
159            opcode::PUSH4,
160            0x04,
161            0x05,
162            0x06,
163            0x07,
164            opcode::STOP,
165        ];
166        let (jump_table, padded_bytecode) = analyze_legacy(bytecode.clone().into());
167        assert_eq!(padded_bytecode.len(), bytecode.len());
168        assert!(!jump_table.0[0]); // PUSH1
169        assert!(!jump_table.0[2]); // PUSH2
170        assert!(!jump_table.0[5]); // PUSH4
171    }
172
173    #[test]
174    fn test_bytecode_with_jumpdest_in_push_data() {
175        let bytecode = vec![
176            opcode::PUSH2,
177            opcode::JUMPDEST, // This should not be treated as a JUMPDEST
178            0x02,
179            opcode::STOP,
180        ];
181        let (jump_table, _) = analyze_legacy(bytecode.clone().into());
182        assert!(!jump_table.0[1]); // JUMPDEST in push data should not be valid
183    }
184}