revm_bytecode/legacy/
analysis.rs

1use super::JumpTable;
2use crate::opcode;
3use bitvec::{bitvec, order::Lsb0, vec::BitVec};
4use primitives::Bytes;
5use std::vec::Vec;
6
7/// Analyzes the bytecode for use in [`LegacyAnalyzedBytecode`](crate::LegacyAnalyzedBytecode).
8///
9/// See [`LegacyAnalyzedBytecode`](crate::LegacyAnalyzedBytecode) for more details.
10///
11/// Prefer using [`LegacyAnalyzedBytecode::analyze`](crate::LegacyAnalyzedBytecode::analyze) instead.
12pub fn analyze_legacy(bytecode: Bytes) -> (JumpTable, Bytes) {
13    if bytecode.is_empty() {
14        return (JumpTable::default(), Bytes::from_static(&[opcode::STOP]));
15    }
16
17    let mut jumps: BitVec<u8> = bitvec![u8, Lsb0; 0; bytecode.len()];
18    let range = bytecode.as_ptr_range();
19    let start = range.start;
20    let mut iterator = start;
21    let end = range.end;
22    let mut prev_byte: u8 = 0;
23    let mut last_byte: u8 = 0;
24
25    while iterator < end {
26        prev_byte = last_byte;
27        last_byte = unsafe { *iterator };
28        if last_byte == opcode::JUMPDEST {
29            // SAFETY: Jumps are max length of the code
30            unsafe { jumps.set_unchecked(iterator.offset_from_unsigned(start), true) }
31            iterator = unsafe { iterator.add(1) };
32        } else {
33            let push_offset = last_byte.wrapping_sub(opcode::PUSH1);
34            if push_offset < 32 {
35                // SAFETY: Iterator access range is checked in the while loop
36                iterator = unsafe { iterator.add(push_offset as usize + 2) };
37            } else {
38                // SAFETY: Iterator access range is checked in the while loop
39                iterator = unsafe { iterator.add(1) };
40            }
41        }
42    }
43
44    // Calculate padding needed:
45    // push_overflow: bytes needed for incomplete PUSH immediate data
46    let push_overflow = (iterator as usize) - (end as usize);
47    let mut padding = push_overflow;
48
49    if last_byte == opcode::STOP {
50        // DUPN/SWAPN/EXCHANGE have 1-byte immediates that aren't handled by the loop above,
51        // so we need extra padding to ensure safe execution.
52        padding += is_dupn_swapn_exchange(prev_byte) as usize;
53    } else {
54        // Add final STOP instruction and immediate for DUPN/SWAPN/EXCHANGE
55        padding += 1 + is_dupn_swapn_exchange(last_byte) as usize;
56    }
57
58    let bytecode = if padding > 0 {
59        let mut padded = Vec::with_capacity(bytecode.len() + padding);
60        padded.extend_from_slice(&bytecode);
61        padded.resize(padded.len() + padding, 0);
62        Bytes::from(padded)
63    } else {
64        bytecode
65    };
66
67    (JumpTable::new(jumps), bytecode)
68}
69
70/// Returns true if the opcode is DUPN, SWAPN, or EXCHANGE.
71const fn is_dupn_swapn_exchange(opcode: u8) -> bool {
72    opcode.wrapping_sub(opcode::DUPN) < 3
73}
74
#[cfg(test)]
mod tests {
    use super::*;

    /// Code that already ends with STOP is returned without any padding.
    #[test]
    fn test_bytecode_ends_with_stop_no_padding_needed() {
        let bytecode = vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ];
        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
        assert_eq!(padded_bytecode.len(), bytecode.len());
    }

    /// Code whose last opcode is not STOP gets exactly one trailing STOP byte.
    #[test]
    fn test_bytecode_ends_without_stop_requires_padding() {
        let bytecode = vec![opcode::PUSH1, 0x01, opcode::PUSH1, 0x02, opcode::ADD];
        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
        assert_eq!(padded_bytecode.len(), bytecode.len() + 1);
    }

    /// A trailing PUSH16 with no immediate data needs 16 immediate bytes + STOP.
    #[test]
    fn test_bytecode_ends_with_push16_requires_17_bytes_padding() {
        let bytecode = vec![opcode::PUSH1, 0x01, opcode::PUSH16];
        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
        assert_eq!(padded_bytecode.len(), bytecode.len() + 17);
        // The padding is zero-filled.
        assert!(padded_bytecode[bytecode.len()..].iter().all(|&b| b == 0));
    }

    /// A trailing PUSH2 with one of its two immediate bytes present needs
    /// 1 missing immediate byte + STOP.
    #[test]
    fn test_bytecode_ends_with_push2_requires_2_bytes_padding() {
        let bytecode = vec![opcode::PUSH1, 0x01, opcode::PUSH2, 0x02];
        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
        assert_eq!(padded_bytecode.len(), bytecode.len() + 2);
        // The padding is zero-filled.
        assert!(padded_bytecode[bytecode.len()..].iter().all(|&b| b == 0));
    }

    /// Empty input is replaced by a single STOP.
    #[test]
    fn test_empty_bytecode_requires_stop() {
        let bytecode = vec![];
        let (_, padded_bytecode) = analyze_legacy(bytecode.into());
        assert_eq!(padded_bytecode.len(), 1); // Just STOP
    }

    #[test]
    fn test_bytecode_with_jumpdest_at_start() {
        let bytecode = vec![opcode::JUMPDEST, opcode::PUSH1, 0x01, opcode::STOP];
        let (jump_table, _) = analyze_legacy(bytecode.into());
        assert!(jump_table.is_valid(0)); // First byte should be a valid jumpdest
    }

    #[test]
    fn test_bytecode_with_jumpdest_after_push() {
        let bytecode = vec![opcode::PUSH1, 0x01, opcode::JUMPDEST, opcode::STOP];
        let (jump_table, _) = analyze_legacy(bytecode.into());
        assert!(jump_table.is_valid(2)); // JUMPDEST should be at position 2
    }

    #[test]
    fn test_bytecode_with_multiple_jumpdests() {
        let bytecode = vec![
            opcode::JUMPDEST,
            opcode::PUSH1,
            0x01,
            opcode::JUMPDEST,
            opcode::STOP,
        ];
        let (jump_table, _) = analyze_legacy(bytecode.into());
        assert!(jump_table.is_valid(0)); // First JUMPDEST
        assert!(jump_table.is_valid(3)); // Second JUMPDEST
    }

    /// A lone PUSH32 is the largest possible overflow past the end of the code.
    #[test]
    fn test_bytecode_with_max_push32() {
        let bytecode = vec![opcode::PUSH32];
        let (_, padded_bytecode) = analyze_legacy(bytecode.clone().into());
        assert_eq!(padded_bytecode.len(), bytecode.len() + 33); // 32 immediate bytes + STOP
    }

    #[test]
    fn test_bytecode_with_invalid_opcode() {
        // 0xFE is the designated INVALID opcode (0xFF is SELFDESTRUCT).
        let bytecode = vec![0xFE, opcode::STOP];
        let (jump_table, _) = analyze_legacy(bytecode.into());
        assert!(!jump_table.is_valid(0)); // Invalid opcode should not be a jumpdest
    }

    #[test]
    fn test_bytecode_with_sequential_pushes() {
        let bytecode = vec![
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::PUSH4,
            0x04,
            0x05,
            0x06,
            0x07,
            opcode::STOP,
        ];
        let (jump_table, padded_bytecode) = analyze_legacy(bytecode.clone().into());
        assert_eq!(padded_bytecode.len(), bytecode.len());
        assert!(!jump_table.is_valid(0)); // PUSH1
        assert!(!jump_table.is_valid(2)); // PUSH2
        assert!(!jump_table.is_valid(5)); // PUSH4
    }

    #[test]
    fn test_bytecode_with_jumpdest_in_push_data() {
        let bytecode = vec![
            opcode::PUSH2,
            opcode::JUMPDEST, // This should not be treated as a JUMPDEST
            0x02,
            opcode::STOP,
        ];
        let (jump_table, _) = analyze_legacy(bytecode.into());
        assert!(!jump_table.is_valid(1)); // JUMPDEST in push data should not be valid
    }

    #[test]
    fn test_bytecode_ends_with_immediate_opcode_and_stop_requires_padding() {
        // For SWAPN/DUPN/EXCHANGE, the STOP (0x00) is consumed as the immediate operand,
        // not as an actual STOP instruction, so padding is needed.
        // [OPCODE]       -> [OPCODE, STOP, STOP] (3 bytes)
        // [OPCODE, STOP] -> [OPCODE, STOP, STOP] (3 bytes)
        for op in [opcode::SWAPN, opcode::DUPN, opcode::EXCHANGE] {
            for bytecode in [vec![op], vec![op, opcode::STOP]] {
                let (_, padded_bytecode) = analyze_legacy(bytecode.into());
                assert_eq!(padded_bytecode.len(), 3);
                assert_eq!(padded_bytecode[0], op);
                assert_eq!(padded_bytecode[1], opcode::STOP);
                assert_eq!(padded_bytecode[2], opcode::STOP);
            }
        }
    }
}