Skip to main content

revm_bytecode/legacy/
analysis.rs

1use super::JumpTable;
2use crate::opcode;
3use bitvec::{bitvec, order::Lsb0, vec::BitVec};
4use primitives::Bytes;
5use std::vec::Vec;
6
7/// Analyzes the bytecode to produce a jump table and potentially padded bytecode.
8///
9/// Prefer using [`Bytecode::new_legacy`](crate::Bytecode::new_legacy) instead.
10pub(crate) fn analyze_legacy(bytecode: Bytes) -> (JumpTable, Bytes) {
11    let mut jumps: BitVec<u8> = bitvec![u8, Lsb0; 0; bytecode.len()];
12    let range = bytecode.as_ptr_range();
13    let start = range.start;
14    let mut iterator = start;
15    let end = range.end;
16    let mut prev_byte: u8 = 0;
17    let mut last_byte: u8 = 0;
18
19    while iterator < end {
20        prev_byte = last_byte;
21        last_byte = unsafe { *iterator };
22        if last_byte == opcode::JUMPDEST {
23            // SAFETY: Jumps are max length of the code
24            unsafe { jumps.set_unchecked(iterator.offset_from_unsigned(start), true) }
25            iterator = unsafe { iterator.add(1) };
26        } else {
27            let push_offset = last_byte.wrapping_sub(opcode::PUSH1);
28            if push_offset < 32 {
29                // SAFETY: Iterator access range is checked in the while loop
30                iterator = unsafe { iterator.add(push_offset as usize + 2) };
31            } else {
32                // SAFETY: Iterator access range is checked in the while loop
33                iterator = unsafe { iterator.add(1) };
34            }
35        }
36    }
37
38    // Calculate padding needed:
39    // push_overflow: bytes needed for incomplete PUSH immediate data
40    let push_overflow = (iterator as usize) - (end as usize);
41    let mut padding = push_overflow;
42
43    if last_byte == opcode::STOP {
44        // DUPN/SWAPN/EXCHANGE have 1-byte immediates that aren't handled by the loop above,
45        // so we need extra padding to ensure safe execution.
46        padding += is_dupn_swapn_exchange(prev_byte) as usize;
47    } else {
48        // Add final STOP instruction and immediate for DUPN/SWAPN/EXCHANGE
49        padding += 1 + is_dupn_swapn_exchange(last_byte) as usize;
50    }
51
52    let bytecode = if padding > 0 {
53        let mut padded = Vec::with_capacity(bytecode.len() + padding);
54        padded.extend_from_slice(&bytecode);
55        padded.resize(padded.len() + padding, 0);
56        Bytes::from(padded)
57    } else {
58        bytecode
59    };
60
61    (JumpTable::new(jumps), bytecode)
62}
63
64/// Returns true if the opcode is DUPN, SWAPN, or EXCHANGE.
65const fn is_dupn_swapn_exchange(opcode: u8) -> bool {
66    opcode.wrapping_sub(opcode::DUPN) < 3
67}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// Analyzes `code` and returns the length of the resulting (possibly padded) bytecode.
    fn padded_len(code: &[u8]) -> usize {
        let (_, padded) = analyze_legacy(code.to_vec().into());
        padded.len()
    }

    /// Analyzes `code` and returns only its jump table.
    fn jump_table_for(code: &[u8]) -> JumpTable {
        let (table, _) = analyze_legacy(code.to_vec().into());
        table
    }

    #[test]
    fn test_bytecode_ends_with_stop_no_padding_needed() {
        let code = [
            opcode::PUSH1,
            0x01,
            opcode::PUSH1,
            0x02,
            opcode::ADD,
            opcode::STOP,
        ];
        assert_eq!(padded_len(&code), code.len());
    }

    #[test]
    fn test_bytecode_ends_without_stop_requires_padding() {
        let code = [opcode::PUSH1, 0x01, opcode::PUSH1, 0x02, opcode::ADD];
        // Only the terminating STOP is appended.
        assert_eq!(padded_len(&code), code.len() + 1);
    }

    #[test]
    fn test_bytecode_ends_with_push16_requires_17_bytes_padding() {
        let code = [opcode::PUSH1, 0x01, opcode::PUSH16];
        // 16 bytes of missing immediate data plus the terminating STOP.
        assert_eq!(padded_len(&code), code.len() + 17);
    }

    #[test]
    fn test_bytecode_ends_with_push2_requires_2_bytes_padding() {
        let code = [opcode::PUSH1, 0x01, opcode::PUSH2, 0x02];
        // One missing immediate byte plus the terminating STOP.
        assert_eq!(padded_len(&code), code.len() + 2);
    }

    #[test]
    fn test_bytecode_with_jumpdest_at_start() {
        let table = jump_table_for(&[opcode::JUMPDEST, opcode::PUSH1, 0x01, opcode::STOP]);
        // The very first byte is a valid jump destination.
        assert!(table.is_valid(0));
    }

    #[test]
    fn test_bytecode_with_jumpdest_after_push() {
        let table = jump_table_for(&[opcode::PUSH1, 0x01, opcode::JUMPDEST, opcode::STOP]);
        // The JUMPDEST follows the PUSH1 and its immediate, so it sits at offset 2.
        assert!(table.is_valid(2));
    }

    #[test]
    fn test_bytecode_with_multiple_jumpdests() {
        let table = jump_table_for(&[
            opcode::JUMPDEST,
            opcode::PUSH1,
            0x01,
            opcode::JUMPDEST,
            opcode::STOP,
        ]);
        // Both JUMPDEST opcodes must be registered.
        assert!(table.is_valid(0));
        assert!(table.is_valid(3));
    }

    #[test]
    fn test_bytecode_with_max_push32() {
        let code = [opcode::PUSH32];
        // 32 bytes of missing immediate data plus the terminating STOP.
        assert_eq!(padded_len(&code), code.len() + 33);
    }

    #[test]
    fn test_bytecode_with_invalid_opcode() {
        // 0xFF is an invalid opcode and must not be reported as a jump destination.
        let table = jump_table_for(&[0xFF, opcode::STOP]);
        assert!(!table.is_valid(0));
    }

    #[test]
    fn test_bytecode_with_sequential_pushes() {
        let code = [
            opcode::PUSH1,
            0x01,
            opcode::PUSH2,
            0x02,
            0x03,
            opcode::PUSH4,
            0x04,
            0x05,
            0x06,
            0x07,
            opcode::STOP,
        ];
        let (table, padded) = analyze_legacy(code.to_vec().into());
        assert_eq!(padded.len(), code.len());
        // Offsets of PUSH1, PUSH2 and PUSH4 respectively; none is a jump destination.
        for push_position in [0, 2, 5] {
            assert!(!table.is_valid(push_position));
        }
    }

    #[test]
    fn test_bytecode_with_jumpdest_in_push_data() {
        let table = jump_table_for(&[
            opcode::PUSH2,
            opcode::JUMPDEST, // Immediate data of PUSH2, not a real JUMPDEST
            0x02,
            opcode::STOP,
        ]);
        // A JUMPDEST byte inside push data must not be a valid destination.
        assert!(!table.is_valid(1));
    }

    #[test]
    fn test_bytecode_ends_with_immediate_opcode_and_stop_requires_padding() {
        // For SWAPN/DUPN/EXCHANGE, the STOP (0x00) is consumed as the immediate operand,
        // not as an actual STOP instruction, so padding is needed.
        // [OPCODE]       -> [OPCODE, STOP, STOP] (3 bytes)
        // [OPCODE, STOP] -> [OPCODE, STOP, STOP] (3 bytes)
        for op in [opcode::SWAPN, opcode::DUPN, opcode::EXCHANGE] {
            let expected = [op, opcode::STOP, opcode::STOP];
            for code in [vec![op], vec![op, opcode::STOP]] {
                let (_, padded) = analyze_legacy(code.into());
                assert_eq!(&padded[..], &expected[..]);
            }
        }
    }
}