Skip to main content

revm_bytecode/bytecode/
mod.rs

//! Module that contains the bytecode struct with all variants supported by Ethereum mainnet.
//!
//! Those are:
//! - Legacy bytecode with jump table analysis
//! - EIP-7702 bytecode, introduced in Prague, which contains the address of the delegated account
6
7#[cfg(feature = "serde")]
8mod serde_impl;
9
10use crate::{
11    eip7702::{Eip7702DecodeError, EIP7702_MAGIC_BYTES, EIP7702_VERSION},
12    legacy::analyze_legacy,
13    opcode, BytecodeDecodeError, JumpTable,
14};
15use primitives::{
16    alloy_primitives::Sealable, keccak256, Address, Bytes, OnceLock, B256, KECCAK_EMPTY,
17};
18use std::sync::Arc;
19
20/// Ethereum EVM bytecode.
21#[derive(Clone, Debug)]
22pub struct Bytecode(Arc<BytecodeInner>);
23
24/// Inner bytecode representation.
25///
26/// This struct is flattened to avoid nested allocations. The `kind` field determines
27/// how the bytecode should be interpreted.
28#[derive(Debug)]
29#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
30struct BytecodeInner {
31    /// The kind of bytecode (Legacy or EIP-7702).
32    kind: BytecodeKind,
33    /// The bytecode bytes.
34    ///
35    /// For legacy bytecode, this may be padded with zeros at the end.
36    /// For EIP-7702 bytecode, this is exactly 23 bytes.
37    bytecode: Bytes,
38    /// The original length of the bytecode before padding.
39    ///
40    /// For EIP-7702 bytecode, this is always 23.
41    original_len: usize,
42    /// The jump table for legacy bytecode. Empty for EIP-7702.
43    jump_table: JumpTable,
44    /// Cached hash of the original bytecode.
45    #[cfg_attr(feature = "serde", serde(skip, default))]
46    hash: OnceLock<B256>,
47}
48
/// Discriminant describing how a [`Bytecode`] must be interpreted.
///
/// The default kind is [`BytecodeKind::LegacyAnalyzed`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum BytecodeKind {
    /// Legacy bytecode that has been analyzed and carries a jump table.
    #[default]
    LegacyAnalyzed,
    /// Delegation bytecode as defined by EIP-7702.
    Eip7702,
}
59
60impl Default for Bytecode {
61    #[inline]
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl PartialEq for Bytecode {
68    #[inline]
69    fn eq(&self, other: &Self) -> bool {
70        self.original_byte_slice() == other.original_byte_slice()
71    }
72}
73
74impl Eq for Bytecode {}
75
76impl core::hash::Hash for Bytecode {
77    #[inline]
78    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
79        self.original_byte_slice().hash(state);
80    }
81}
82
83impl PartialOrd for Bytecode {
84    #[inline]
85    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
86        Some(self.cmp(other))
87    }
88}
89
90impl Ord for Bytecode {
91    #[inline]
92    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
93        self.original_byte_slice().cmp(other.original_byte_slice())
94    }
95}
96
97impl Sealable for Bytecode {
98    #[inline]
99    fn hash_slow(&self) -> B256 {
100        self.hash_slow()
101    }
102}
103
104impl Bytecode {
105    /// Creates a new legacy analyzed [`Bytecode`] with exactly one STOP opcode.
106    #[inline]
107    pub fn new() -> Self {
108        Self::default_ref().clone()
109    }
110
111    #[inline]
112    fn default_ref() -> &'static Self {
113        static DEFAULT: OnceLock<Bytecode> = OnceLock::new();
114        DEFAULT.get_or_init(|| {
115            Self(Arc::new(BytecodeInner {
116                kind: BytecodeKind::LegacyAnalyzed,
117                bytecode: Bytes::from_static(&[opcode::STOP]),
118                original_len: 0,
119                jump_table: JumpTable::default(),
120                hash: {
121                    let hash = OnceLock::new();
122                    let _ = hash.set(KECCAK_EMPTY);
123                    hash
124                },
125            }))
126        })
127    }
128
129    /// Creates a new legacy [`Bytecode`] by analyzing raw bytes.
130    #[inline]
131    pub fn new_legacy(raw: Bytes) -> Self {
132        if raw.is_empty() {
133            return Self::new();
134        }
135
136        let original_len = raw.len();
137        let (jump_table, bytecode) = analyze_legacy(raw);
138        Self(Arc::new(BytecodeInner {
139            kind: BytecodeKind::LegacyAnalyzed,
140            original_len,
141            bytecode,
142            jump_table,
143            hash: OnceLock::new(),
144        }))
145    }
146
147    /// Creates a new raw [`Bytecode`].
148    ///
149    /// # Panics
150    ///
151    /// Panics if bytecode is in incorrect format. If you want to handle errors use [`Self::new_raw_checked`].
152    #[inline]
153    pub fn new_raw(bytecode: Bytes) -> Self {
154        Self::new_raw_checked(bytecode).expect("Expect correct bytecode")
155    }
156
157    /// Creates a new EIP-7702 [`Bytecode`] from [`Address`].
158    #[inline]
159    pub fn new_eip7702(address: Address) -> Self {
160        let raw: Bytes = [EIP7702_MAGIC_BYTES, &[EIP7702_VERSION], &address[..]]
161            .concat()
162            .into();
163        Self(Arc::new(BytecodeInner {
164            kind: BytecodeKind::Eip7702,
165            original_len: raw.len(),
166            bytecode: raw,
167            jump_table: JumpTable::default(),
168            hash: OnceLock::new(),
169        }))
170    }
171
172    /// Creates a new raw [`Bytecode`].
173    ///
174    /// Returns an error on incorrect bytecode format.
175    #[inline]
176    pub fn new_raw_checked(bytes: Bytes) -> Result<Self, BytecodeDecodeError> {
177        if bytes.starts_with(EIP7702_MAGIC_BYTES) {
178            Self::new_eip7702_raw(bytes).map_err(Into::into)
179        } else {
180            Ok(Self::new_legacy(bytes))
181        }
182    }
183
184    /// Creates a new EIP-7702 [`Bytecode`] from raw bytes.
185    ///
186    /// Returns an error if the bytes are not valid EIP-7702 bytecode.
187    #[inline]
188    pub fn new_eip7702_raw(bytes: Bytes) -> Result<Self, Eip7702DecodeError> {
189        if bytes.len() != 23 {
190            return Err(Eip7702DecodeError::InvalidLength);
191        }
192        if !bytes.starts_with(EIP7702_MAGIC_BYTES) {
193            return Err(Eip7702DecodeError::InvalidMagic);
194        }
195        if bytes[2] != EIP7702_VERSION {
196            return Err(Eip7702DecodeError::UnsupportedVersion);
197        }
198        Ok(Self(Arc::new(BytecodeInner {
199            kind: BytecodeKind::Eip7702,
200            original_len: bytes.len(),
201            bytecode: bytes,
202            jump_table: JumpTable::default(),
203            hash: OnceLock::new(),
204        })))
205    }
206
207    /// Create new checked bytecode from pre-analyzed components.
208    ///
209    /// # Safety
210    ///
211    /// `bytecode` must satisfy the same padding invariants produced by
212    /// `analyze_legacy`. In particular, execution must never cause the
213    /// interpreter to read past the backing allocation when decoding opcode
214    /// immediates (`PUSH1`–`PUSH32` via `read_slice`, and `DUPN`/`SWAPN`/
215    /// `EXCHANGE` via `read_u8`).
216    ///
217    /// [`Bytecode::new_legacy`] handles this automatically.
218    /// This constructor is only for restoring trusted, previously analyzed
219    /// bytecode (e.g., from database storage) where the padding was already
220    /// applied.
221    ///
222    /// Violating this causes undefined behavior during execution due to
223    /// out-of-bounds reads from raw pointers.
224    ///
225    /// # Panics
226    ///
227    /// * If `original_len` is greater than `bytecode.len()`
228    /// * If jump table length is less than `original_len`
229    /// * If bytecode is empty
230    #[inline]
231    pub unsafe fn new_analyzed(
232        bytecode: Bytes,
233        original_len: usize,
234        jump_table: JumpTable,
235    ) -> Self {
236        assert!(
237            original_len <= bytecode.len(),
238            "original_len is greater than bytecode length"
239        );
240        assert!(
241            original_len <= jump_table.len(),
242            "jump table length is less than original length"
243        );
244        assert!(!bytecode.is_empty(), "bytecode cannot be empty");
245        Self(Arc::new(BytecodeInner {
246            kind: BytecodeKind::LegacyAnalyzed,
247            bytecode,
248            original_len,
249            jump_table,
250            hash: OnceLock::new(),
251        }))
252    }
253
254    /// Returns the kind of bytecode.
255    #[inline]
256    pub fn kind(&self) -> BytecodeKind {
257        self.0.kind
258    }
259
260    /// Returns `true` if bytecode is legacy.
261    #[inline]
262    pub fn is_legacy(&self) -> bool {
263        self.kind() == BytecodeKind::LegacyAnalyzed
264    }
265
266    /// Returns `true` if bytecode is EIP-7702.
267    #[inline]
268    pub fn is_eip7702(&self) -> bool {
269        self.kind() == BytecodeKind::Eip7702
270    }
271
272    /// Returns the EIP-7702 delegated address if this is EIP-7702 bytecode.
273    #[inline]
274    pub fn eip7702_address(&self) -> Option<Address> {
275        if self.is_eip7702() {
276            Some(Address::from_slice(&self.0.bytecode[3..23]))
277        } else {
278            None
279        }
280    }
281
282    /// Returns jump table if bytecode is legacy analyzed.
283    #[inline]
284    pub fn legacy_jump_table(&self) -> Option<&JumpTable> {
285        if self.is_legacy() {
286            Some(&self.0.jump_table)
287        } else {
288            None
289        }
290    }
291
292    /// Calculates or returns cached hash of the bytecode.
293    #[inline]
294    pub fn hash_slow(&self) -> B256 {
295        *self
296            .0
297            .hash
298            .get_or_init(|| keccak256(self.original_byte_slice()))
299    }
300
301    /// Returns a reference to the bytecode bytes.
302    ///
303    /// For legacy bytecode, this includes padding. For EIP-7702, this is the raw bytes.
304    #[inline]
305    pub fn bytecode(&self) -> &Bytes {
306        &self.0.bytecode
307    }
308
309    /// Pointer to the bytecode bytes.
310    #[inline]
311    pub fn bytecode_ptr(&self) -> *const u8 {
312        self.0.bytecode.as_ptr()
313    }
314
315    /// Returns a clone of the bytecode bytes.
316    #[inline]
317    pub fn bytes(&self) -> Bytes {
318        self.0.bytecode.clone()
319    }
320
321    /// Returns a reference to the bytecode bytes.
322    #[inline]
323    pub fn bytes_ref(&self) -> &Bytes {
324        &self.0.bytecode
325    }
326
327    /// Returns the bytecode as a slice.
328    #[inline]
329    pub fn bytes_slice(&self) -> &[u8] {
330        &self.0.bytecode
331    }
332
333    /// Returns the original bytecode without padding.
334    #[inline]
335    pub fn original_bytes(&self) -> Bytes {
336        self.0.bytecode.slice(..self.0.original_len)
337    }
338
339    /// Returns the original bytecode as a byte slice without padding.
340    #[inline]
341    pub fn original_byte_slice(&self) -> &[u8] {
342        &self.0.bytecode[..self.0.original_len]
343    }
344
345    /// Returns the length of the original bytes (without padding).
346    #[inline]
347    pub fn len(&self) -> usize {
348        self.0.original_len
349    }
350
351    /// Returns whether the bytecode is empty.
352    #[inline]
353    pub fn is_empty(&self) -> bool {
354        self.0.original_len == 0
355    }
356
357    /// Returns `true` if the bytecode is empty and has the default bytecode hash.
358    #[inline]
359    pub fn is_default(&self) -> bool {
360        Arc::ptr_eq(&self.0, &Self::default_ref().0)
361    }
362
363    /// Returns an iterator over the opcodes in this bytecode, skipping immediates.
364    #[inline]
365    pub fn iter_opcodes(&self) -> crate::BytecodeIterator<'_> {
366        crate::BytecodeIterator::new(self)
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{eip7702::Eip7702DecodeError, opcode};
    use bitvec::{bitvec, order::Lsb0};
    use primitives::bytes;

    /// Analyzes the two-byte program `PUSH1 0x01` shared by several tests.
    fn push1_bytecode() -> Bytecode {
        Bytecode::new_legacy(Bytes::from_static(&[opcode::PUSH1, 0x01]))
    }

    /// Every way of building empty bytecode yields the single-`STOP` legacy form.
    #[test]
    fn test_new_empty() {
        let empties = [
            Bytecode::default(),
            Bytecode::new(),
            Bytecode::new().clone(),
            Bytecode::new_legacy(Bytes::new()),
        ];
        for empty in &empties {
            assert_eq!(empty.kind(), BytecodeKind::LegacyAnalyzed);
            assert_eq!(empty.len(), 0);
            assert_eq!(empty.bytes_slice(), [opcode::STOP]);
        }
    }

    /// Components taken from an analyzed bytecode can be reassembled.
    #[test]
    fn test_new_analyzed() {
        let analyzed = push1_bytecode();
        let code = analyzed.bytecode().clone();
        let table = analyzed.legacy_jump_table().unwrap().clone();
        // SAFETY: bytecode was produced by `new_legacy` which pads correctly.
        let _ = unsafe { Bytecode::new_analyzed(code, analyzed.len(), table) };
    }

    /// An `original_len` beyond the stored bytes is rejected.
    #[test]
    #[should_panic(expected = "original_len is greater than bytecode length")]
    fn test_panic_on_large_original_len() {
        let analyzed = push1_bytecode();
        let code = analyzed.bytecode().clone();
        let table = analyzed.legacy_jump_table().unwrap().clone();
        // SAFETY: testing the panic, not execution safety.
        let _ = unsafe { Bytecode::new_analyzed(code, 100, table) };
    }

    /// A jump table shorter than `original_len` is rejected.
    #[test]
    #[should_panic(expected = "jump table length is less than original length")]
    fn test_panic_on_short_jump_table() {
        let analyzed = push1_bytecode();
        let code = analyzed.bytecode().clone();
        let short_table = JumpTable::new(bitvec![u8, Lsb0; 0; 1]);
        // SAFETY: testing the panic, not execution safety.
        let _ = unsafe { Bytecode::new_analyzed(code, analyzed.len(), short_table) };
    }

    /// Empty stored bytes are rejected.
    #[test]
    #[should_panic(expected = "bytecode cannot be empty")]
    fn test_panic_on_empty_bytecode() {
        let no_bytes = Bytes::from_static(&[]);
        let empty_table = JumpTable::new(bitvec![u8, Lsb0; 0; 0]);
        // SAFETY: testing the panic, not execution safety.
        let _ = unsafe { Bytecode::new_analyzed(no_bytes, 0, empty_table) };
    }

    /// Malformed EIP-7702 input is rejected and well-formed input round-trips.
    #[test]
    fn eip7702_sanity_decode() {
        let rejected = [
            (bytes!("ef01deadbeef"), Eip7702DecodeError::InvalidLength),
            (
                bytes!("ef0101deadbeef00000000000000000000000000000000"),
                Eip7702DecodeError::UnsupportedVersion,
            ),
        ];
        for (raw, expected) in rejected {
            assert_eq!(Bytecode::new_eip7702_raw(raw), Err(expected));
        }

        let raw = bytes!("ef0100deadbeef00000000000000000000000000000000");
        let decoded = Bytecode::new_eip7702_raw(raw.clone()).unwrap();
        assert!(decoded.is_eip7702());

        let delegate = Address::from_slice(&raw[3..]);
        assert_eq!(decoded.eip7702_address(), Some(delegate));
        assert_eq!(decoded.original_bytes(), raw);
    }

    /// Building from an address encodes magic, version and address in order.
    #[test]
    fn eip7702_from_address() {
        let delegate = Address::new([0x01; 20]);
        let built = Bytecode::new_eip7702(delegate);
        assert_eq!(built.eip7702_address(), Some(delegate));

        let expected = bytes!("ef01000101010101010101010101010101010101010101");
        assert_eq!(built.original_bytes(), expected);
    }

    /// The value returned by `Default` is recognised as the default.
    #[test]
    fn is_default() {
        let default = Bytecode::default();
        assert!(default.is_default());
    }

    /// The default bytecode is still recognised after a JSON round trip.
    #[test]
    #[cfg(feature = "serde")]
    fn is_default_after_serde() {
        let original = Bytecode::default();
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Bytecode = serde_json::from_str(&encoded).unwrap();
        assert!(decoded.is_default());
    }
}