revm_bytecode/
eof.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
mod body;
mod decode_helpers;
mod header;
pub mod printer;
mod types_section;
pub mod verification;

pub use body::EofBody;
pub use header::EofHeader;
pub use types_section::TypesSection;
pub use verification::*;

use core::cmp::min;
use primitives::{b256, bytes, Bytes, B256};
use std::{fmt, vec, vec::Vec};

/// Hash of EF00 bytes that is used for EXTCODEHASH when called from legacy bytecode.
pub const EOF_MAGIC_HASH: B256 =
    b256!("9dbf3648db8210552e9c4f75c6a1c3057c0ca432043bd648be15fe7be05646f5");

/// EOF Magic in u16 form.
pub const EOF_MAGIC: u16 = 0xEF00;

/// EOF magic number in array form.
pub static EOF_MAGIC_BYTES: Bytes = bytes!("ef00");

/// EVM Object Format (EOF) container.
///
/// It consists of a header, body and the raw original bytes.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Eof {
    pub header: EofHeader,
    pub body: EofBody,
    pub raw: Bytes,
}

impl Default for Eof {
    fn default() -> Self {
        let body = EofBody {
            // types section with zero inputs, zero outputs and zero max stack size.
            types_section: vec![TypesSection::default()],
            // One code section with a STOP byte.
            code_section: vec![Bytes::from_static(&[0x00])],
            container_section: vec![],
            data_section: Bytes::new(),
            is_data_filled: true,
        };
        body.into_eof()
    }
}

impl Eof {
    pub fn validate(&self) -> Result<(), EofError> {
        validate_eof(self)
    }

    pub fn valitate_raw(bytes: Bytes) -> Result<Eof, EofError> {
        validate_raw_eof(bytes)
    }

    pub fn validate_mode(&self, mode: CodeType) -> Result<(), EofError> {
        validate_eof_inner(self, Some(mode))
    }

    /// Creates a new EOF container from the given body.
    pub fn new(body: EofBody) -> Self {
        body.into_eof()
    }

    /// Returns len of the header and body in bytes.
    pub fn size(&self) -> usize {
        self.header.size() + self.header.body_size()
    }

    /// Return raw EOF bytes.
    pub fn raw(&self) -> &Bytes {
        &self.raw
    }

    /// Returns a slice of the raw bytes.
    /// If offset is greater than the length of the raw bytes, an empty slice is returned.
    /// If len is greater than the length of the raw bytes, the slice is truncated to the length of the raw bytes.
    pub fn data_slice(&self, offset: usize, len: usize) -> &[u8] {
        self.body
            .data_section
            .get(offset..)
            .and_then(|bytes| bytes.get(..min(len, bytes.len())))
            .unwrap_or(&[])
    }

    /// Returns a slice of the data section.
    pub fn data(&self) -> &[u8] {
        &self.body.data_section
    }

    /// Slow encode EOF bytes.
    pub fn encode_slow(&self) -> Bytes {
        let mut buffer: Vec<u8> = Vec::with_capacity(self.size());
        self.header.encode(&mut buffer);
        self.body.encode(&mut buffer);
        buffer.into()
    }

    /// Decode EOF that have additional dangling bytes.
    /// Assume that data section is fully filled.
    pub fn decode_dangling(mut raw: Bytes) -> Result<(Self, Bytes), EofDecodeError> {
        let (header, _) = EofHeader::decode(&raw)?;
        let eof_size = header.body_size() + header.size();
        if eof_size > raw.len() {
            return Err(EofDecodeError::MissingInput);
        }
        let dangling_data = raw.split_off(eof_size);
        let body = EofBody::decode(&raw, &header)?;
        Ok((Self { header, body, raw }, dangling_data))
    }

    /// Decode EOF from raw bytes.
    pub fn decode(raw: Bytes) -> Result<Self, EofDecodeError> {
        let (header, _) = EofHeader::decode(&raw)?;
        let body = EofBody::decode(&raw, &header)?;
        Ok(Self { header, body, raw })
    }
}

/// EOF decode errors.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum EofDecodeError {
    /// Short input while processing EOF.
    MissingInput,
    /// Short body while processing EOF.
    MissingBodyWithoutData,
    /// Body size is more than specified in the header.
    DanglingData,
    /// Invalid types section data.
    InvalidTypesSection,
    /// Invalid types section size.
    InvalidTypesSectionSize,
    /// Invalid EOF magic number.
    InvalidEOFMagicNumber,
    /// Invalid EOF version.
    InvalidEOFVersion,
    /// Invalid number for types kind
    InvalidTypesKind,
    /// Invalid number for code kind
    InvalidCodeKind,
    /// Invalid terminal code
    InvalidTerminalByte,
    /// Invalid data kind
    InvalidDataKind,
    /// Invalid kind after code
    InvalidKindAfterCode,
    /// Mismatch of code and types sizes.
    MismatchCodeAndTypesSize,
    /// There should be at least one size.
    NonSizes,
    /// Missing size.
    ShortInputForSizes,
    /// Size cant be zero
    ZeroSize,
    /// Invalid code number.
    TooManyCodeSections,
    /// Invalid number of code sections.
    ZeroCodeSections,
    /// Invalid container number.
    TooManyContainerSections,
    /// Invalid initcode size.
    InvalidEOFSize,
}

impl fmt::Display for EofDecodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let s = match self {
            Self::MissingInput => "Short input while processing EOF",
            Self::MissingBodyWithoutData => "Short body while processing EOF",
            Self::DanglingData => "Body size is more than specified in the header",
            Self::InvalidTypesSection => "Invalid types section data",
            Self::InvalidTypesSectionSize => "Invalid types section size",
            Self::InvalidEOFMagicNumber => "Invalid EOF magic number",
            Self::InvalidEOFVersion => "Invalid EOF version",
            Self::InvalidTypesKind => "Invalid number for types kind",
            Self::InvalidCodeKind => "Invalid number for code kind",
            Self::InvalidTerminalByte => "Invalid terminal code",
            Self::InvalidDataKind => "Invalid data kind",
            Self::InvalidKindAfterCode => "Invalid kind after code",
            Self::MismatchCodeAndTypesSize => "Mismatch of code and types sizes",
            Self::NonSizes => "There should be at least one size",
            Self::ShortInputForSizes => "Missing size",
            Self::ZeroSize => "Size cant be zero",
            Self::TooManyCodeSections => "Invalid code number",
            Self::ZeroCodeSections => "Invalid number of code sections",
            Self::TooManyContainerSections => "Invalid container number",
            Self::InvalidEOFSize => "Invalid initcode size",
        };
        f.write_str(s)
    }
}

impl core::error::Error for EofDecodeError {}

#[cfg(test)]
mod test {

    use super::*;
    use primitives::bytes;

    #[test]
    fn decode_eof() {
        let bytes = bytes!("ef000101000402000100010400000000800000fe");
        let eof = Eof::decode(bytes.clone()).unwrap();
        assert_eq!(bytes, eof.encode_slow());
    }

    #[test]
    fn decode_eof_dangling() {
        let test_cases = [
            (
                bytes!("ef000101000402000100010400000000800000fe"),
                bytes!("010203"),
                false,
            ),
            (
                bytes!("ef000101000402000100010400000000800000fe"),
                bytes!(""),
                false,
            ),
            (
                bytes!("ef000101000402000100010400000000800000"),
                bytes!(""),
                true,
            ),
        ];

        for (eof_bytes, dangling_data, is_err) in test_cases {
            let mut raw = eof_bytes.to_vec();
            raw.extend(&dangling_data);
            let raw = Bytes::from(raw);

            let result = Eof::decode_dangling(raw.clone());
            assert_eq!(result.is_err(), is_err);
            if is_err {
                continue;
            }
            let (decoded_eof, decoded_dangling) = result.unwrap();
            assert_eq!(eof_bytes, decoded_eof.encode_slow());
            assert_eq!(decoded_dangling, dangling_data);
        }
    }

    #[test]
    fn data_slice() {
        let bytes = bytes!("ef000101000402000100010400000000800000fe");
        let mut eof = Eof::decode(bytes.clone()).unwrap();
        eof.body.data_section = bytes!("01020304");
        assert_eq!(eof.data_slice(0, 1), &[0x01]);
        assert_eq!(eof.data_slice(0, 4), &[0x01, 0x02, 0x03, 0x04]);
        assert_eq!(eof.data_slice(0, 5), &[0x01, 0x02, 0x03, 0x04]);
        assert_eq!(eof.data_slice(1, 2), &[0x02, 0x03]);

        const EMPTY: &[u8] = &[];
        assert_eq!(eof.data_slice(10, 2), EMPTY);
        assert_eq!(eof.data_slice(1, 0), EMPTY);
        assert_eq!(eof.data_slice(10, 0), EMPTY);
    }
}