revme/cmd/statetest/
runner.rs

1use crate::cmd::statetest::merkle_trie::{compute_test_roots, TestValidationResult};
2use indicatif::{ProgressBar, ProgressDrawTarget};
3use revm::{
4    context::{block::BlockEnv, cfg::CfgEnv, tx::TxEnv},
5    context_interface::{
6        result::{EVMError, ExecutionResult, HaltReason, InvalidTransaction},
7        Cfg,
8    },
9    database,
10    database_interface::EmptyDB,
11    inspector::{inspectors::TracerEip3155, InspectCommitEvm},
12    primitives::U256,
13    primitives::{hardfork::SpecId, Bytes, B256},
14    Context, ExecuteCommitEvm, MainBuilder, MainContext,
15};
16use serde_json::json;
17use statetest_types::{SpecName, Test, TestSuite, TestUnit};
18use std::{
19    convert::Infallible,
20    fmt::Debug,
21    io::stderr,
22    path::{Path, PathBuf},
23    sync::{
24        atomic::{AtomicBool, AtomicUsize, Ordering},
25        Arc, Mutex,
26    },
27    time::{Duration, Instant},
28};
29use thiserror::Error;
30use walkdir::{DirEntry, WalkDir};
31
/// Error that occurs during test execution
#[derive(Debug, Error)]
#[error("Path: {path}\nName: {name}\nError: {kind}")]
pub struct TestError {
    /// Name of the failing test unit within the suite file.
    pub name: String,
    /// Filesystem path of the JSON suite file that contained the test.
    pub path: String,
    /// The specific failure that occurred.
    pub kind: TestErrorKind,
}
40
/// Specific kind of error that occurred during test execution
#[derive(Debug, Error)]
pub enum TestErrorKind {
    /// Computed logs root differs from the value expected by the test.
    #[error("logs root mismatch: got {got}, expected {expected}")]
    LogsRootMismatch { got: B256, expected: B256 },
    /// Computed state root differs from the value expected by the test.
    #[error("state root mismatch: got {got}, expected {expected}")]
    StateRootMismatch { got: B256, expected: B256 },
    /// The transaction's secret key was not recognized when building the tx env.
    #[error("unknown private key: {0:?}")]
    UnknownPrivateKey(B256),
    /// Exception expectation and actual execution outcome disagree
    /// (expected-but-missing or unexpected exception).
    #[error("unexpected exception: got {got_exception:?}, expected {expected_exception:?}")]
    UnexpectedException {
        expected_exception: Option<String>,
        got_exception: Option<String>,
    },
    /// Execution output bytes differ from the expected output.
    #[error("unexpected output: got {got_output:?}, expected {expected_output:?}")]
    UnexpectedOutput {
        expected_output: Option<Bytes>,
        got_output: Option<Bytes>,
    },
    /// The suite JSON failed to deserialize.
    #[error(transparent)]
    SerdeDeserialize(#[from] serde_json::Error),
    /// A worker thread panicked while running tests.
    #[error("thread panicked")]
    Panic,
    /// The given test path does not exist.
    #[error("path does not exist")]
    InvalidPath,
    /// No `.json` files were found under the given path.
    #[error("no JSON test files found in path")]
    NoJsonFiles,
}
69
70/// Find all JSON test files in the given path
71/// If path is a file, returns it in a vector
72/// If path is a directory, recursively finds all .json files
73pub fn find_all_json_tests(path: &Path) -> Vec<PathBuf> {
74    if path.is_file() {
75        vec![path.to_path_buf()]
76    } else {
77        WalkDir::new(path)
78            .into_iter()
79            .filter_map(Result::ok)
80            .filter(|e| e.path().extension() == Some("json".as_ref()))
81            .map(DirEntry::into_path)
82            .collect()
83    }
84}
85
/// Check if a test should be skipped based on its filename
/// Some tests are known to be problematic or take too long
fn skip_test(path: &Path) -> bool {
    let path_str = path.to_str().unwrap_or_default();

    // Skip tests that have storage for newly created account.
    if path_str.contains("paris/eip7610_create_collision") {
        return true;
    }

    // `file_name()` is `None` for paths like `..`; treat those as a
    // non-matching name instead of panicking.
    let name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or_default();

    matches!(
        name,
        // Test check if gas price overflows, we handle this correctly but does not match tests specific exception.
        | "CreateTransactionHighNonce.json"

        // Test with some storage check.
        | "RevertInCreateInInit_Paris.json"
        | "RevertInCreateInInit.json"
        | "dynamicAccountOverwriteEmpty.json"
        | "dynamicAccountOverwriteEmpty_Paris.json"
        | "RevertInCreateInInitCreate2Paris.json"
        | "create2collisionStorage.json"
        | "RevertInCreateInInitCreate2.json"
        | "create2collisionStorageParis.json"
        | "InitCollision.json"
        | "InitCollisionParis.json"
        | "test_init_collision_create_opcode.json"

        // Malformed value.
        | "ValueOverflow.json"
        | "ValueOverflowParis.json"

        // These tests are passing, but they take a lot of time to execute so we are going to skip them.
        | "Call50000_sha256.json"
        | "static_Call50000_sha256.json"
        | "loopMul.json"
        | "CALLBlake2f_MaxRounds.json"
    )
}
127
/// Borrowed inputs required to execute a single test case against one spec.
struct TestExecutionContext<'a> {
    /// Name of the test unit.
    name: &'a str,
    /// The whole test unit (provides prestate and expected output).
    unit: &'a TestUnit,
    /// The individual post-state test case being executed.
    test: &'a Test,
    /// EVM configuration (chain id, spec, blob limits).
    cfg: &'a CfgEnv,
    /// Block environment for the chosen spec.
    block: &'a BlockEnv,
    /// Transaction environment built from the test's indexes.
    tx: &'a TxEnv,
    /// Shared cached prestate, cloned per test run.
    cache_state: &'a database::CacheState,
    /// Shared accumulator for total EVM execution time.
    elapsed: &'a Arc<Mutex<Duration>>,
    /// Whether to run under the EIP-3155 tracer.
    trace: bool,
    /// Whether to print a JSON outcome line for each test.
    print_json_outcome: bool,
}
140
/// Borrowed inputs for re-running a failed test with tracing enabled
/// so its execution can be inspected.
struct DebugContext<'a> {
    /// Name of the failing test unit.
    name: &'a str,
    /// Path of the suite file containing the test.
    path: &'a str,
    /// Index of the failing case within its spec's test list.
    index: usize,
    /// The failing test case.
    test: &'a Test,
    /// EVM configuration used for the failing run.
    cfg: &'a CfgEnv,
    /// Block environment used for the failing run.
    block: &'a BlockEnv,
    /// Transaction environment used for the failing run.
    tx: &'a TxEnv,
    /// Cached prestate the failing run started from.
    cache_state: &'a database::CacheState,
    /// The validation error that caused the failure.
    error: &'a TestErrorKind,
}
152
153fn build_json_output(
154    test: &Test,
155    test_name: &str,
156    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
157    validation: &TestValidationResult,
158    spec: SpecId,
159    error: Option<String>,
160) -> serde_json::Value {
161    json!({
162        "stateRoot": validation.state_root,
163        "logsRoot": validation.logs_root,
164        "output": exec_result.as_ref().ok().and_then(|r| r.output().cloned()).unwrap_or_default(),
165        "gasUsed": exec_result.as_ref().ok().map(|r| r.gas_used()).unwrap_or_default(),
166        "pass": error.is_none(),
167        "errorMsg": error.unwrap_or_default(),
168        "evmResult": format_evm_result(exec_result),
169        "postLogsHash": validation.logs_root,
170        "fork": spec,
171        "test": test_name,
172        "d": test.indexes.data,
173        "g": test.indexes.gas,
174        "v": test.indexes.value,
175    })
176}
177
178fn format_evm_result(
179    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
180) -> String {
181    match exec_result {
182        Ok(r) => match r {
183            ExecutionResult::Success { reason, .. } => format!("Success: {reason:?}"),
184            ExecutionResult::Revert { .. } => "Revert".to_string(),
185            ExecutionResult::Halt { reason, .. } => format!("Halt: {reason:?}"),
186        },
187        Err(e) => e.to_string(),
188    }
189}
190
191fn validate_exception(
192    test: &Test,
193    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
194) -> Result<bool, TestErrorKind> {
195    match (&test.expect_exception, exec_result) {
196        (None, Ok(_)) => Ok(false), // No exception expected, execution succeeded
197        (Some(_), Err(_)) => Ok(true), // Exception expected and occurred
198        _ => Err(TestErrorKind::UnexpectedException {
199            expected_exception: test.expect_exception.clone(),
200            got_exception: exec_result.as_ref().err().map(|e| e.to_string()),
201        }),
202    }
203}
204
205fn validate_output(
206    expected_output: Option<&Bytes>,
207    actual_result: &ExecutionResult<HaltReason>,
208) -> Result<(), TestErrorKind> {
209    if let Some((expected, actual)) = expected_output.zip(actual_result.output()) {
210        if expected != actual {
211            return Err(TestErrorKind::UnexpectedOutput {
212                expected_output: Some(expected.clone()),
213                got_output: actual_result.output().cloned(),
214            });
215        }
216    }
217    Ok(())
218}
219
/// Validates a single EVM execution against the test's expectations:
/// exception handling, output bytes, logs root, and state root, in that
/// order. When `print_json_outcome` is set, a JSON outcome line is printed
/// for every exit path (pass or fail) before returning.
fn check_evm_execution(
    test: &Test,
    expected_output: Option<&Bytes>,
    test_name: &str,
    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
    db: &mut database::State<EmptyDB>,
    spec: SpecId,
    print_json_outcome: bool,
) -> Result<(), TestErrorKind> {
    // Compute state/logs roots from the post-execution database.
    let validation = compute_test_roots(exec_result, db);

    // Helper emitting the JSON outcome line (with optional error) when enabled.
    let print_json = |error: Option<&TestErrorKind>| {
        if print_json_outcome {
            let json = build_json_output(
                test,
                test_name,
                exec_result,
                &validation,
                spec,
                error.map(|e| e.to_string()),
            );
            eprintln!("{json}");
        }
    };

    // Check if exception handling is correct
    let exception_expected = validate_exception(test, exec_result).inspect_err(|e| {
        print_json(Some(e));
    })?;

    // If exception was expected and occurred, we're done
    if exception_expected {
        print_json(None);
        return Ok(());
    }

    // Validate output if execution succeeded
    if let Ok(result) = exec_result {
        validate_output(expected_output, result).inspect_err(|e| {
            print_json(Some(e));
        })?;
    }

    // Validate logs root
    if validation.logs_root != test.logs {
        let error = TestErrorKind::LogsRootMismatch {
            got: validation.logs_root,
            expected: test.logs,
        };
        print_json(Some(&error));
        return Err(error);
    }

    // Validate state root
    if validation.state_root != test.hash {
        let error = TestErrorKind::StateRootMismatch {
            got: validation.state_root,
            expected: test.hash,
        };
        print_json(Some(&error));
        return Err(error);
    }

    // All checks passed.
    print_json(None);
    Ok(())
}
286
287/// Execute a single test suite file containing multiple tests
288///
289/// # Arguments
290/// * `path` - Path to the JSON test file
291/// * `elapsed` - Shared counter for total execution time
292/// * `trace` - Whether to enable EVM tracing
293/// * `print_json_outcome` - Whether to print JSON formatted results
294pub fn execute_test_suite(
295    path: &Path,
296    elapsed: &Arc<Mutex<Duration>>,
297    trace: bool,
298    print_json_outcome: bool,
299) -> Result<(), TestError> {
300    if skip_test(path) {
301        return Ok(());
302    }
303
304    let s = std::fs::read_to_string(path).unwrap();
305    let path = path.to_string_lossy().into_owned();
306    let suite: TestSuite = serde_json::from_str(&s).map_err(|e| TestError {
307        name: "Unknown".to_string(),
308        path: path.clone(),
309        kind: e.into(),
310    })?;
311
312    for (name, unit) in suite.0 {
313        // Prepare initial state
314        let cache_state = unit.state();
315
316        // Setup base configuration
317        let mut cfg = CfgEnv::default();
318        cfg.chain_id = unit
319            .env
320            .current_chain_id
321            .unwrap_or(U256::ONE)
322            .try_into()
323            .unwrap_or(1);
324
325        // Post and execution
326        for (spec_name, tests) in &unit.post {
327            // Skip Constantinople spec
328            if *spec_name == SpecName::Constantinople {
329                continue;
330            }
331
332            cfg.spec = spec_name.to_spec_id();
333
334            // Configure max blobs per spec
335            if cfg.spec.is_enabled_in(SpecId::OSAKA) {
336                cfg.set_max_blobs_per_tx(6);
337            } else if cfg.spec.is_enabled_in(SpecId::PRAGUE) {
338                cfg.set_max_blobs_per_tx(9);
339            } else {
340                cfg.set_max_blobs_per_tx(6);
341            }
342
343            // Setup block environment for this spec
344            let block = unit.block_env(&cfg);
345
346            for (index, test) in tests.iter().enumerate() {
347                // Setup transaction environment
348                let tx = match test.tx_env(&unit) {
349                    Ok(tx) => tx,
350                    Err(_) if test.expect_exception.is_some() => continue,
351                    Err(_) => {
352                        return Err(TestError {
353                            name: name.clone(),
354                            path: path.clone(),
355                            kind: TestErrorKind::UnknownPrivateKey(unit.transaction.secret_key),
356                        });
357                    }
358                };
359
360                // Execute the test
361                let result = execute_single_test(TestExecutionContext {
362                    name: &name,
363                    unit: &unit,
364                    test,
365                    cfg: &cfg,
366                    block: &block,
367                    tx: &tx,
368                    cache_state: &cache_state,
369                    elapsed,
370                    trace,
371                    print_json_outcome,
372                });
373
374                if let Err(e) = result {
375                    // Handle error with debug trace if needed
376                    static FAILED: AtomicBool = AtomicBool::new(false);
377                    if print_json_outcome || FAILED.swap(true, Ordering::SeqCst) {
378                        return Err(TestError {
379                            name: name.clone(),
380                            path: path.clone(),
381                            kind: e,
382                        });
383                    }
384
385                    // Re-run with trace for debugging
386                    debug_failed_test(DebugContext {
387                        name: &name,
388                        path: &path,
389                        index,
390                        test,
391                        cfg: &cfg,
392                        block: &block,
393                        tx: &tx,
394                        cache_state: &cache_state,
395                        error: &e,
396                    });
397
398                    return Err(TestError {
399                        path: path.clone(),
400                        name: name.clone(),
401                        kind: e,
402                    });
403                }
404            }
405        }
406    }
407    Ok(())
408}
409
/// Runs one test case end-to-end: builds a fresh state from the cached
/// prestate, executes the transaction (optionally under the EIP-3155
/// tracer), adds the run time to the shared total, and validates the
/// outcome via `check_evm_execution`.
fn execute_single_test(ctx: TestExecutionContext) -> Result<(), TestErrorKind> {
    // Prepare state: clone the shared prestate so each run starts clean.
    let mut cache = ctx.cache_state.clone();
    // State-clear (EIP-161) behavior applies from Spurious Dragon onward.
    cache.set_state_clear_flag(ctx.cfg.spec.is_enabled_in(SpecId::SPURIOUS_DRAGON));
    let mut state = database::State::builder()
        .with_cached_prestate(cache)
        .with_bundle_update()
        .build();

    let evm_context = Context::mainnet()
        .with_block(ctx.block)
        .with_tx(ctx.tx)
        .with_cfg(ctx.cfg)
        .with_db(&mut state);

    // Execute, timing only the EVM run itself.
    let timer = Instant::now();
    let (db, exec_result) = if ctx.trace {
        // Trace mode: attach an EIP-3155 tracer writing to stderr.
        let mut evm = evm_context
            .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());
        let res = evm.inspect_tx_commit(ctx.tx);
        // Recover the database from the EVM to inspect post-state.
        let db = evm.ctx.journaled_state.database;
        (db, res)
    } else {
        let mut evm = evm_context.build_mainnet();
        let res = evm.transact_commit(ctx.tx);
        let db = evm.ctx.journaled_state.database;
        (db, res)
    };
    // Fold this run's wall-clock time into the shared total.
    *ctx.elapsed.lock().unwrap() += timer.elapsed();

    // Check results
    check_evm_execution(
        ctx.test,
        ctx.unit.out.as_ref(),
        ctx.name,
        &exec_result,
        db,
        ctx.cfg.spec(),
        ctx.print_json_outcome,
    )
}
452
/// Re-runs a failed test with the EIP-3155 tracer enabled and prints a
/// detailed report (traces, result, pre/post state, tx/block/cfg) to help
/// diagnose the failure.
fn debug_failed_test(ctx: DebugContext) {
    println!("\nTraces:");

    // Re-run with tracing, rebuilding the same pre-state as the original run.
    let mut cache = ctx.cache_state.clone();
    cache.set_state_clear_flag(ctx.cfg.spec.is_enabled_in(SpecId::SPURIOUS_DRAGON));
    let mut state = database::State::builder()
        .with_cached_prestate(cache)
        .with_bundle_update()
        .build();

    let mut evm = Context::mainnet()
        .with_db(&mut state)
        .with_block(ctx.block)
        .with_tx(ctx.tx)
        .with_cfg(ctx.cfg)
        .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());

    let exec_result = evm.inspect_tx_commit(ctx.tx);

    println!("\nExecution result: {exec_result:#?}");
    println!("\nExpected exception: {:?}", ctx.test.expect_exception);
    println!("\nState before:\n{}", ctx.cache_state.pretty_print());
    println!(
        "\nState after:\n{}",
        evm.ctx.journaled_state.database.cache.pretty_print()
    );
    println!("\nSpecification: {:?}", ctx.cfg.spec);
    println!("\nTx: {:#?}", ctx.tx);
    println!("Block: {:#?}", ctx.block);
    println!("Cfg: {:#?}", ctx.cfg);
    println!(
        "\nTest name: {:?} (index: {}, path: {:?}) failed:\n{}",
        ctx.name, ctx.index, ctx.path, ctx.error
    );
}
489
/// Effective runner flags after resolving the implications between options.
#[derive(Clone, Copy)]
struct TestRunnerConfig {
    single_thread: bool,
    trace: bool,
    print_outcome: bool,
    keep_going: bool,
}

impl TestRunnerConfig {
    /// Builds a config, applying option implications: tracing forces
    /// outcome printing, and outcome printing forces single-threaded
    /// execution.
    fn new(single_thread: bool, trace: bool, print_outcome: bool, keep_going: bool) -> Self {
        let print_outcome = trace || print_outcome;
        let single_thread = print_outcome || single_thread;
        Self {
            single_thread,
            trace,
            print_outcome,
            keep_going,
        }
    }
}
513
514#[derive(Clone)]
515struct TestRunnerState {
516    n_errors: Arc<AtomicUsize>,
517    console_bar: Arc<ProgressBar>,
518    queue: Arc<Mutex<(usize, Vec<PathBuf>)>>,
519    elapsed: Arc<Mutex<Duration>>,
520}
521
522impl TestRunnerState {
523    fn new(test_files: Vec<PathBuf>) -> Self {
524        let n_files = test_files.len();
525        Self {
526            n_errors: Arc::new(AtomicUsize::new(0)),
527            console_bar: Arc::new(ProgressBar::with_draw_target(
528                Some(n_files as u64),
529                ProgressDrawTarget::stdout(),
530            )),
531            queue: Arc::new(Mutex::new((0usize, test_files))),
532            elapsed: Arc::new(Mutex::new(Duration::ZERO)),
533        }
534    }
535
536    fn next_test(&self) -> Option<PathBuf> {
537        let (current_idx, queue) = &mut *self.queue.lock().unwrap();
538        let idx = *current_idx;
539        let test_path = queue.get(idx).cloned()?;
540        *current_idx = idx + 1;
541        Some(test_path)
542    }
543}
544
545fn run_test_worker(state: TestRunnerState, config: TestRunnerConfig) -> Result<(), TestError> {
546    loop {
547        if !config.keep_going && state.n_errors.load(Ordering::SeqCst) > 0 {
548            return Ok(());
549        }
550
551        let Some(test_path) = state.next_test() else {
552            return Ok(());
553        };
554
555        let result = execute_test_suite(
556            &test_path,
557            &state.elapsed,
558            config.trace,
559            config.print_outcome,
560        );
561
562        state.console_bar.inc(1);
563
564        if let Err(err) = result {
565            state.n_errors.fetch_add(1, Ordering::SeqCst);
566            if !config.keep_going {
567                return Err(err);
568            }
569        }
570    }
571}
572
/// Decides how many worker threads to spawn: one when single-threaded mode
/// is forced or parallelism cannot be queried, otherwise the available
/// parallelism capped at the number of test files.
fn determine_thread_count(single_thread: bool, n_files: usize) -> usize {
    if single_thread {
        return 1;
    }
    std::thread::available_parallelism()
        .map(|n| n.get().min(n_files))
        .unwrap_or(1)
}
579
/// Run all test files in parallel or single-threaded mode
///
/// # Arguments
/// * `test_files` - List of test files to execute
/// * `single_thread` - Force single-threaded execution
/// * `trace` - Enable EVM execution tracing
/// * `print_outcome` - Print test outcomes in JSON format
/// * `keep_going` - Continue running tests even if some fail
pub fn run(
    test_files: Vec<PathBuf>,
    single_thread: bool,
    trace: bool,
    print_outcome: bool,
    keep_going: bool,
) -> Result<(), TestError> {
    let config = TestRunnerConfig::new(single_thread, trace, print_outcome, keep_going);
    let n_files = test_files.len();
    let state = TestRunnerState::new(test_files);
    let num_threads = determine_thread_count(config.single_thread, n_files);

    // Spawn worker threads, each sharing the queue/counters via `state`.
    let mut handles = Vec::with_capacity(num_threads);
    for i in 0..num_threads {
        let state = state.clone();

        let thread = std::thread::Builder::new()
            .name(format!("runner-{i}"))
            .spawn(move || run_test_worker(state, config))
            .unwrap();

        handles.push(thread);
    }

    // Collect results from all threads; a join failure means the worker panicked.
    let mut thread_errors = Vec::new();
    for (i, handle) in handles.into_iter().enumerate() {
        match handle.join() {
            Ok(Ok(())) => {}
            Ok(Err(e)) => thread_errors.push(e),
            Err(_) => thread_errors.push(TestError {
                name: format!("thread {i} panicked"),
                path: String::new(),
                kind: TestErrorKind::Panic,
            }),
        }
    }

    state.console_bar.finish();

    // Print summary
    println!(
        "Finished execution. Total CPU time: {:.6}s",
        state.elapsed.lock().unwrap().as_secs_f64()
    );

    let n_errors = state.n_errors.load(Ordering::SeqCst);
    let n_thread_errors = thread_errors.len();

    if n_errors == 0 && n_thread_errors == 0 {
        println!("All tests passed!");
        Ok(())
    } else {
        println!("Encountered {n_errors} errors out of {n_files} total tests");

        // Test failures without thread errors: exit with a failing status
        // directly. NOTE(review): `process::exit` skips destructors; appears
        // intentional here since all work is done — confirm.
        if n_thread_errors == 0 {
            std::process::exit(1);
        }

        // NOTE(review): details are only printed for 2+ thread errors; a
        // single error is just propagated to the caller below — confirm
        // this threshold is intended.
        if n_thread_errors > 1 {
            println!("{n_thread_errors} threads returned an error, out of {num_threads} total:");
            for error in &thread_errors {
                println!("{error}");
            }
        }
        Err(thread_errors.swap_remove(0))
    }
}
656}