revme/cmd/statetest/
runner.rs

1use crate::cmd::statetest::merkle_trie::{compute_test_roots, TestValidationResult};
2use database::State;
3use indicatif::{ProgressBar, ProgressDrawTarget};
4use inspector::{inspectors::TracerEip3155, InspectCommitEvm};
5use primitives::U256;
6use revm::{
7    context::{block::BlockEnv, cfg::CfgEnv, tx::TxEnv},
8    context_interface::{
9        result::{EVMError, ExecutionResult, HaltReason, InvalidTransaction},
10        Cfg,
11    },
12    database_interface::EmptyDB,
13    primitives::{hardfork::SpecId, Bytes, B256},
14    Context, ExecuteCommitEvm, MainBuilder, MainContext,
15};
16use serde_json::json;
17use statetest_types::{SpecName, Test, TestSuite, TestUnit};
18use std::{
19    convert::Infallible,
20    fmt::Debug,
21    io::stderr,
22    path::{Path, PathBuf},
23    sync::{
24        atomic::{AtomicBool, AtomicUsize, Ordering},
25        Arc, Mutex,
26    },
27    time::{Duration, Instant},
28};
29use thiserror::Error;
30use walkdir::{DirEntry, WalkDir};
31
/// Error that occurs during test execution
#[derive(Debug, Error)]
#[error("Path: {path}\nName: {name}\nError: {kind}")]
pub struct TestError {
    /// Name of the failing test within the suite file.
    pub name: String,
    /// Path of the JSON suite file the test came from.
    pub path: String,
    /// The specific failure that occurred.
    pub kind: TestErrorKind,
}
40
/// Specific kind of error that occurred during test execution
#[derive(Debug, Error)]
pub enum TestErrorKind {
    /// Computed logs root does not match the test's expected logs hash.
    #[error("logs root mismatch: got {got}, expected {expected}")]
    LogsRootMismatch { got: B256, expected: B256 },
    /// Computed post-state root does not match the test's expected hash.
    #[error("state root mismatch: got {got}, expected {expected}")]
    StateRootMismatch { got: B256, expected: B256 },
    /// The transaction's secret key could not be used to build a tx env.
    #[error("unknown private key: {0:?}")]
    UnknownPrivateKey(B256),
    /// Exception expectation and actual execution outcome disagree.
    #[error("unexpected exception: got {got_exception:?}, expected {expected_exception:?}")]
    UnexpectedException {
        expected_exception: Option<String>,
        got_exception: Option<String>,
    },
    /// Execution output differs from the test's expected output bytes.
    #[error("unexpected output: got {got_output:?}, expected {expected_output:?}")]
    UnexpectedOutput {
        expected_output: Option<Bytes>,
        got_output: Option<Bytes>,
    },
    /// The suite JSON could not be deserialized.
    #[error(transparent)]
    SerdeDeserialize(#[from] serde_json::Error),
    /// A worker thread panicked while running tests.
    #[error("thread panicked")]
    Panic,
    /// The given path does not exist (or could not be read).
    #[error("path does not exist")]
    InvalidPath,
    /// Directory walk found no `.json` files under the given path.
    #[error("no JSON test files found in path")]
    NoJsonFiles,
}
69
70/// Find all JSON test files in the given path
71/// If path is a file, returns it in a vector
72/// If path is a directory, recursively finds all .json files
73pub fn find_all_json_tests(path: &Path) -> Vec<PathBuf> {
74    if path.is_file() {
75        vec![path.to_path_buf()]
76    } else {
77        WalkDir::new(path)
78            .into_iter()
79            .filter_map(Result::ok)
80            .filter(|e| e.path().extension() == Some("json".as_ref()))
81            .map(DirEntry::into_path)
82            .collect()
83    }
84}
85
/// Check if a test should be skipped based on its filename
/// Some tests are known to be problematic or take too long
fn skip_test(path: &Path) -> bool {
    // File names that are skipped:
    // - CreateTransactionHighNonce: gas-price overflow is handled correctly,
    //   but the result does not match the test's specific expected exception.
    // - RevertInCreateInInit*, dynamicAccountOverwriteEmpty*,
    //   create2collisionStorage*, InitCollision*: tests with storage checks.
    // - ValueOverflow*: malformed value.
    // - Call50000_sha256, static_Call50000_sha256, loopMul,
    //   CALLBlake2f_MaxRounds: pass, but take too long to execute.
    const SKIPPED_FILES: &[&str] = &[
        "CreateTransactionHighNonce.json",
        "RevertInCreateInInit_Paris.json",
        "RevertInCreateInInit.json",
        "dynamicAccountOverwriteEmpty.json",
        "dynamicAccountOverwriteEmpty_Paris.json",
        "RevertInCreateInInitCreate2Paris.json",
        "create2collisionStorage.json",
        "RevertInCreateInInitCreate2.json",
        "create2collisionStorageParis.json",
        "InitCollision.json",
        "InitCollisionParis.json",
        "ValueOverflow.json",
        "ValueOverflowParis.json",
        "Call50000_sha256.json",
        "static_Call50000_sha256.json",
        "loopMul.json",
        "CALLBlake2f_MaxRounds.json",
    ];

    // Non-UTF-8 or missing file names are never skipped.
    path.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| SKIPPED_FILES.contains(&name))
}
122
/// Borrowed inputs needed to execute one test case within a suite.
struct TestExecutionContext<'a> {
    /// Test name (key in the suite JSON).
    name: &'a str,
    /// Test unit holding pre-state, transaction template, and environment.
    unit: &'a TestUnit,
    /// The individual post-state test case being executed.
    test: &'a Test,
    /// EVM configuration (chain id, spec, blob limits).
    cfg: &'a CfgEnv,
    /// Block environment for the selected spec.
    block: &'a BlockEnv,
    /// Transaction environment for this test case.
    tx: &'a TxEnv,
    /// Pre-state cache the EVM state is built from.
    cache_state: &'a database::CacheState,
    /// Shared accumulator of EVM execution time across tests.
    elapsed: &'a Arc<Mutex<Duration>>,
    /// Whether to run with an EIP-3155 tracer attached.
    trace: bool,
    /// Whether to print a JSON outcome line per test.
    print_json_outcome: bool,
}
135
/// Borrowed inputs needed to re-run a failed test with tracing for debugging.
struct DebugContext<'a> {
    /// Test name (key in the suite JSON).
    name: &'a str,
    /// Path of the suite file the test came from.
    path: &'a str,
    /// Index of the test case within its post-state list.
    index: usize,
    /// The failing test case.
    test: &'a Test,
    /// EVM configuration used by the failing run.
    cfg: &'a CfgEnv,
    /// Block environment used by the failing run.
    block: &'a BlockEnv,
    /// Transaction environment used by the failing run.
    tx: &'a TxEnv,
    /// Pre-state cache the failing run started from.
    cache_state: &'a database::CacheState,
    /// The error that caused the failure.
    error: &'a TestErrorKind,
}
147
/// Builds the JSON outcome line emitted for a single test case.
///
/// `error` is `None` when the test passed; otherwise it carries the failure
/// message. Output and gas fall back to defaults when execution itself failed.
fn build_json_output(
    test: &Test,
    test_name: &str,
    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
    validation: &TestValidationResult,
    spec: SpecId,
    error: Option<String>,
) -> serde_json::Value {
    json!({
        "stateRoot": validation.state_root,
        "logsRoot": validation.logs_root,
        // Empty bytes when execution errored or produced no output.
        "output": exec_result.as_ref().ok().and_then(|r| r.output().cloned()).unwrap_or_default(),
        "gasUsed": exec_result.as_ref().ok().map(|r| r.gas_used()).unwrap_or_default(),
        "pass": error.is_none(),
        "errorMsg": error.unwrap_or_default(),
        "evmResult": format_evm_result(exec_result),
        // NOTE(review): duplicates "logsRoot"; presumably kept for consumer
        // compatibility — confirm before removing.
        "postLogsHash": validation.logs_root,
        "fork": spec,
        "test": test_name,
        // Indexes selecting the data/gas/value combination of this test case.
        "d": test.indexes.data,
        "g": test.indexes.gas,
        "v": test.indexes.value,
    })
}
172
173fn format_evm_result(
174    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
175) -> String {
176    match exec_result {
177        Ok(r) => match r {
178            ExecutionResult::Success { reason, .. } => format!("Success: {reason:?}"),
179            ExecutionResult::Revert { .. } => "Revert".to_string(),
180            ExecutionResult::Halt { reason, .. } => format!("Halt: {reason:?}"),
181        },
182        Err(e) => e.to_string(),
183    }
184}
185
186fn validate_exception(
187    test: &Test,
188    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
189) -> Result<bool, TestErrorKind> {
190    match (&test.expect_exception, exec_result) {
191        (None, Ok(_)) => Ok(false), // No exception expected, execution succeeded
192        (Some(_), Err(_)) => Ok(true), // Exception expected and occurred
193        _ => Err(TestErrorKind::UnexpectedException {
194            expected_exception: test.expect_exception.clone(),
195            got_exception: exec_result.as_ref().err().map(|e| e.to_string()),
196        }),
197    }
198}
199
200fn validate_output(
201    expected_output: Option<&Bytes>,
202    actual_result: &ExecutionResult<HaltReason>,
203) -> Result<(), TestErrorKind> {
204    if let Some((expected, actual)) = expected_output.zip(actual_result.output()) {
205        if expected != actual {
206            return Err(TestErrorKind::UnexpectedOutput {
207                expected_output: Some(expected.clone()),
208                got_output: actual_result.output().cloned(),
209            });
210        }
211    }
212    Ok(())
213}
214
/// Validates one EVM execution against the test's expectations, in order:
/// expected exception, expected output, logs root, state root.
///
/// Roots are computed via [`compute_test_roots`] before validation so they
/// can be reported even on failure. When `print_json_outcome` is set, a JSON
/// summary line is printed (stderr) for every outcome — pass or fail —
/// before returning.
fn check_evm_execution(
    test: &Test,
    expected_output: Option<&Bytes>,
    test_name: &str,
    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
    db: &mut State<EmptyDB>,
    spec: SpecId,
    print_json_outcome: bool,
) -> Result<(), TestErrorKind> {
    let validation = compute_test_roots(exec_result, db);

    // Emits the JSON outcome line when enabled; `error` is the failure being
    // reported, or `None` for a pass.
    let print_json = |error: Option<&TestErrorKind>| {
        if print_json_outcome {
            let json = build_json_output(
                test,
                test_name,
                exec_result,
                &validation,
                spec,
                error.map(|e| e.to_string()),
            );
            eprintln!("{json}");
        }
    };

    // Check if exception handling is correct
    let exception_expected = validate_exception(test, exec_result).inspect_err(|e| {
        print_json(Some(e));
    })?;

    // If exception was expected and occurred, we're done
    if exception_expected {
        print_json(None);
        return Ok(());
    }

    // Validate output if execution succeeded
    if let Ok(result) = exec_result {
        validate_output(expected_output, result).inspect_err(|e| {
            print_json(Some(e));
        })?;
    }

    // Validate logs root
    if validation.logs_root != test.logs {
        let error = TestErrorKind::LogsRootMismatch {
            got: validation.logs_root,
            expected: test.logs,
        };
        print_json(Some(&error));
        return Err(error);
    }

    // Validate state root
    if validation.state_root != test.hash {
        let error = TestErrorKind::StateRootMismatch {
            got: validation.state_root,
            expected: test.hash,
        };
        print_json(Some(&error));
        return Err(error);
    }

    print_json(None);
    Ok(())
}
281
282/// Execute a single test suite file containing multiple tests
283///
284/// # Arguments
285/// * `path` - Path to the JSON test file
286/// * `elapsed` - Shared counter for total execution time
287/// * `trace` - Whether to enable EVM tracing
288/// * `print_json_outcome` - Whether to print JSON formatted results
289pub fn execute_test_suite(
290    path: &Path,
291    elapsed: &Arc<Mutex<Duration>>,
292    trace: bool,
293    print_json_outcome: bool,
294) -> Result<(), TestError> {
295    if skip_test(path) {
296        return Ok(());
297    }
298
299    let s = std::fs::read_to_string(path).unwrap();
300    let path = path.to_string_lossy().into_owned();
301    let suite: TestSuite = serde_json::from_str(&s).map_err(|e| TestError {
302        name: "Unknown".to_string(),
303        path: path.clone(),
304        kind: e.into(),
305    })?;
306
307    for (name, unit) in suite.0 {
308        // Prepare initial state
309        let cache_state = unit.state();
310
311        // Setup base configuration
312        let mut cfg = CfgEnv::default();
313        cfg.chain_id = unit
314            .env
315            .current_chain_id
316            .unwrap_or(U256::ONE)
317            .try_into()
318            .unwrap_or(1);
319
320        // Post and execution
321        for (spec_name, tests) in &unit.post {
322            // Skip Constantinople spec
323            if *spec_name == SpecName::Constantinople {
324                continue;
325            }
326
327            cfg.spec = spec_name.to_spec_id();
328
329            // Configure max blobs per spec
330            if cfg.spec.is_enabled_in(SpecId::OSAKA) {
331                cfg.set_max_blobs_per_tx(6);
332            } else if cfg.spec.is_enabled_in(SpecId::PRAGUE) {
333                cfg.set_max_blobs_per_tx(9);
334            } else {
335                cfg.set_max_blobs_per_tx(6);
336            }
337
338            // Setup block environment for this spec
339            let block = unit.block_env(&cfg);
340
341            for (index, test) in tests.iter().enumerate() {
342                // Setup transaction environment
343                let tx = match test.tx_env(&unit) {
344                    Ok(tx) => tx,
345                    Err(_) if test.expect_exception.is_some() => continue,
346                    Err(_) => {
347                        return Err(TestError {
348                            name: name.clone(),
349                            path: path.clone(),
350                            kind: TestErrorKind::UnknownPrivateKey(unit.transaction.secret_key),
351                        });
352                    }
353                };
354
355                // Execute the test
356                let result = execute_single_test(TestExecutionContext {
357                    name: &name,
358                    unit: &unit,
359                    test,
360                    cfg: &cfg,
361                    block: &block,
362                    tx: &tx,
363                    cache_state: &cache_state,
364                    elapsed,
365                    trace,
366                    print_json_outcome,
367                });
368
369                if let Err(e) = result {
370                    // Handle error with debug trace if needed
371                    static FAILED: AtomicBool = AtomicBool::new(false);
372                    if print_json_outcome || FAILED.swap(true, Ordering::SeqCst) {
373                        return Err(TestError {
374                            name: name.clone(),
375                            path: path.clone(),
376                            kind: e,
377                        });
378                    }
379
380                    // Re-run with trace for debugging
381                    debug_failed_test(DebugContext {
382                        name: &name,
383                        path: &path,
384                        index,
385                        test,
386                        cfg: &cfg,
387                        block: &block,
388                        tx: &tx,
389                        cache_state: &cache_state,
390                        error: &e,
391                    });
392
393                    return Err(TestError {
394                        path: path.clone(),
395                        name: name.clone(),
396                        kind: e,
397                    });
398                }
399            }
400        }
401    }
402    Ok(())
403}
404
/// Builds a fresh EVM from the context's pre-state and environments, executes
/// the transaction (optionally under an EIP-3155 tracer writing to stderr),
/// and validates the committed result via [`check_evm_execution`].
///
/// Only the execution itself is added to `ctx.elapsed`; state construction
/// and validation are excluded from the measurement.
fn execute_single_test(ctx: TestExecutionContext) -> Result<(), TestErrorKind> {
    // Prepare state
    // Clone the shared pre-state so each test mutates its own copy.
    let mut cache = ctx.cache_state.clone();
    // EIP-161 state clearing applies from Spurious Dragon onward.
    cache.set_state_clear_flag(ctx.cfg.spec.is_enabled_in(SpecId::SPURIOUS_DRAGON));
    let mut state = database::State::builder()
        .with_cached_prestate(cache)
        .with_bundle_update()
        .build();

    let evm_context = Context::mainnet()
        .with_block(ctx.block)
        .with_tx(ctx.tx)
        .with_cfg(ctx.cfg)
        .with_db(&mut state);

    // Execute
    let timer = Instant::now();
    // Both branches commit state and hand back the database reference so
    // post-state roots can be computed afterwards.
    let (db, exec_result) = if ctx.trace {
        let mut evm = evm_context
            .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());
        let res = evm.inspect_tx_commit(ctx.tx);
        let db = evm.ctx.journaled_state.database;
        (db, res)
    } else {
        let mut evm = evm_context.build_mainnet();
        let res = evm.transact_commit(ctx.tx);
        let db = evm.ctx.journaled_state.database;
        (db, res)
    };
    *ctx.elapsed.lock().unwrap() += timer.elapsed();

    // Check results
    check_evm_execution(
        ctx.test,
        ctx.unit.out.as_ref(),
        ctx.name,
        &exec_result,
        db,
        ctx.cfg.spec(),
        ctx.print_json_outcome,
    )
}
447
/// Re-executes a failed test with the EIP-3155 tracer enabled and dumps a
/// full diagnostic report (trace, result, pre/post state, environments, and
/// the original error) to stdout/stderr.
fn debug_failed_test(ctx: DebugContext) {
    println!("\nTraces:");

    // Re-run with tracing
    // Rebuild the same pre-state the failing run started from.
    let mut cache = ctx.cache_state.clone();
    // EIP-161 state clearing applies from Spurious Dragon onward.
    cache.set_state_clear_flag(ctx.cfg.spec.is_enabled_in(SpecId::SPURIOUS_DRAGON));
    let mut state = database::State::builder()
        .with_cached_prestate(cache)
        .with_bundle_update()
        .build();

    let mut evm = Context::mainnet()
        .with_db(&mut state)
        .with_block(ctx.block)
        .with_tx(ctx.tx)
        .with_cfg(ctx.cfg)
        .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());

    let exec_result = evm.inspect_tx_commit(ctx.tx);

    println!("\nExecution result: {exec_result:#?}");
    println!("\nExpected exception: {:?}", ctx.test.expect_exception);
    println!("\nState before:\n{}", ctx.cache_state.pretty_print());
    println!(
        "\nState after:\n{}",
        evm.ctx.journaled_state.database.cache.pretty_print()
    );
    println!("\nSpecification: {:?}", ctx.cfg.spec);
    println!("\nTx: {:#?}", ctx.tx);
    println!("Block: {:#?}", ctx.block);
    println!("Cfg: {:#?}", ctx.cfg);
    println!(
        "\nTest name: {:?} (index: {}, path: {:?}) failed:\n{}",
        ctx.name, ctx.index, ctx.path, ctx.error
    );
}
484
/// Runtime options controlling how the test runner executes files.
#[derive(Clone, Copy)]
struct TestRunnerConfig {
    single_thread: bool,
    trace: bool,
    print_outcome: bool,
    keep_going: bool,
}

impl TestRunnerConfig {
    /// Builds a config, normalizing flag implications: tracing forces JSON
    /// outcome printing, and outcome printing forces single-threaded
    /// execution (so output lines from different tests never interleave).
    fn new(single_thread: bool, trace: bool, print_outcome: bool, keep_going: bool) -> Self {
        let effective_print = trace || print_outcome;
        Self {
            single_thread: single_thread || effective_print,
            trace,
            print_outcome: effective_print,
            keep_going,
        }
    }
}
508
/// Shared, cheaply-clonable state handed to every worker thread.
#[derive(Clone)]
struct TestRunnerState {
    /// Count of test files that produced an error.
    n_errors: Arc<AtomicUsize>,
    /// Progress bar advanced once per completed file.
    console_bar: Arc<ProgressBar>,
    /// Work queue: (index of the next file to run, list of test files).
    queue: Arc<Mutex<(usize, Vec<PathBuf>)>>,
    /// Accumulated EVM execution time across all tests.
    elapsed: Arc<Mutex<Duration>>,
}
516
517impl TestRunnerState {
518    fn new(test_files: Vec<PathBuf>) -> Self {
519        let n_files = test_files.len();
520        Self {
521            n_errors: Arc::new(AtomicUsize::new(0)),
522            console_bar: Arc::new(ProgressBar::with_draw_target(
523                Some(n_files as u64),
524                ProgressDrawTarget::stdout(),
525            )),
526            queue: Arc::new(Mutex::new((0usize, test_files))),
527            elapsed: Arc::new(Mutex::new(Duration::ZERO)),
528        }
529    }
530
531    fn next_test(&self) -> Option<PathBuf> {
532        let (current_idx, queue) = &mut *self.queue.lock().unwrap();
533        let idx = *current_idx;
534        let test_path = queue.get(idx).cloned()?;
535        *current_idx = idx + 1;
536        Some(test_path)
537    }
538}
539
540fn run_test_worker(state: TestRunnerState, config: TestRunnerConfig) -> Result<(), TestError> {
541    loop {
542        if !config.keep_going && state.n_errors.load(Ordering::SeqCst) > 0 {
543            return Ok(());
544        }
545
546        let Some(test_path) = state.next_test() else {
547            return Ok(());
548        };
549
550        let result = execute_test_suite(
551            &test_path,
552            &state.elapsed,
553            config.trace,
554            config.print_outcome,
555        );
556
557        state.console_bar.inc(1);
558
559        if let Err(err) = result {
560            state.n_errors.fetch_add(1, Ordering::SeqCst);
561            if !config.keep_going {
562                return Err(err);
563            }
564        }
565    }
566}
567
/// Number of worker threads to spawn: 1 when single-threaded mode is forced
/// (or available parallelism cannot be queried), otherwise the available
/// parallelism capped by the number of test files.
fn determine_thread_count(single_thread: bool, n_files: usize) -> usize {
    if single_thread {
        return 1;
    }
    std::thread::available_parallelism()
        .map(|n| n.get().min(n_files))
        .unwrap_or(1)
}
574
/// Run all test files in parallel or single-threaded mode
///
/// # Arguments
/// * `test_files` - List of test files to execute
/// * `single_thread` - Force single-threaded execution
/// * `trace` - Enable EVM execution tracing
/// * `print_outcome` - Print test outcomes in JSON format
/// * `keep_going` - Continue running tests even if some fail
///
/// Note: when tests failed but every worker thread exited cleanly, this
/// function terminates the process with exit code 1 instead of returning.
pub fn run(
    test_files: Vec<PathBuf>,
    single_thread: bool,
    trace: bool,
    print_outcome: bool,
    keep_going: bool,
) -> Result<(), TestError> {
    // `trace`/`print_outcome` may force single-threaded mode; see
    // `TestRunnerConfig::new`.
    let config = TestRunnerConfig::new(single_thread, trace, print_outcome, keep_going);
    let n_files = test_files.len();
    let state = TestRunnerState::new(test_files);
    let num_threads = determine_thread_count(config.single_thread, n_files);

    // Spawn worker threads
    let mut handles = Vec::with_capacity(num_threads);
    for i in 0..num_threads {
        // Each worker gets its own clone of the shared (Arc-backed) state.
        let state = state.clone();

        let thread = std::thread::Builder::new()
            .name(format!("runner-{i}"))
            .spawn(move || run_test_worker(state, config))
            .unwrap();

        handles.push(thread);
    }

    // Collect results from all threads
    let mut thread_errors = Vec::new();
    for (i, handle) in handles.into_iter().enumerate() {
        match handle.join() {
            Ok(Ok(())) => {}
            Ok(Err(e)) => thread_errors.push(e),
            // A panicked worker is reported as a synthetic Panic error.
            Err(_) => thread_errors.push(TestError {
                name: format!("thread {i} panicked"),
                path: String::new(),
                kind: TestErrorKind::Panic,
            }),
        }
    }

    state.console_bar.finish();

    // Print summary
    println!(
        "Finished execution. Total CPU time: {:.6}s",
        state.elapsed.lock().unwrap().as_secs_f64()
    );

    let n_errors = state.n_errors.load(Ordering::SeqCst);
    let n_thread_errors = thread_errors.len();

    if n_errors == 0 && n_thread_errors == 0 {
        println!("All tests passed!");
        Ok(())
    } else {
        println!("Encountered {n_errors} errors out of {n_files} total tests");

        // Failures occurred but every worker exited cleanly (keep_going
        // mode): signal failure through the process exit code.
        if n_thread_errors == 0 {
            std::process::exit(1);
        }

        if n_thread_errors > 1 {
            println!("{n_thread_errors} threads returned an error, out of {num_threads} total:");
            for error in &thread_errors {
                println!("{error}");
            }
        }
        // Surface the first thread error to the caller.
        Err(thread_errors.swap_remove(0))
    }
}