revme/cmd/statetest/
runner.rs

1use crate::cmd::statetest::merkle_trie::{compute_test_roots, TestValidationResult};
2use database::State;
3use indicatif::{ProgressBar, ProgressDrawTarget};
4use inspector::{inspectors::TracerEip3155, InspectCommitEvm};
5use primitives::U256;
6use revm::{
7    context::{block::BlockEnv, cfg::CfgEnv, tx::TxEnv},
8    context_interface::{
9        result::{EVMError, ExecutionResult, HaltReason, InvalidTransaction},
10        Cfg,
11    },
12    database_interface::EmptyDB,
13    primitives::{hardfork::SpecId, Bytes, B256},
14    Context, ExecuteCommitEvm, MainBuilder, MainContext,
15};
16use serde_json::json;
17use statetest_types::{SpecName, Test, TestSuite, TestUnit};
18use std::{
19    convert::Infallible,
20    fmt::Debug,
21    io::stderr,
22    path::{Path, PathBuf},
23    sync::{
24        atomic::{AtomicBool, AtomicUsize, Ordering},
25        Arc, Mutex,
26    },
27    time::{Duration, Instant},
28};
29use thiserror::Error;
30use walkdir::{DirEntry, WalkDir};
31
32/// Error that occurs during test execution
33#[derive(Debug, Error)]
34#[error("Path: {path}\nName: {name}\nError: {kind}")]
35pub struct TestError {
36    pub name: String,
37    pub path: String,
38    pub kind: TestErrorKind,
39}
40
41/// Specific kind of error that occurred during test execution
42#[derive(Debug, Error)]
43pub enum TestErrorKind {
44    #[error("logs root mismatch: got {got}, expected {expected}")]
45    LogsRootMismatch { got: B256, expected: B256 },
46    #[error("state root mismatch: got {got}, expected {expected}")]
47    StateRootMismatch { got: B256, expected: B256 },
48    #[error("unknown private key: {0:?}")]
49    UnknownPrivateKey(B256),
50    #[error("unexpected exception: got {got_exception:?}, expected {expected_exception:?}")]
51    UnexpectedException {
52        expected_exception: Option<String>,
53        got_exception: Option<String>,
54    },
55    #[error("unexpected output: got {got_output:?}, expected {expected_output:?}")]
56    UnexpectedOutput {
57        expected_output: Option<Bytes>,
58        got_output: Option<Bytes>,
59    },
60    #[error(transparent)]
61    SerdeDeserialize(#[from] serde_json::Error),
62    #[error("thread panicked")]
63    Panic,
64    #[error("path does not exist")]
65    InvalidPath,
66    #[error("no JSON test files found in path")]
67    NoJsonFiles,
68}
69
70/// Find all JSON test files in the given path
71/// If path is a file, returns it in a vector
72/// If path is a directory, recursively finds all .json files
73pub fn find_all_json_tests(path: &Path) -> Vec<PathBuf> {
74    if path.is_file() {
75        vec![path.to_path_buf()]
76    } else {
77        WalkDir::new(path)
78            .into_iter()
79            .filter_map(Result::ok)
80            .filter(|e| e.path().extension() == Some("json".as_ref()))
81            .map(DirEntry::into_path)
82            .collect()
83    }
84}
85
/// Check if a test should be skipped based on its file name.
///
/// Only the final path component is inspected. Returns `true` for a fixed list
/// of suites that are known to be problematic or that take too long to run.
///
/// Paths without a final component (such as `..` or `/`) and file names that
/// are not valid UTF-8 can never match the list, so they are reported as "do
/// not skip" instead of panicking.
fn skip_test(path: &Path) -> bool {
    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };

    matches!(
        name,
        // Test check if gas price overflows, we handle this correctly but does not match tests specific exception.
        "CreateTransactionHighNonce.json"

        // Test with some storage check.
        | "RevertInCreateInInit_Paris.json"
        | "RevertInCreateInInit.json"
        | "dynamicAccountOverwriteEmpty.json"
        | "dynamicAccountOverwriteEmpty_Paris.json"
        | "RevertInCreateInInitCreate2Paris.json"
        | "create2collisionStorage.json"
        | "RevertInCreateInInitCreate2.json"
        | "create2collisionStorageParis.json"
        | "InitCollision.json"
        | "InitCollisionParis.json"

        // Malformed value.
        | "ValueOverflow.json"
        | "ValueOverflowParis.json"

        // These tests are passing, but they take a lot of time to execute so we are going to skip them.
        | "Call50000_sha256.json"
        | "static_Call50000_sha256.json"
        | "loopMul.json"
        | "CALLBlake2f_MaxRounds.json"
    )
}
119
120struct TestExecutionContext<'a> {
121    name: &'a str,
122    unit: &'a TestUnit,
123    test: &'a Test,
124    cfg: &'a CfgEnv,
125    block: &'a BlockEnv,
126    tx: &'a TxEnv,
127    cache_state: &'a database::CacheState,
128    elapsed: &'a Arc<Mutex<Duration>>,
129    trace: bool,
130    print_json_outcome: bool,
131}
132
133struct DebugContext<'a> {
134    name: &'a str,
135    path: &'a str,
136    index: usize,
137    test: &'a Test,
138    cfg: &'a CfgEnv,
139    block: &'a BlockEnv,
140    tx: &'a TxEnv,
141    cache_state: &'a database::CacheState,
142    error: &'a TestErrorKind,
143}
144
145fn build_json_output(
146    test: &Test,
147    test_name: &str,
148    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
149    validation: &TestValidationResult,
150    spec: SpecId,
151    error: Option<String>,
152) -> serde_json::Value {
153    json!({
154        "stateRoot": validation.state_root,
155        "logsRoot": validation.logs_root,
156        "output": exec_result.as_ref().ok().and_then(|r| r.output().cloned()).unwrap_or_default(),
157        "gasUsed": exec_result.as_ref().ok().map(|r| r.gas_used()).unwrap_or_default(),
158        "pass": error.is_none(),
159        "errorMsg": error.unwrap_or_default(),
160        "evmResult": format_evm_result(exec_result),
161        "postLogsHash": validation.logs_root,
162        "fork": spec,
163        "test": test_name,
164        "d": test.indexes.data,
165        "g": test.indexes.gas,
166        "v": test.indexes.value,
167    })
168}
169
170fn format_evm_result(
171    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
172) -> String {
173    match exec_result {
174        Ok(r) => match r {
175            ExecutionResult::Success { reason, .. } => format!("Success: {reason:?}"),
176            ExecutionResult::Revert { .. } => "Revert".to_string(),
177            ExecutionResult::Halt { reason, .. } => format!("Halt: {reason:?}"),
178        },
179        Err(e) => e.to_string(),
180    }
181}
182
183fn validate_exception(
184    test: &Test,
185    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
186) -> Result<bool, TestErrorKind> {
187    match (&test.expect_exception, exec_result) {
188        (None, Ok(_)) => Ok(false), // No exception expected, execution succeeded
189        (Some(_), Err(_)) => Ok(true), // Exception expected and occurred
190        _ => Err(TestErrorKind::UnexpectedException {
191            expected_exception: test.expect_exception.clone(),
192            got_exception: exec_result.as_ref().err().map(|e| e.to_string()),
193        }),
194    }
195}
196
197fn validate_output(
198    expected_output: Option<&Bytes>,
199    actual_result: &ExecutionResult<HaltReason>,
200) -> Result<(), TestErrorKind> {
201    if let Some((expected, actual)) = expected_output.zip(actual_result.output()) {
202        if expected != actual {
203            return Err(TestErrorKind::UnexpectedOutput {
204                expected_output: Some(expected.clone()),
205                got_output: actual_result.output().cloned(),
206            });
207        }
208    }
209    Ok(())
210}
211
212fn check_evm_execution(
213    test: &Test,
214    expected_output: Option<&Bytes>,
215    test_name: &str,
216    exec_result: &Result<ExecutionResult<HaltReason>, EVMError<Infallible, InvalidTransaction>>,
217    db: &mut State<EmptyDB>,
218    spec: SpecId,
219    print_json_outcome: bool,
220) -> Result<(), TestErrorKind> {
221    let validation = compute_test_roots(exec_result, db);
222
223    let print_json = |error: Option<&TestErrorKind>| {
224        if print_json_outcome {
225            let json = build_json_output(
226                test,
227                test_name,
228                exec_result,
229                &validation,
230                spec,
231                error.map(|e| e.to_string()),
232            );
233            eprintln!("{json}");
234        }
235    };
236
237    // Check if exception handling is correct
238    let exception_expected = validate_exception(test, exec_result).inspect_err(|e| {
239        print_json(Some(e));
240    })?;
241
242    // If exception was expected and occurred, we're done
243    if exception_expected {
244        print_json(None);
245        return Ok(());
246    }
247
248    // Validate output if execution succeeded
249    if let Ok(result) = exec_result {
250        validate_output(expected_output, result).inspect_err(|e| {
251            print_json(Some(e));
252        })?;
253    }
254
255    // Validate logs root
256    if validation.logs_root != test.logs {
257        let error = TestErrorKind::LogsRootMismatch {
258            got: validation.logs_root,
259            expected: test.logs,
260        };
261        print_json(Some(&error));
262        return Err(error);
263    }
264
265    // Validate state root
266    if validation.state_root != test.hash {
267        let error = TestErrorKind::StateRootMismatch {
268            got: validation.state_root,
269            expected: test.hash,
270        };
271        print_json(Some(&error));
272        return Err(error);
273    }
274
275    print_json(None);
276    Ok(())
277}
278
279/// Execute a single test suite file containing multiple tests
280///
281/// # Arguments
282/// * `path` - Path to the JSON test file
283/// * `elapsed` - Shared counter for total execution time
284/// * `trace` - Whether to enable EVM tracing
285/// * `print_json_outcome` - Whether to print JSON formatted results
286pub fn execute_test_suite(
287    path: &Path,
288    elapsed: &Arc<Mutex<Duration>>,
289    trace: bool,
290    print_json_outcome: bool,
291) -> Result<(), TestError> {
292    if skip_test(path) {
293        return Ok(());
294    }
295
296    let s = std::fs::read_to_string(path).unwrap();
297    let path = path.to_string_lossy().into_owned();
298    let suite: TestSuite = serde_json::from_str(&s).map_err(|e| TestError {
299        name: "Unknown".to_string(),
300        path: path.clone(),
301        kind: e.into(),
302    })?;
303
304    for (name, unit) in suite.0 {
305        // Prepare initial state
306        let cache_state = unit.state();
307
308        // Setup base configuration
309        let mut cfg = CfgEnv::default();
310        cfg.chain_id = unit
311            .env
312            .current_chain_id
313            .unwrap_or(U256::ONE)
314            .try_into()
315            .unwrap_or(1);
316
317        // Post and execution
318        for (spec_name, tests) in &unit.post {
319            // Skip Constantinople spec
320            if *spec_name == SpecName::Constantinople {
321                continue;
322            }
323
324            cfg.spec = spec_name.to_spec_id();
325
326            // Configure max blobs per spec
327            if cfg.spec.is_enabled_in(SpecId::OSAKA) {
328                cfg.set_max_blobs_per_tx(6);
329            } else if cfg.spec.is_enabled_in(SpecId::PRAGUE) {
330                cfg.set_max_blobs_per_tx(9);
331            } else {
332                cfg.set_max_blobs_per_tx(6);
333            }
334
335            // Setup block environment for this spec
336            let block = unit.block_env(&cfg);
337
338            for (index, test) in tests.iter().enumerate() {
339                // Setup transaction environment
340                let tx = match test.tx_env(&unit) {
341                    Ok(tx) => tx,
342                    Err(_) if test.expect_exception.is_some() => continue,
343                    Err(_) => {
344                        return Err(TestError {
345                            name: name.clone(),
346                            path: path.clone(),
347                            kind: TestErrorKind::UnknownPrivateKey(unit.transaction.secret_key),
348                        });
349                    }
350                };
351
352                // Execute the test
353                let result = execute_single_test(TestExecutionContext {
354                    name: &name,
355                    unit: &unit,
356                    test,
357                    cfg: &cfg,
358                    block: &block,
359                    tx: &tx,
360                    cache_state: &cache_state,
361                    elapsed,
362                    trace,
363                    print_json_outcome,
364                });
365
366                if let Err(e) = result {
367                    // Handle error with debug trace if needed
368                    static FAILED: AtomicBool = AtomicBool::new(false);
369                    if print_json_outcome || FAILED.swap(true, Ordering::SeqCst) {
370                        return Err(TestError {
371                            name: name.clone(),
372                            path: path.clone(),
373                            kind: e,
374                        });
375                    }
376
377                    // Re-run with trace for debugging
378                    debug_failed_test(DebugContext {
379                        name: &name,
380                        path: &path,
381                        index,
382                        test,
383                        cfg: &cfg,
384                        block: &block,
385                        tx: &tx,
386                        cache_state: &cache_state,
387                        error: &e,
388                    });
389
390                    return Err(TestError {
391                        path: path.clone(),
392                        name: name.clone(),
393                        kind: e,
394                    });
395                }
396            }
397        }
398    }
399    Ok(())
400}
401
402fn execute_single_test(ctx: TestExecutionContext) -> Result<(), TestErrorKind> {
403    // Prepare state
404    let mut cache = ctx.cache_state.clone();
405    cache.set_state_clear_flag(ctx.cfg.spec.is_enabled_in(SpecId::SPURIOUS_DRAGON));
406    let mut state = database::State::builder()
407        .with_cached_prestate(cache)
408        .with_bundle_update()
409        .build();
410
411    let evm_context = Context::mainnet()
412        .with_block(ctx.block)
413        .with_tx(ctx.tx)
414        .with_cfg(ctx.cfg)
415        .with_db(&mut state);
416
417    // Execute
418    let timer = Instant::now();
419    let (db, exec_result) = if ctx.trace {
420        let mut evm = evm_context
421            .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());
422        let res = evm.inspect_tx_commit(ctx.tx);
423        let db = evm.ctx.journaled_state.database;
424        (db, res)
425    } else {
426        let mut evm = evm_context.build_mainnet();
427        let res = evm.transact_commit(ctx.tx);
428        let db = evm.ctx.journaled_state.database;
429        (db, res)
430    };
431    *ctx.elapsed.lock().unwrap() += timer.elapsed();
432
433    // Check results
434    check_evm_execution(
435        ctx.test,
436        ctx.unit.out.as_ref(),
437        ctx.name,
438        &exec_result,
439        db,
440        ctx.cfg.spec(),
441        ctx.print_json_outcome,
442    )
443}
444
445fn debug_failed_test(ctx: DebugContext) {
446    println!("\nTraces:");
447
448    // Re-run with tracing
449    let mut cache = ctx.cache_state.clone();
450    cache.set_state_clear_flag(ctx.cfg.spec.is_enabled_in(SpecId::SPURIOUS_DRAGON));
451    let mut state = database::State::builder()
452        .with_cached_prestate(cache)
453        .with_bundle_update()
454        .build();
455
456    let mut evm = Context::mainnet()
457        .with_db(&mut state)
458        .with_block(ctx.block)
459        .with_tx(ctx.tx)
460        .with_cfg(ctx.cfg)
461        .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());
462
463    let exec_result = evm.inspect_tx_commit(ctx.tx);
464
465    println!("\nExecution result: {exec_result:#?}");
466    println!("\nExpected exception: {:?}", ctx.test.expect_exception);
467    println!("\nState before: {:#?}", ctx.cache_state);
468    println!(
469        "\nState after: {:#?}",
470        evm.ctx.journaled_state.database.cache
471    );
472    println!("\nSpecification: {:?}", ctx.cfg.spec);
473    println!("\nTx: {:#?}", ctx.tx);
474    println!("Block: {:#?}", ctx.block);
475    println!("Cfg: {:#?}", ctx.cfg);
476    println!(
477        "\nTest name: {:?} (index: {}, path: {:?}) failed:\n{}",
478        ctx.name, ctx.index, ctx.path, ctx.error
479    );
480}
481
/// Effective runner options after implied flags have been resolved.
#[derive(Clone, Copy)]
struct TestRunnerConfig {
    /// Run on a single worker thread.
    single_thread: bool,
    /// Enable EVM tracing.
    trace: bool,
    /// Print test outcomes.
    print_outcome: bool,
    /// Continue with the remaining tests after a failure.
    keep_going: bool,
}

impl TestRunnerConfig {
    /// Build the configuration from the raw flags.
    ///
    /// `trace` forces `print_outcome`, and either of them forces
    /// `single_thread`. `trace` and `keep_going` are stored as given.
    fn new(single_thread: bool, trace: bool, print_outcome: bool, keep_going: bool) -> Self {
        let wants_output = trace || print_outcome;

        Self {
            single_thread: wants_output || single_thread,
            trace,
            print_outcome: wants_output,
            keep_going,
        }
    }
}
505
506#[derive(Clone)]
507struct TestRunnerState {
508    n_errors: Arc<AtomicUsize>,
509    console_bar: Arc<ProgressBar>,
510    queue: Arc<Mutex<(usize, Vec<PathBuf>)>>,
511    elapsed: Arc<Mutex<Duration>>,
512}
513
514impl TestRunnerState {
515    fn new(test_files: Vec<PathBuf>) -> Self {
516        let n_files = test_files.len();
517        Self {
518            n_errors: Arc::new(AtomicUsize::new(0)),
519            console_bar: Arc::new(ProgressBar::with_draw_target(
520                Some(n_files as u64),
521                ProgressDrawTarget::stdout(),
522            )),
523            queue: Arc::new(Mutex::new((0usize, test_files))),
524            elapsed: Arc::new(Mutex::new(Duration::ZERO)),
525        }
526    }
527
528    fn next_test(&self) -> Option<PathBuf> {
529        let (current_idx, queue) = &mut *self.queue.lock().unwrap();
530        let idx = *current_idx;
531        let test_path = queue.get(idx).cloned()?;
532        *current_idx = idx + 1;
533        Some(test_path)
534    }
535}
536
537fn run_test_worker(state: TestRunnerState, config: TestRunnerConfig) -> Result<(), TestError> {
538    loop {
539        if !config.keep_going && state.n_errors.load(Ordering::SeqCst) > 0 {
540            return Ok(());
541        }
542
543        let Some(test_path) = state.next_test() else {
544            return Ok(());
545        };
546
547        let result = execute_test_suite(
548            &test_path,
549            &state.elapsed,
550            config.trace,
551            config.print_outcome,
552        );
553
554        state.console_bar.inc(1);
555
556        if let Err(err) = result {
557            state.n_errors.fetch_add(1, Ordering::SeqCst);
558            if !config.keep_going {
559                return Err(err);
560            }
561        }
562    }
563}
564
/// Decide how many worker threads to start.
///
/// Returns 1 when single-threaded execution is requested or the available
/// parallelism cannot be determined; otherwise the available parallelism,
/// capped at the number of files.
fn determine_thread_count(single_thread: bool, n_files: usize) -> usize {
    if single_thread {
        return 1;
    }

    std::thread::available_parallelism().map_or(1, |cores| cores.get().min(n_files))
}
571
572/// Run all test files in parallel or single-threaded mode
573///
574/// # Arguments
575/// * `test_files` - List of test files to execute
576/// * `single_thread` - Force single-threaded execution
577/// * `trace` - Enable EVM execution tracing
578/// * `print_outcome` - Print test outcomes in JSON format
579/// * `keep_going` - Continue running tests even if some fail
580pub fn run(
581    test_files: Vec<PathBuf>,
582    single_thread: bool,
583    trace: bool,
584    print_outcome: bool,
585    keep_going: bool,
586) -> Result<(), TestError> {
587    let config = TestRunnerConfig::new(single_thread, trace, print_outcome, keep_going);
588    let n_files = test_files.len();
589    let state = TestRunnerState::new(test_files);
590    let num_threads = determine_thread_count(config.single_thread, n_files);
591
592    // Spawn worker threads
593    let mut handles = Vec::with_capacity(num_threads);
594    for i in 0..num_threads {
595        let state = state.clone();
596
597        let thread = std::thread::Builder::new()
598            .name(format!("runner-{i}"))
599            .spawn(move || run_test_worker(state, config))
600            .unwrap();
601
602        handles.push(thread);
603    }
604
605    // Collect results from all threads
606    let mut thread_errors = Vec::new();
607    for (i, handle) in handles.into_iter().enumerate() {
608        match handle.join() {
609            Ok(Ok(())) => {}
610            Ok(Err(e)) => thread_errors.push(e),
611            Err(_) => thread_errors.push(TestError {
612                name: format!("thread {i} panicked"),
613                path: String::new(),
614                kind: TestErrorKind::Panic,
615            }),
616        }
617    }
618
619    state.console_bar.finish();
620
621    // Print summary
622    println!(
623        "Finished execution. Total CPU time: {:.6}s",
624        state.elapsed.lock().unwrap().as_secs_f64()
625    );
626
627    let n_errors = state.n_errors.load(Ordering::SeqCst);
628    let n_thread_errors = thread_errors.len();
629
630    if n_errors == 0 && n_thread_errors == 0 {
631        println!("All tests passed!");
632        Ok(())
633    } else {
634        println!("Encountered {n_errors} errors out of {n_files} total tests");
635
636        if n_thread_errors == 0 {
637            std::process::exit(1);
638        }
639
640        if n_thread_errors > 1 {
641            println!("{n_thread_errors} threads returned an error, out of {num_threads} total:");
642            for error in &thread_errors {
643                println!("{error}");
644            }
645        }
646        Err(thread_errors.swap_remove(0))
647    }
648}