Skip to main content

revme/cmd/statetest/
runner.rs

1use crate::cmd::statetest::merkle_trie::{compute_test_roots, TestValidationResult};
2use indicatif::{ProgressBar, ProgressDrawTarget};
3use revm::{
4    context::{block::BlockEnv, cfg::CfgEnv, tx::TxEnv},
5    context_interface::result::{EVMError, ExecutionResult, HaltReason, InvalidTransaction},
6    database::{self, bal::EvmDatabaseError},
7    database_interface::EmptyDB,
8    inspector::{inspectors::TracerEip3155, InspectCommitEvm},
9    primitives::{hardfork::SpecId, Bytes, B256, U256},
10    statetest_types::{SpecName, Test, TestSuite, TestUnit},
11    Context, ExecuteCommitEvm, InspectEvm, MainBuilder, MainContext,
12};
13use serde_json::json;
14use std::{
15    convert::Infallible,
16    fmt::Debug,
17    io::stderr,
18    path::{Path, PathBuf},
19    sync::{
20        atomic::{AtomicBool, AtomicUsize, Ordering},
21        Arc, Mutex,
22    },
23    time::{Duration, Instant},
24};
25use thiserror::Error;
26
27/// Error that occurs during test execution
28#[derive(Debug, Error)]
29#[error("Path: {path}\nName: {name}\nError: {kind}")]
30pub struct TestError {
31    pub name: String,
32    pub path: String,
33    pub kind: TestErrorKind,
34}
35
36/// Specific kind of error that occurred during test execution
37#[derive(Debug, Error)]
38pub enum TestErrorKind {
39    #[error("logs root mismatch: got {got}, expected {expected}")]
40    LogsRootMismatch { got: B256, expected: B256 },
41    #[error("state root mismatch: got {got}, expected {expected}")]
42    StateRootMismatch { got: B256, expected: B256 },
43    #[error("unknown private key: {0:?}")]
44    UnknownPrivateKey(B256),
45    #[error("unexpected exception: got {got_exception:?}, expected {expected_exception:?}")]
46    UnexpectedException {
47        expected_exception: Option<String>,
48        got_exception: Option<String>,
49    },
50    #[error("unexpected output: got {got_output:?}, expected {expected_output:?}")]
51    UnexpectedOutput {
52        expected_output: Option<Bytes>,
53        got_output: Option<Bytes>,
54    },
55    #[error(transparent)]
56    SerdeDeserialize(#[from] serde_json::Error),
57    #[error("thread panicked")]
58    Panic,
59    #[error("path does not exist")]
60    InvalidPath,
61    #[error("no JSON test files found in path")]
62    NoJsonFiles,
63}
64
/// Decide whether a test suite file is excluded from the run.
///
/// A file is skipped when its path contains `paris/eip7610_create_collision`
/// or when its file name is one of the hard-coded names below (tests that are
/// known to be problematic or that take too long).
///
/// Paths without a final file-name component (for example `/` or `..`) and
/// names that are not valid UTF-8 never match by name, so they are not skipped.
fn skip_test(path: &Path) -> bool {
    let path_str = path.to_str().unwrap_or_default();

    // Skip tests that have storage for newly created account.
    if path_str.contains("paris/eip7610_create_collision") {
        return true;
    }

    // `file_name()` is `None` for paths such as `/` or `..`; treat that as
    // "no name to match" instead of panicking.
    let name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or_default();

    matches!(
        name,
        // Test with some storage check.
        "RevertInCreateInInit_Paris.json"
        | "RevertInCreateInInit.json"
        | "dynamicAccountOverwriteEmpty.json"
        | "dynamicAccountOverwriteEmpty_Paris.json"
        | "RevertInCreateInInitCreate2Paris.json"
        | "create2collisionStorage.json"
        | "RevertInCreateInInitCreate2.json"
        | "create2collisionStorageParis.json"
        | "InitCollision.json"
        | "InitCollisionParis.json"
        | "test_init_collision_create_opcode.json"

        // Malformed value.
        | "ValueOverflow.json"
        | "ValueOverflowParis.json"

        // These tests are passing, but they take a lot of time to execute so we are going to skip them.
        | "Call50000_sha256.json"
        | "static_Call50000_sha256.json"
        | "loopMul.json"
        | "CALLBlake2f_MaxRounds.json"
    )
}
103
104struct TestExecutionContext<'a> {
105    name: &'a str,
106    unit: &'a TestUnit,
107    test: &'a Test,
108    cfg: &'a CfgEnv,
109    block: &'a BlockEnv,
110    tx: &'a TxEnv,
111    cache_state: &'a database::CacheState,
112    elapsed: &'a Arc<Mutex<Duration>>,
113    trace: bool,
114    print_json_outcome: bool,
115}
116
117struct DebugContext<'a> {
118    name: &'a str,
119    path: &'a str,
120    index: usize,
121    test: &'a Test,
122    cfg: &'a CfgEnv,
123    block: &'a BlockEnv,
124    tx: &'a TxEnv,
125    cache_state: &'a database::CacheState,
126    error: &'a TestErrorKind,
127}
128
129fn build_json_output(
130    test: &Test,
131    test_name: &str,
132    exec_result: &Result<
133        ExecutionResult<HaltReason>,
134        EVMError<EvmDatabaseError<Infallible>, InvalidTransaction>,
135    >,
136    validation: &TestValidationResult,
137    spec: SpecId,
138    error: Option<String>,
139) -> serde_json::Value {
140    json!({
141        "stateRoot": validation.state_root,
142        "logsRoot": validation.logs_root,
143        "output": exec_result.as_ref().ok().and_then(|r| r.output().cloned()).unwrap_or_default(),
144        "gasUsed": exec_result.as_ref().ok().map(|r| r.tx_gas_used()).unwrap_or_default(),
145        "pass": error.is_none(),
146        "errorMsg": error.unwrap_or_default(),
147        "evmResult": format_evm_result(exec_result),
148        "postLogsHash": validation.logs_root,
149        "fork": spec,
150        "test": test_name,
151        "d": test.indexes.data,
152        "g": test.indexes.gas,
153        "v": test.indexes.value,
154    })
155}
156
157fn format_evm_result(
158    exec_result: &Result<
159        ExecutionResult<HaltReason>,
160        EVMError<EvmDatabaseError<Infallible>, InvalidTransaction>,
161    >,
162) -> String {
163    match exec_result {
164        Ok(r) => match r {
165            ExecutionResult::Success { reason, .. } => format!("Success: {reason:?}"),
166            ExecutionResult::Revert { .. } => "Revert".to_string(),
167            ExecutionResult::Halt { reason, .. } => format!("Halt: {reason:?}"),
168        },
169        Err(e) => e.to_string(),
170    }
171}
172
173fn validate_exception(
174    test: &Test,
175    exec_result: &Result<
176        ExecutionResult<HaltReason>,
177        EVMError<EvmDatabaseError<Infallible>, InvalidTransaction>,
178    >,
179) -> Result<bool, TestErrorKind> {
180    match (&test.expect_exception, exec_result) {
181        (None, Ok(_)) => Ok(false), // No exception expected, execution succeeded
182        (Some(_), Err(_)) => Ok(true), // Exception expected and occurred
183        _ => Err(TestErrorKind::UnexpectedException {
184            expected_exception: test.expect_exception.clone(),
185            got_exception: exec_result.as_ref().err().map(|e| e.to_string()),
186        }),
187    }
188}
189
190fn validate_output(
191    expected_output: Option<&Bytes>,
192    actual_result: &ExecutionResult<HaltReason>,
193) -> Result<(), TestErrorKind> {
194    if let Some((expected, actual)) = expected_output.zip(actual_result.output()) {
195        if expected != actual {
196            return Err(TestErrorKind::UnexpectedOutput {
197                expected_output: Some(expected.clone()),
198                got_output: actual_result.output().cloned(),
199            });
200        }
201    }
202    Ok(())
203}
204
205fn check_evm_execution(
206    test: &Test,
207    expected_output: Option<&Bytes>,
208    test_name: &str,
209    exec_result: &Result<
210        ExecutionResult<HaltReason>,
211        EVMError<EvmDatabaseError<Infallible>, InvalidTransaction>,
212    >,
213    db: &mut database::State<EmptyDB>,
214    spec: SpecId,
215    print_json_outcome: bool,
216) -> Result<(), TestErrorKind> {
217    let validation = compute_test_roots(exec_result, db);
218
219    let print_json = |error: Option<&TestErrorKind>| {
220        if print_json_outcome {
221            let json = build_json_output(
222                test,
223                test_name,
224                exec_result,
225                &validation,
226                spec,
227                error.map(|e| e.to_string()),
228            );
229            eprintln!("{json}");
230        }
231    };
232
233    // Check if exception handling is correct
234    let exception_expected = validate_exception(test, exec_result).inspect_err(|e| {
235        print_json(Some(e));
236    })?;
237
238    // If exception was expected and occurred, we're done
239    if exception_expected {
240        print_json(None);
241        return Ok(());
242    }
243
244    // Validate output if execution succeeded
245    if let Ok(result) = exec_result {
246        validate_output(expected_output, result).inspect_err(|e| {
247            print_json(Some(e));
248        })?;
249    }
250
251    // Validate logs root
252    if validation.logs_root != test.logs {
253        let error = TestErrorKind::LogsRootMismatch {
254            got: validation.logs_root,
255            expected: test.logs,
256        };
257        print_json(Some(&error));
258        return Err(error);
259    }
260
261    // Validate state root
262    if validation.state_root != test.hash {
263        let error = TestErrorKind::StateRootMismatch {
264            got: validation.state_root,
265            expected: test.hash,
266        };
267        print_json(Some(&error));
268        return Err(error);
269    }
270
271    print_json(None);
272    Ok(())
273}
274
275/// Execute a single test suite file containing multiple tests
276///
277/// # Arguments
278/// * `path` - Path to the JSON test file
279/// * `elapsed` - Shared counter for total execution time
280/// * `trace` - Whether to enable EVM tracing
281/// * `print_json_outcome` - Whether to print JSON formatted results
282pub fn execute_test_suite(
283    path: &Path,
284    elapsed: &Arc<Mutex<Duration>>,
285    trace: bool,
286    print_json_outcome: bool,
287) -> Result<(), TestError> {
288    if skip_test(path) {
289        return Ok(());
290    }
291
292    let s = std::fs::read_to_string(path).unwrap();
293    let path = path.to_string_lossy().into_owned();
294    let suite: TestSuite = serde_json::from_str(&s).map_err(|e| TestError {
295        name: "Unknown".to_string(),
296        path: path.clone(),
297        kind: e.into(),
298    })?;
299
300    for (name, unit) in suite.0 {
301        // Prepare initial state
302        let cache_state = unit.state();
303
304        // Setup base configuration
305        let mut cfg = CfgEnv::default();
306        cfg.chain_id = unit
307            .env
308            .current_chain_id
309            .unwrap_or(U256::ONE)
310            .try_into()
311            .unwrap_or(1);
312
313        // Post and execution
314        for (spec_name, tests) in &unit.post {
315            // Skip Constantinople spec
316            if *spec_name == SpecName::Constantinople {
317                continue;
318            }
319
320            cfg.set_spec_and_mainnet_gas_params(spec_name.to_spec_id());
321
322            // Configure max blobs per spec
323            if cfg.spec().is_enabled_in(SpecId::OSAKA) {
324                cfg.set_max_blobs_per_tx(6);
325            } else if cfg.spec().is_enabled_in(SpecId::PRAGUE) {
326                cfg.set_max_blobs_per_tx(9);
327            } else {
328                cfg.set_max_blobs_per_tx(6);
329            }
330
331            // Setup block environment for this spec
332            let block = unit.block_env(&mut cfg);
333
334            for (index, test) in tests.iter().enumerate() {
335                // Setup transaction environment
336                let tx = match test.tx_env(&unit) {
337                    Ok(tx) => tx,
338                    Err(_) if test.expect_exception.is_some() => continue,
339                    Err(_) => {
340                        return Err(TestError {
341                            name,
342                            path,
343                            kind: TestErrorKind::UnknownPrivateKey(unit.transaction.secret_key),
344                        });
345                    }
346                };
347
348                // Execute the test
349                let result = execute_single_test(TestExecutionContext {
350                    name: &name,
351                    unit: &unit,
352                    test,
353                    cfg: &cfg,
354                    block: &block,
355                    tx: &tx,
356                    cache_state: &cache_state,
357                    elapsed,
358                    trace,
359                    print_json_outcome,
360                });
361
362                if let Err(e) = result {
363                    // Handle error with debug trace if needed
364                    static FAILED: AtomicBool = AtomicBool::new(false);
365                    if print_json_outcome || FAILED.swap(true, Ordering::SeqCst) {
366                        return Err(TestError {
367                            name,
368                            path,
369                            kind: e,
370                        });
371                    }
372
373                    // Re-run with trace for debugging
374                    debug_failed_test(DebugContext {
375                        name: &name,
376                        path: &path,
377                        index,
378                        test,
379                        cfg: &cfg,
380                        block: &block,
381                        tx: &tx,
382                        cache_state: &cache_state,
383                        error: &e,
384                    });
385
386                    return Err(TestError {
387                        path,
388                        name,
389                        kind: e,
390                    });
391                }
392            }
393        }
394    }
395    Ok(())
396}
397
398fn execute_single_test(ctx: TestExecutionContext) -> Result<(), TestErrorKind> {
399    // Prepare state
400    let cache = ctx.cache_state.clone();
401    let mut state = database::State::builder()
402        .with_cached_prestate(cache)
403        .with_bundle_update()
404        .build();
405
406    let evm_context = Context::mainnet()
407        .with_block(ctx.block)
408        .with_tx(ctx.tx)
409        .with_cfg(ctx.cfg.clone())
410        .with_db(&mut state);
411
412    // Execute
413    let timer = Instant::now();
414    let (db, exec_result) = if ctx.trace {
415        let mut evm = evm_context
416            .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());
417        let res = evm.inspect_tx_commit(ctx.tx);
418        let db = evm.ctx.journaled_state.database;
419        (db, res)
420    } else {
421        let mut evm = evm_context.build_mainnet();
422        let res = evm.transact_commit(ctx.tx);
423        let db = evm.ctx.journaled_state.database;
424        (db, res)
425    };
426    *ctx.elapsed.lock().unwrap() += timer.elapsed();
427
428    let exec_result = exec_result;
429    // Check results
430    check_evm_execution(
431        ctx.test,
432        ctx.unit.out.as_ref(),
433        ctx.name,
434        &exec_result,
435        db,
436        *ctx.cfg.spec(),
437        ctx.print_json_outcome,
438    )
439}
440
441fn debug_failed_test(ctx: DebugContext) {
442    println!("\nTraces:");
443
444    // Re-run with tracing
445    let cache = ctx.cache_state.clone();
446    let mut state = database::State::builder()
447        .with_cached_prestate(cache)
448        .with_bundle_update()
449        .build();
450
451    let mut evm = Context::mainnet()
452        .with_db(&mut state)
453        .with_block(ctx.block)
454        .with_tx(ctx.tx)
455        .with_cfg(ctx.cfg.clone())
456        .build_mainnet_with_inspector(TracerEip3155::buffered(stderr()).without_summary());
457
458    let _ = evm.inspect_tx(ctx.tx);
459
460    // Execute the transaction without tracing
461    let exec_result = evm.transact_commit(ctx.tx);
462
463    println!("\nExecution result: {exec_result:#?}");
464    println!("\nExpected exception: {:?}", ctx.test.expect_exception);
465    println!("\nState before:\n{}", ctx.cache_state.pretty_print());
466    println!(
467        "\nState after:\n{}",
468        evm.ctx.journaled_state.database.cache.pretty_print()
469    );
470    println!("\nSpecification: {:?}", ctx.cfg.spec());
471    println!("\nTx: {:#?}", ctx.tx);
472    println!("Block: {:#?}", ctx.block);
473    println!("Cfg: {:#?}", ctx.cfg);
474    println!(
475        "\nTest name: {:?} (index: {}, path: {:?}) failed:\n{}",
476        ctx.name, ctx.index, ctx.path, ctx.error
477    );
478}
479
/// Effective runner options after the flags have been reconciled.
#[derive(Clone, Copy)]
struct TestRunnerConfig {
    /// Run all tests on a single worker thread.
    single_thread: bool,
    /// Enable EVM execution tracing.
    trace: bool,
    /// Print a JSON outcome for every test.
    print_outcome: bool,
    /// Keep running after a failure and collect the errors.
    keep_going: bool,
}

impl TestRunnerConfig {
    /// Build the effective configuration from the requested flags.
    ///
    /// Tracing forces `print_outcome`, and either of those forces
    /// `single_thread`; `trace` and `keep_going` are taken as given.
    fn new(single_thread: bool, trace: bool, print_outcome: bool, keep_going: bool) -> Self {
        Self {
            single_thread: single_thread || print_outcome || trace,
            trace,
            print_outcome: print_outcome || trace,
            keep_going,
        }
    }
}
503
504#[derive(Clone)]
505struct TestRunnerState {
506    n_errors: Arc<AtomicUsize>,
507    console_bar: Arc<ProgressBar>,
508    queue: Arc<Mutex<(usize, Vec<PathBuf>)>>,
509    elapsed: Arc<Mutex<Duration>>,
510    errors: Arc<Mutex<Vec<TestError>>>,
511}
512
513impl TestRunnerState {
514    fn new(test_files: Vec<PathBuf>, omit_progress: bool) -> Self {
515        let n_files = test_files.len();
516        let draw_target = if omit_progress {
517            ProgressDrawTarget::hidden()
518        } else {
519            ProgressDrawTarget::stdout()
520        };
521        Self {
522            n_errors: Arc::new(AtomicUsize::new(0)),
523            console_bar: Arc::new(ProgressBar::with_draw_target(
524                Some(n_files as u64),
525                draw_target,
526            )),
527            queue: Arc::new(Mutex::new((0usize, test_files))),
528            elapsed: Arc::new(Mutex::new(Duration::ZERO)),
529            errors: Arc::new(Mutex::new(Vec::new())),
530        }
531    }
532
533    fn next_test(&self) -> Option<PathBuf> {
534        let (current_idx, queue) = &mut *self.queue.lock().unwrap();
535        let idx = *current_idx;
536        let test_path = queue.get(idx).cloned()?;
537        *current_idx = idx + 1;
538        Some(test_path)
539    }
540}
541
542fn run_test_worker(state: TestRunnerState, config: TestRunnerConfig) -> Result<(), TestError> {
543    loop {
544        if !config.keep_going && state.n_errors.load(Ordering::SeqCst) > 0 {
545            return Ok(());
546        }
547
548        let Some(test_path) = state.next_test() else {
549            return Ok(());
550        };
551
552        let result = execute_test_suite(
553            &test_path,
554            &state.elapsed,
555            config.trace,
556            config.print_outcome,
557        );
558
559        state.console_bar.inc(1);
560
561        if let Err(err) = result {
562            state.n_errors.fetch_add(1, Ordering::SeqCst);
563            if config.keep_going {
564                state.errors.lock().unwrap().push(err);
565            } else {
566                return Err(err);
567            }
568        }
569    }
570}
571
/// Number of worker threads to spawn.
///
/// Always 1 in single-thread mode or when the available parallelism cannot
/// be queried; otherwise the available parallelism capped at `n_files`.
fn determine_thread_count(single_thread: bool, n_files: usize) -> usize {
    if single_thread {
        return 1;
    }

    std::thread::available_parallelism().map_or(1, |cores| cores.get().min(n_files))
}
578
579/// Run all test files in parallel or single-threaded mode
580///
581/// # Arguments
582/// * `test_files` - List of test files to execute
583/// * `single_thread` - Force single-threaded execution
584/// * `trace` - Enable EVM execution tracing
585/// * `print_outcome` - Print test outcomes in JSON format
586/// * `keep_going` - Continue running tests even if some fail
587pub fn run(
588    test_files: Vec<PathBuf>,
589    single_thread: bool,
590    trace: bool,
591    print_outcome: bool,
592    keep_going: bool,
593    omit_progress: bool,
594) -> Result<(), TestError> {
595    let config = TestRunnerConfig::new(single_thread, trace, print_outcome, keep_going);
596    let n_files = test_files.len();
597    let state = TestRunnerState::new(test_files, omit_progress);
598    let num_threads = determine_thread_count(config.single_thread, n_files);
599
600    // Spawn worker threads
601    let mut handles = Vec::with_capacity(num_threads);
602    for i in 0..num_threads {
603        let state = state.clone();
604
605        let thread = std::thread::Builder::new()
606            .name(format!("runner-{i}"))
607            .spawn(move || run_test_worker(state, config))
608            .unwrap();
609
610        handles.push(thread);
611    }
612
613    // Collect results from all threads
614    let mut thread_errors = Vec::new();
615    for (i, handle) in handles.into_iter().enumerate() {
616        match handle.join() {
617            Ok(Ok(())) => {}
618            Ok(Err(e)) => thread_errors.push(e),
619            Err(_) => thread_errors.push(TestError {
620                name: format!("thread {i} panicked"),
621                path: String::new(),
622                kind: TestErrorKind::Panic,
623            }),
624        }
625    }
626
627    state.console_bar.finish();
628
629    // Print summary
630    println!(
631        "Finished execution. Total CPU time: {:.6}s",
632        state.elapsed.lock().unwrap().as_secs_f64()
633    );
634
635    let n_errors = state.n_errors.load(Ordering::SeqCst);
636    let n_thread_errors = thread_errors.len();
637
638    if n_errors == 0 && n_thread_errors == 0 {
639        println!("All tests passed!");
640        Ok(())
641    } else {
642        println!("Encountered {n_errors} errors out of {n_files} total tests");
643
644        let collected_errors = state.errors.lock().unwrap();
645        if !collected_errors.is_empty() {
646            println!("\nFailed tests:");
647            for error in collected_errors.iter() {
648                println!("  {error}");
649            }
650        }
651        drop(collected_errors);
652
653        if n_thread_errors == 0 {
654            std::process::exit(1);
655        }
656
657        if n_thread_errors > 1 {
658            println!("{n_thread_errors} threads returned an error, out of {num_threads} total:");
659            for error in &thread_errors {
660                println!("{error}");
661            }
662        }
663        Err(thread_errors.swap_remove(0))
664    }
665}