Skip to content

climate_ref.cli.test_cases.run #

ref test-cases run.

Executes diagnostics for their declared test cases, writes the native output into an output slot, rebuilds the committed bundle, and (when asked) promotes it to the tracked regression baseline.

run_test_case(ctx, provider, diagnostic=None, test_case=None, output_directory=None, force_regen=False, fetch=False, size_threshold=1.0, dry_run=False, only_missing=False, if_changed=False, clean=False, label='latest') #

Run test cases for diagnostics.

Executes diagnostics using pre-defined datasets from the test_data_spec and optionally compares against regression baselines.

Use --provider to select which provider's diagnostics to run (required). Use --diagnostic and --test-case to further narrow the scope.

Examples:

ref test-cases run --provider ilamb              # Run all ILAMB test cases
ref test-cases run --provider example --diagnostic global-mean-timeseries
ref test-cases run --provider ilamb --test-case default --fetch
ref test-cases run --provider pmp --only-missing # Skip test cases with regression data
ref test-cases run --provider pmp --if-changed   # Only run if catalog changed
Source code in packages/climate-ref/src/climate_ref/cli/test_cases/run.py
@app.command(name="run")
def run_test_case(  # noqa: PLR0912, PLR0913, PLR0915
    ctx: typer.Context,
    provider: Annotated[
        str,
        typer.Option(help="Provider slug (required, e.g., 'example', 'ilamb')"),
    ],
    diagnostic: Annotated[
        str | None,
        typer.Option(help="Specific diagnostic slug to run (e.g., 'global-mean-timeseries')"),
    ] = None,
    test_case: Annotated[
        str | None,
        typer.Option(help="Specific test case name to run (e.g., 'default')"),
    ] = None,
    output_directory: Annotated[
        Path | None,
        typer.Option(
            help=(
                "Scratch directory for the diagnostic execution results. "
                "The regression workflow also writes the gitignored output/<label> slot."
            )
        ),
    ] = None,
    force_regen: Annotated[
        bool,
        typer.Option(help="Force regeneration of regression baselines"),
    ] = False,
    fetch: Annotated[
        bool,
        typer.Option(help="Fetch test data from ESGF before running"),
    ] = False,
    size_threshold: Annotated[
        float,
        typer.Option(help="Flag files larger than this size in MB (default: 1.0)"),
    ] = 1.0,
    dry_run: Annotated[
        bool,
        typer.Option(help="Show what would be run without executing"),
    ] = False,
    only_missing: Annotated[
        bool,
        typer.Option(help="Only run test cases without existing regression data"),
    ] = False,
    if_changed: Annotated[
        bool,
        typer.Option(help="Only run if catalog has changed since regression data was generated"),
    ] = False,
    clean: Annotated[
        bool,
        typer.Option(help="Delete existing output directory before running"),
    ] = False,
    label: Annotated[
        str,
        typer.Option(help="Output slot name under output/ (default: latest)"),
    ] = "latest",
) -> None:
    """
    Run test cases for diagnostics.

    Executes diagnostics using pre-defined datasets from the test_data_spec
    and optionally compares against regression baselines.

    Use --provider to select which provider's diagnostics to run (required).
    Use --diagnostic and --test-case to further narrow the scope.

    Examples
    --------
        ref test-cases run --provider ilamb              # Run all ILAMB test cases
        ref test-cases run --provider example --diagnostic global-mean-timeseries
        ref test-cases run --provider ilamb --test-case default --fetch
        ref test-cases run --provider pmp --only-missing # Skip test cases with regression data
        ref test-cases run --provider pmp --if-changed   # Only run if catalog changed
    """
    # NOTE: the docstring above doubles as the command's --help text, so per-option
    # documentation lives in the typer.Option(help=...) strings rather than in the docstring.
    from climate_ref.provider_registry import ProviderRegistry
    from climate_ref_core.testing import (
        TestCasePaths,
        catalog_changed_since_regression,
    )

    config: Config = ctx.obj.config
    db = ctx.obj.database
    console: Console = ctx.obj.console

    # Build provider registry
    registry = ProviderRegistry.build_from_config(config, db)

    # Find the provider; the validators run first, before the lookup below
    _validate_provider_in_registry(registry, provider)
    _validate_requested_filters(registry, provider=provider, diagnostic=diagnostic, test_case=test_case)
    provider_instance = next(p for p in registry.providers if p.slug == provider)

    # Reasons a test case can be skipped. They are recorded per case so that the log
    # messages and the dry-run table report the actual cause, including when
    # --only-missing and --if-changed are combined.
    skip_regression_exists = "regression exists"
    skip_catalog_unchanged = "catalog unchanged"

    # Collect test cases to run
    test_cases_to_run: list[tuple[Diagnostic, TestCase]] = []
    skipped_cases: list[tuple[Diagnostic, TestCase, str]] = []

    for diag in provider_instance.diagnostics():
        if diagnostic and diag.slug != diagnostic:
            continue
        if diag.test_data_spec is None:
            continue

        for tc in diag.test_data_spec.test_cases:
            if test_case and tc.name != test_case:
                continue
            paths = TestCasePaths.from_diagnostic(diag, tc.name)
            # Skip if regression exists when using --only-missing
            if only_missing and paths and paths.regression.exists():
                skipped_cases.append((diag, tc, skip_regression_exists))
                continue
            # Skip if catalog hasn't changed when using --if-changed
            if if_changed and paths and not catalog_changed_since_regression(paths):
                skipped_cases.append((diag, tc, skip_catalog_unchanged))
                continue
            test_cases_to_run.append((diag, tc))

    n_regression_exists = sum(1 for _, _, reason in skipped_cases if reason == skip_regression_exists)
    n_catalog_unchanged = len(skipped_cases) - n_regression_exists

    if not test_cases_to_run:
        if n_regression_exists and n_catalog_unchanged:
            # Both filters contributed, so neither single-reason message would be accurate
            logger.info(
                f"All {len(skipped_cases)} matching test case(s) skipped: "
                f"{n_regression_exists} with existing regression baselines, "
                f"{n_catalog_unchanged} with unchanged catalogs"
            )
        elif n_regression_exists:
            logger.info(
                f"All {len(skipped_cases)} matching test case(s) skipped because "
                "regression baselines already exist"
            )
        elif n_catalog_unchanged:
            logger.info(
                f"All {len(skipped_cases)} matching test case(s) skipped because catalogs are unchanged"
            )
        else:
            logger.warning(f"No test cases found for provider '{provider}'")
            if diagnostic:
                logger.warning(f"  with diagnostic filter: {diagnostic}")
            if test_case:
                logger.warning(f"  with test case filter: {test_case}")
        # Nothing to run is not treated as a failure
        raise typer.Exit(code=0)

    logger.info(f"Found {len(test_cases_to_run)} test case(s) to run")
    if n_regression_exists:
        logger.info(f"Skipping {n_regression_exists} test case(s) with existing regression data")
    if n_catalog_unchanged:
        logger.info(f"Skipping {n_catalog_unchanged} test case(s) with unchanged catalogs")

    if dry_run:  # pragma: no cover
        table = Table(title="Test Cases to Run")
        table.add_column("Provider", style="cyan")
        table.add_column("Diagnostic", style="green")
        table.add_column("Test Case", style="yellow")
        table.add_column("Description")
        table.add_column("Status", justify="center")

        for diag, tc in test_cases_to_run:
            table.add_row(provider, diag.slug, tc.name, tc.description, "[green]will run[/green]")

        # Show the recorded reason rather than assuming every skip is due to existing regression data
        for diag, tc, reason in skipped_cases:
            table.add_row(provider, diag.slug, tc.name, tc.description, f"[dim]skip ({reason})[/dim]")

        console.print(table)
        return

    # Run each test case
    successes = 0
    failures = 0
    failed_cases: list[str] = []

    if output_directory is not None:
        logger.info(
            f"Using {output_directory} as the execution scratch directory; rebuilt native/bundle files "
            f"will also be written to each test case's gitignored output/{label} slot"
        )

    for diag, tc in test_cases_to_run:
        success = _run_single_test_case(
            config=config,
            console=console,
            diag=diag,
            tc=tc,
            execution_dir=output_directory,
            force_regen=force_regen,
            fetch=fetch,
            size_threshold=size_threshold,
            clean=clean,
            label=label,
        )
        if success:
            successes += 1
        else:
            failures += 1
            failed_cases.append(f"{provider}/{diag.slug}/{tc.name}")

    # Print summary; a non-zero exit code signals that at least one test case failed
    console.print()
    if failures == 0:
        console.print(f"[green]All {successes} test case(s) passed[/green]")
    else:
        console.print(f"[yellow]Results: {successes} passed, {failures} failed[/yellow]")
        console.print("[red]Failed test cases:[/red]")
        for case in failed_cases:
            console.print(f"  - {case}")
        raise typer.Exit(code=1)