@app.command(name="run")
def run_test_case(  # noqa: PLR0912, PLR0913, PLR0915
    ctx: typer.Context,
    provider: Annotated[
        str,
        typer.Option(help="Provider slug (required, e.g., 'example', 'ilamb')"),
    ],
    diagnostic: Annotated[
        str | None,
        typer.Option(help="Specific diagnostic slug to run (e.g., 'global-mean-timeseries')"),
    ] = None,
    test_case: Annotated[
        str | None,
        typer.Option(help="Specific test case name to run (e.g., 'default')"),
    ] = None,
    output_directory: Annotated[
        Path | None,
        typer.Option(
            help=(
                "Scratch directory for the diagnostic execution results. "
                "The regression workflow also writes the gitignored output/<label> slot."
            )
        ),
    ] = None,
    force_regen: Annotated[
        bool,
        typer.Option(help="Force regeneration of regression baselines"),
    ] = False,
    fetch: Annotated[
        bool,
        typer.Option(help="Fetch test data from ESGF before running"),
    ] = False,
    size_threshold: Annotated[
        float,
        typer.Option(help="Flag files larger than this size in MB (default: 1.0)"),
    ] = 1.0,
    dry_run: Annotated[
        bool,
        typer.Option(help="Show what would be run without executing"),
    ] = False,
    only_missing: Annotated[
        bool,
        typer.Option(help="Only run test cases without existing regression data"),
    ] = False,
    if_changed: Annotated[
        bool,
        typer.Option(help="Only run if catalog has changed since regression data was generated"),
    ] = False,
    clean: Annotated[
        bool,
        typer.Option(help="Delete existing output directory before running"),
    ] = False,
    label: Annotated[
        str,
        typer.Option(help="Output slot name under output/ (default: latest)"),
    ] = "latest",
) -> None:
    """
    Run test cases for diagnostics.

    Executes diagnostics using pre-defined datasets from the test_data_spec
    and optionally compares against regression baselines.

    Use --provider to select which provider's diagnostics to run (required).
    Use --diagnostic and --test-case to further narrow the scope.

    Examples
    --------
        ref test-cases run --provider ilamb  # Run all ILAMB test cases
        ref test-cases run --provider example --diagnostic global-mean-timeseries
        ref test-cases run --provider ilamb --test-case default --fetch
        ref test-cases run --provider pmp --only-missing  # Skip test cases with regression data
        ref test-cases run --provider pmp --if-changed  # Only run if catalog changed
    """
    # Exit behaviour (kept out of the docstring, which typer shows as --help text):
    #   * raises typer.Exit(code=0) when no test case is selected, either because
    #     nothing matched the filters or because every match was skipped;
    #   * returns normally after printing the table when --dry-run is given;
    #   * raises typer.Exit(code=1) when at least one executed test case fails;
    #   * returns normally when every executed test case passes.
    from climate_ref.provider_registry import ProviderRegistry
    from climate_ref_core.testing import (
        TestCasePaths,
        catalog_changed_since_regression,
    )

    # Reason recorded with every skipped test case so the log messages and the
    # dry-run table report why it was skipped instead of assuming one cause.
    reason_regression_exists = "regression exists"
    reason_catalog_unchanged = "catalog unchanged"

    config: Config = ctx.obj.config
    db = ctx.obj.database
    console: Console = ctx.obj.console

    # Build provider registry
    registry = ProviderRegistry.build_from_config(config, db)

    # Validate the provider and the optional filters before looking the provider up,
    # so the `next(...)` below is only reached once validation has passed.
    _validate_provider_in_registry(registry, provider)
    _validate_requested_filters(registry, provider=provider, diagnostic=diagnostic, test_case=test_case)
    provider_instance = next(p for p in registry.providers if p.slug == provider)

    # Collect test cases to run, and the ones skipped together with the reason
    test_cases_to_run: list[tuple[Diagnostic, TestCase]] = []
    skipped_cases: list[tuple[Diagnostic, TestCase, str]] = []
    for diag in provider_instance.diagnostics():
        if diagnostic and diag.slug != diagnostic:
            continue
        if diag.test_data_spec is None:
            continue
        for tc in diag.test_data_spec.test_cases:
            if test_case and tc.name != test_case:
                continue
            paths = TestCasePaths.from_diagnostic(diag, tc.name)
            # Skip if regression exists when using --only-missing
            if only_missing and paths and paths.regression.exists():
                skipped_cases.append((diag, tc, reason_regression_exists))
                continue
            # Skip if catalog hasn't changed when using --if-changed
            if if_changed and paths and not catalog_changed_since_regression(paths):
                skipped_cases.append((diag, tc, reason_catalog_unchanged))
                continue
            test_cases_to_run.append((diag, tc))

    # Only two reasons exist, so the second count follows from the first.
    skipped_existing = sum(1 for *_, reason in skipped_cases if reason == reason_regression_exists)
    skipped_unchanged = len(skipped_cases) - skipped_existing

    if not test_cases_to_run:
        if skipped_existing and skipped_unchanged:
            # Both --only-missing and --if-changed contributed skips
            logger.info(
                f"All {len(skipped_cases)} matching test case(s) skipped: "
                f"{skipped_existing} with existing regression baselines, "
                f"{skipped_unchanged} with unchanged catalogs"
            )
        elif skipped_existing:
            logger.info(
                f"All {skipped_existing} matching test case(s) skipped because "
                "regression baselines already exist"
            )
        elif skipped_unchanged:
            logger.info(
                f"All {skipped_unchanged} matching test case(s) skipped because catalogs are unchanged"
            )
        else:
            logger.warning(f"No test cases found for provider '{provider}'")
            if diagnostic:
                logger.warning(f"  with diagnostic filter: {diagnostic}")
            if test_case:
                logger.warning(f"  with test case filter: {test_case}")
        # Nothing to do is not treated as an error
        raise typer.Exit(code=0)

    logger.info(f"Found {len(test_cases_to_run)} test case(s) to run")
    if skipped_existing:
        logger.info(f"Skipping {skipped_existing} test case(s) with existing regression data")
    if skipped_unchanged:
        logger.info(f"Skipping {skipped_unchanged} test case(s) with unchanged catalogs")

    if dry_run:  # pragma: no cover
        table = Table(title="Test Cases to Run")
        table.add_column("Provider", style="cyan")
        table.add_column("Diagnostic", style="green")
        table.add_column("Test Case", style="yellow")
        table.add_column("Description")
        table.add_column("Status", justify="center")
        for diag, tc in test_cases_to_run:
            table.add_row(provider, diag.slug, tc.name, tc.description, "[green]will run[/green]")
        for diag, tc, reason in skipped_cases:
            table.add_row(provider, diag.slug, tc.name, tc.description, f"[dim]skip ({reason})[/dim]")
        console.print(table)
        return

    if output_directory is not None:
        logger.info(
            f"Using {output_directory} as the execution scratch directory; rebuilt native/bundle files "
            f"will also be written to each test case's gitignored output/{label} slot"
        )

    # Run each test case, remembering the identifiers of the ones that fail
    failed_cases: list[str] = []
    for diag, tc in test_cases_to_run:
        success = _run_single_test_case(
            config=config,
            console=console,
            diag=diag,
            tc=tc,
            execution_dir=output_directory,
            force_regen=force_regen,
            fetch=fetch,
            size_threshold=size_threshold,
            clean=clean,
            label=label,
        )
        if not success:
            failed_cases.append(f"{provider}/{diag.slug}/{tc.name}")

    failures = len(failed_cases)
    successes = len(test_cases_to_run) - failures

    # Print summary
    console.print()
    if failures == 0:
        console.print(f"[green]All {successes} test case(s) passed[/green]")
        return

    console.print(f"[yellow]Results: {successes} passed, {failures} failed[/yellow]")
    console.print("[red]Failed test cases:[/red]")
    for case in failed_cases:
        console.print(f"  - {case}")
    # Non-zero exit code so callers (e.g. CI) can detect the failure
    raise typer.Exit(code=1)