Skip to content

climate_ref_core.regression.manifest #

Manifest model and digest utilities for test case regression bundles.

The manifest (`manifest.json`) lives alongside a test case's regression data. It records:

- `schema`: the manifest schema version (`SCHEMA_VERSION`).
- `test_case_version`: a monotonic, author-bumped integer coupling the committed bundle to its native outputs.
- `committed`: sha256 digests of the committed regression JSON artefacts, computed over the exact placeholder-substituted bytes as they sit on disk, so that a CI recompute is deterministic.
- `native`: digests of the curated native output files, authored ONLY by mint.

Digests use sha256 throughout for hashing files.

COMMITTED_BUNDLE_FILES = ('series.json', 'diagnostic.json', 'output.json') module-attribute #

The committed CMEC artefacts tracked in git.

Their digests are tracked in `Manifest.committed`.

SCHEMA_VERSION = 1 module-attribute #

Current manifest schema version.

Manifest #

The on-disk manifest for a test case regression bundle.

Serialised as manifest.json with stable key ordering and a trailing newline, so repeated dumps are byte-identical.

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
@frozen
class Manifest:
    """
    The on-disk manifest for a test case regression bundle.

    Serialised as ``manifest.json`` with stable key ordering and a trailing newline,
    so repeated dumps are byte-identical.
    """

    schema: int
    """Manifest schema version; equals :data:`SCHEMA_VERSION` for current manifests."""

    test_case_version: int
    """Monotonic, author-bumped version coupling the bundle to its native outputs."""

    committed: dict[str, str]
    """Digests of committed regression JSON artefacts: ``{relpath: sha256}``."""

    native: dict[str, NativeEntry]
    """Digests of curated native output files: ``{relpath: NativeEntry}``."""

    catalog_hash: str | None = None
    """Hash of the test case input ``catalog.yaml`` (its ``_metadata.hash``) that produced this baseline.
    This couples the baseline to its inputs.
    The CI gate fails a case whose live catalog hash no longer matches this value.
    """

    @classmethod
    def load(cls, path: Path) -> Manifest:
        """
        Load a manifest from ``manifest.json``.

        The file is read as UTF-8 and handed to :meth:`loads`,
        with the path used as the ``source`` label in error messages.

        Parameters
        ----------
        path
            Path to the manifest file.

        Returns
        -------
        :
            The parsed manifest.

        Raises
        ------
        ValueError
            If the manifest fails any of the checks performed by :meth:`loads`
            (e.g. hand-edited or written by an incompatible version).
        """
        return cls.loads(path.read_text(encoding="utf-8"), source=str(path))

    @classmethod
    def loads(cls, text: str, *, source: str = "<string>") -> Manifest:
        """
        Parse a manifest from its JSON text.

        Used when the manifest does not live on disk at parse time,
        e.g. when reading the base-branch copy via ``git show`` for the CI coupling gate.

        Parameters
        ----------
        text
            The manifest JSON.
        source
            A label for the text's origin, used in error messages.

        Returns
        -------
        :
            The parsed manifest.

        Raises
        ------
        ValueError
            If the text is not valid JSON, the top level is not a JSON object,
            required keys are missing, the schema is unsupported,
            ``test_case_version`` is not an integer, ``committed`` is not a mapping,
            or a native entry is malformed
            (e.g. hand-edited or written by an incompatible version).
        """
        data = json.loads(text)
        # ``json.loads`` also returns scalars, lists and ``None``; without this guard the
        # membership test below raises ``TypeError`` for e.g. ``3`` or ``null``
        # instead of the documented ``ValueError``.
        if not isinstance(data, dict):
            raise ValueError(
                f"Invalid manifest {source}: expected a JSON object at the top level, "
                f"got {type(data).__name__}. "
                "The manifest may be corrupted or written by an incompatible version; "
                "regenerate it with `ref test-cases run --force-regen`."
            )
        missing = [key for key in ("schema", "test_case_version", "committed", "native") if key not in data]
        if missing:
            raise ValueError(
                f"Invalid manifest {source}: missing required keys {missing}. "
                "The manifest may be corrupted or written by an incompatible version; "
                "regenerate it with `ref test-cases run --force-regen`."
            )
        schema = data["schema"]
        # ``bool`` is a subclass of ``int``, so ``True`` must be rejected explicitly.
        if isinstance(schema, bool) or not isinstance(schema, int) or schema != SCHEMA_VERSION:
            raise ValueError(
                f"Invalid manifest {source}: unsupported schema {schema!r}, "
                f"expected {SCHEMA_VERSION}. The manifest was written by an incompatible "
                "version; regenerate it with `ref test-cases run --force-regen`."
            )
        test_case_version = data["test_case_version"]
        if isinstance(test_case_version, bool) or not isinstance(test_case_version, int):
            raise ValueError(
                f"Invalid manifest {source}: 'test_case_version' must be an integer, "
                f"got {test_case_version!r}."
            )
        committed = data["committed"]
        # ``dict(...)`` would otherwise raise ``TypeError`` for scalars and silently
        # accept a list of pairs.
        if not isinstance(committed, dict):
            raise ValueError(
                f"Invalid manifest {source}: 'committed' must be a mapping of "
                f"{{relpath: sha256}}, got {type(committed).__name__}."
            )
        try:
            native = {
                relpath: NativeEntry(sha256=entry["sha256"], size=entry["size"])
                for relpath, entry in data["native"].items()
            }
        except (KeyError, TypeError, AttributeError) as exc:
            raise ValueError(
                f"Invalid manifest {source}: malformed 'native' entry ({exc!r}). "
                "Each entry must be a mapping with 'sha256' and 'size' keys."
            ) from exc
        # Reject hand-edited or hostile manifests that could escape the
        # destination directory or carry a malformed digest when materialised.
        for relpath, entry in native.items():
            try:
                safe_path(relpath, label="native path")
            except ValueError as exc:
                raise ValueError(f"Invalid manifest {source}: {exc}") from exc
            # NOTE(review): errors from ``_validate_digest`` propagate without the
            # ``Invalid manifest {source}`` prefix -- confirm that is intended.
            _validate_digest(entry.sha256)
            if isinstance(entry.size, bool) or not isinstance(entry.size, int) or entry.size < 0:
                raise ValueError(
                    f"Invalid manifest {source}: native entry {relpath!r} has invalid size "
                    f"{entry.size!r}; expected a non-negative integer."
                )
        return cls(
            schema=schema,
            test_case_version=test_case_version,
            committed=dict(committed),
            native=native,
            # TODO: remove optionality when all manifests have this field.
            catalog_hash=data.get("catalog_hash"),
        )

    def dump(self, path: Path) -> None:
        """
        Write the manifest to ``manifest.json``.

        Keys are stably ordered (``sort_keys=True``) and a trailing newline is added,
        so ``dump`` followed by ``load`` round-trips byte-identically.
        The UTF-8 bytes are written directly, so line endings are always ``\\n``
        regardless of the platform's default newline translation.

        Parameters
        ----------
        path
            Path to write the manifest to.
        """
        payload = {
            "schema": self.schema,
            "test_case_version": self.test_case_version,
            "catalog_hash": self.catalog_hash,
            "committed": self.committed,
            "native": {relpath: asdict(entry) for relpath, entry in self.native.items()},
        }
        text = json.dumps(payload, indent=2, sort_keys=True) + "\n"
        # ``write_text`` would translate "\n" to the platform line separator in text mode;
        # writing bytes keeps the file identical on every platform.
        path.write_bytes(text.encode("utf-8"))

    @classmethod
    def seed_v1(cls, committed_digests: dict[str, str], catalog_hash: str | None = None) -> Manifest:
        """
        Create an initial manifest at ``test_case_version == 1`` with no native outputs.

        Parameters
        ----------
        committed_digests
            Digests of the committed regression JSON artefacts.
            The mapping is copied, so later changes to it do not affect the manifest.
        catalog_hash
            Hash of the input ``catalog.yaml`` that produced the baseline, if known.

        Returns
        -------
        :
            A fresh manifest with ``test_case_version=1`` and ``native={}``.
        """
        return cls(
            schema=SCHEMA_VERSION,
            test_case_version=1,
            committed=dict(committed_digests),
            catalog_hash=catalog_hash,
            native={},
        )

catalog_hash = None class-attribute instance-attribute #

Hash of the test case input catalog.yaml (its _metadata.hash) that produced this baseline. This couples the baseline to its inputs. The CI gate fails a case whose live catalog hash no longer matches this value.

committed instance-attribute #

Digests of committed regression JSON artefacts: {relpath: sha256}.

native instance-attribute #

Digests of curated native output files: {relpath: NativeEntry}.

schema instance-attribute #

Manifest schema version; equals `SCHEMA_VERSION` for current manifests.

test_case_version instance-attribute #

Monotonic, author-bumped version coupling the bundle to its native outputs.

dump(path) #

Write the manifest to manifest.json.

Keys are stably ordered (sort_keys=True) and a trailing newline is added, so dump followed by load round-trips byte-identically.

Parameters:

Name Type Description Default
path Path

Path to write the manifest to.

required
Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
def dump(self, path: Path) -> None:
    """
    Write the manifest to ``manifest.json``.

    Keys are stably ordered (``sort_keys=True``) and a trailing newline is added,
    so ``dump`` followed by ``load`` round-trips byte-identically.
    The UTF-8 bytes are written directly, so line endings are always ``\\n``
    regardless of the platform's default newline translation.

    Parameters
    ----------
    path
        Path to write the manifest to.
    """
    payload = {
        "schema": self.schema,
        "test_case_version": self.test_case_version,
        "catalog_hash": self.catalog_hash,
        "committed": self.committed,
        "native": {relpath: asdict(entry) for relpath, entry in self.native.items()},
    }
    text = json.dumps(payload, indent=2, sort_keys=True) + "\n"
    # ``write_text`` would translate "\n" to the platform line separator in text mode;
    # writing bytes keeps the file identical on every platform.
    path.write_bytes(text.encode("utf-8"))

load(path) classmethod #

Load a manifest from manifest.json.

Parameters:

Name Type Description Default
path Path

Path to the manifest file.

required

Returns:

Type Description
Manifest

The parsed manifest.

Raises:

Type Description
ValueError

If the manifest is missing required keys or has malformed native entries (e.g. hand-edited or written by an incompatible version).

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
@classmethod
def load(cls, path: Path) -> Manifest:
    """
    Read ``manifest.json`` from disk and parse it.

    The file is decoded as UTF-8 and passed to ``loads``;
    the path doubles as the ``source`` label in any error message.

    Parameters
    ----------
    path
        Path to the manifest file.

    Returns
    -------
    :
        The parsed manifest.

    Raises
    ------
    ValueError
        If the manifest is missing required keys or has malformed native entries
        (e.g. hand-edited or written by an incompatible version).
    """
    manifest_text = path.read_text(encoding="utf-8")
    origin = str(path)
    return cls.loads(manifest_text, source=origin)

loads(text, *, source='<string>') classmethod #

Parse a manifest from its JSON text.

Used when the manifest does not live on disk at parse time, e.g. when reading the base-branch copy via git show for the CI coupling gate.

Parameters:

Name Type Description Default
text str

The manifest JSON.

required
source str

A label for the text's origin, used in error messages.

'<string>'

Returns:

Type Description
Manifest

The parsed manifest.

Raises:

Type Description
ValueError

If the manifest is missing required keys or has malformed native entries (e.g. hand-edited or written by an incompatible version).

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
@classmethod
def loads(cls, text: str, *, source: str = "<string>") -> Manifest:
    """
    Parse a manifest from its JSON text.

    Used when the manifest does not live on disk at parse time,
    e.g. when reading the base-branch copy via ``git show`` for the CI coupling gate.

    Parameters
    ----------
    text
        The manifest JSON.
    source
        A label for the text's origin, used in error messages.

    Returns
    -------
    :
        The parsed manifest.

    Raises
    ------
    ValueError
        If the text is not valid JSON, the top level is not a JSON object,
        required keys are missing, the schema is unsupported,
        ``test_case_version`` is not an integer, ``committed`` is not a mapping,
        or a native entry is malformed
        (e.g. hand-edited or written by an incompatible version).
    """
    data = json.loads(text)
    # ``json.loads`` also returns scalars, lists and ``None``; without this guard the
    # membership test below raises ``TypeError`` for e.g. ``3`` or ``null``
    # instead of the documented ``ValueError``.
    if not isinstance(data, dict):
        raise ValueError(
            f"Invalid manifest {source}: expected a JSON object at the top level, "
            f"got {type(data).__name__}. "
            "The manifest may be corrupted or written by an incompatible version; "
            "regenerate it with `ref test-cases run --force-regen`."
        )
    missing = [key for key in ("schema", "test_case_version", "committed", "native") if key not in data]
    if missing:
        raise ValueError(
            f"Invalid manifest {source}: missing required keys {missing}. "
            "The manifest may be corrupted or written by an incompatible version; "
            "regenerate it with `ref test-cases run --force-regen`."
        )
    schema = data["schema"]
    # ``bool`` is a subclass of ``int``, so ``True`` must be rejected explicitly.
    if isinstance(schema, bool) or not isinstance(schema, int) or schema != SCHEMA_VERSION:
        raise ValueError(
            f"Invalid manifest {source}: unsupported schema {schema!r}, "
            f"expected {SCHEMA_VERSION}. The manifest was written by an incompatible "
            "version; regenerate it with `ref test-cases run --force-regen`."
        )
    test_case_version = data["test_case_version"]
    if isinstance(test_case_version, bool) or not isinstance(test_case_version, int):
        raise ValueError(
            f"Invalid manifest {source}: 'test_case_version' must be an integer, "
            f"got {test_case_version!r}."
        )
    committed = data["committed"]
    # ``dict(...)`` would otherwise raise ``TypeError`` for scalars and silently
    # accept a list of pairs.
    if not isinstance(committed, dict):
        raise ValueError(
            f"Invalid manifest {source}: 'committed' must be a mapping of "
            f"{{relpath: sha256}}, got {type(committed).__name__}."
        )
    try:
        native = {
            relpath: NativeEntry(sha256=entry["sha256"], size=entry["size"])
            for relpath, entry in data["native"].items()
        }
    except (KeyError, TypeError, AttributeError) as exc:
        raise ValueError(
            f"Invalid manifest {source}: malformed 'native' entry ({exc!r}). "
            "Each entry must be a mapping with 'sha256' and 'size' keys."
        ) from exc
    # Reject hand-edited or hostile manifests that could escape the
    # destination directory or carry a malformed digest when materialised.
    for relpath, entry in native.items():
        try:
            safe_path(relpath, label="native path")
        except ValueError as exc:
            raise ValueError(f"Invalid manifest {source}: {exc}") from exc
        # NOTE(review): errors from ``_validate_digest`` propagate without the
        # ``Invalid manifest {source}`` prefix -- confirm that is intended.
        _validate_digest(entry.sha256)
        if isinstance(entry.size, bool) or not isinstance(entry.size, int) or entry.size < 0:
            raise ValueError(
                f"Invalid manifest {source}: native entry {relpath!r} has invalid size "
                f"{entry.size!r}; expected a non-negative integer."
            )
    return cls(
        schema=schema,
        test_case_version=test_case_version,
        committed=dict(committed),
        native=native,
        # TODO: remove optionality when all manifests have this field.
        catalog_hash=data.get("catalog_hash"),
    )

seed_v1(committed_digests, catalog_hash=None) classmethod #

Create an initial manifest at test_case_version == 1 with no native outputs.

Parameters:

Name Type Description Default
committed_digests dict[str, str]

Digests of the committed regression JSON artefacts.

required
catalog_hash str | None

Hash of the input catalog.yaml that produced the baseline, if known.

None

Returns:

Type Description
Manifest

A fresh manifest with test_case_version=1 and native={}.

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
@classmethod
def seed_v1(cls, committed_digests: dict[str, str], catalog_hash: str | None = None) -> Manifest:
    """
    Build the very first manifest for a test case: version 1, no native outputs.

    Parameters
    ----------
    committed_digests
        Digests of the committed regression JSON artefacts.
    catalog_hash
        Hash of the input ``catalog.yaml`` that produced the baseline, if known.

    Returns
    -------
    :
        A fresh manifest with ``test_case_version=1`` and ``native={}``.
    """
    initial_fields = {
        "schema": SCHEMA_VERSION,
        "test_case_version": 1,
        # Copy the mapping so the manifest does not alias the caller's dict.
        "committed": dict(committed_digests),
        "catalog_hash": catalog_hash,
        "native": {},
    }
    return cls(**initial_fields)

NativeEntry #

A single curated native output file recorded in the manifest.

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
@frozen
class NativeEntry:
    """
    A single curated native output file recorded in the manifest.

    Instances are the values of ``Manifest.native``, keyed by the file's relative path.
    """

    # Checked with ``_validate_digest`` when a manifest is parsed by ``Manifest.loads``.
    sha256: str
    """Hex-encoded sha256 digest of the curated file."""

    # ``Manifest.loads`` rejects anything that is not a non-negative integer (bools included).
    size: int
    """Size of the curated file in bytes."""

sha256 instance-attribute #

Hex-encoded sha256 digest of the curated file.

size instance-attribute #

Size of the curated file in bytes.

compute_committed_digests(regression_dir) #

Compute sha256 digests of the committed regression JSON artefacts.

The digests are taken over the bytes exactly as they sit on disk (placeholder text included), so a CI recompute is deterministic. Only files that exist are included.

Parameters:

Name Type Description Default
regression_dir Path

The test case regression/ directory.

required

Returns:

Type Description
dict[str, str]

Mapping of {relpath: sha256} for each present committed artefact.

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
def compute_committed_digests(regression_dir: Path) -> dict[str, str]:
    """
    Hash every committed regression JSON artefact that is present on disk.

    Each digest covers the file's bytes exactly as stored (placeholder text included),
    so recomputing it in CI is deterministic. Artefacts listed in
    ``COMMITTED_BUNDLE_FILES`` that do not exist are simply left out of the result.

    Parameters
    ----------
    regression_dir
        The test case ``regression/`` directory.

    Returns
    -------
    :
        Mapping of ``{relpath: sha256}`` for each present committed artefact.
    """
    # Lazily pair each tracked name with its location so that the existence check
    # and the hashing still happen file by file, in declaration order.
    located = ((name, regression_dir / name) for name in COMMITTED_BUNDLE_FILES)
    return {name: sha256_file(location) for name, location in located if location.exists()}

sha256_bytes(data) #

Compute the sha256 digest of an in-memory byte string.

Parameters:

Name Type Description Default
data bytes

The bytes to hash.

required

Returns:

Type Description
str

The hex-encoded sha256 digest.

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
def sha256_bytes(data: bytes) -> str:
    """
    Hash an in-memory byte string with sha256.

    Parameters
    ----------
    data
        The bytes to hash.

    Returns
    -------
    :
        The hex-encoded sha256 digest.
    """
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()

sha256_file(path) #

Compute the sha256 digest of a file.

Reuses `pooch.hashes.file_hash` so the digest agrees with pooch elsewhere.

Parameters:

Name Type Description Default
path Path

Path to the file to hash.

required

Returns:

Type Description
str

The hex-encoded sha256 digest.

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
def sha256_file(path: Path) -> str:
    """
    Hash a file on disk with sha256.

    Delegates to :func:`pooch.hashes.file_hash` so the digest agrees with pooch elsewhere.

    Parameters
    ----------
    path
        Path to the file to hash.

    Returns
    -------
    :
        The hex-encoded sha256 digest.
    """
    # The path is converted to ``str`` before being handed to pooch.
    filename = str(path)
    digest = pooch.hashes.file_hash(filename, alg="sha256")
    return digest

verify_committed_integrity(manifest, regression_dir) #

Check that the committed regression artefacts match the manifest digests.

Used by the CI integrity check. An empty return value means the bundle is intact.

Parameters:

Name Type Description Default
manifest Manifest

The manifest holding the expected committed digests.

required
regression_dir Path

The test case regression/ directory to verify against.

required

Returns:

Type Description
list[str]

A list of human-readable mismatch descriptions; empty when everything matches.

Source code in packages/climate-ref-core/src/climate_ref_core/regression/manifest.py
def verify_committed_integrity(manifest: Manifest, regression_dir: Path) -> list[str]:
    """
    Compare the committed regression artefacts on disk with the manifest's digests.

    Used by the CI integrity check. Every entry in ``manifest.committed`` is looked up
    under ``regression_dir``; a missing file or a differing sha256 produces one message.
    An empty return value means the bundle is intact.

    Parameters
    ----------
    manifest
        The manifest holding the expected committed digests.
    regression_dir
        The test case ``regression/`` directory to verify against.

    Returns
    -------
    :
        A list of human-readable mismatch descriptions; empty when everything matches.
    """
    problems: list[str] = []
    for relpath, expected in manifest.committed.items():
        candidate = regression_dir / relpath
        if candidate.exists():
            # Only hash files that are actually present.
            actual = sha256_file(candidate)
            if actual != expected:
                problems.append(
                    f"{relpath}: content differs from manifest — {candidate} "
                    f"(manifest sha256 {expected}, on-disk sha256 {actual})"
                )
        else:
            problems.append(
                f"{relpath}: missing on disk — expected at {candidate} (manifest sha256 {expected})"
            )
    return problems