Skip to content

Specs

SpecCache

aitaem.specs.loader.SpecCache

Eagerly-loaded cache for metric, slice, and segment specs.

Use from_yaml() or from_string() as the primary entry points. The constructor creates an empty cache; specs can be added via add().

Source code in aitaem/specs/loader.py
class SpecCache:
    """Eagerly-loaded cache for metric, slice, and segment specs.

    Use from_yaml() or from_string() as the primary entry points.
    The constructor creates an empty cache; specs can be added via add().

    Specs are kept in three dictionaries keyed by spec name, one per spec
    kind. Duplicate names are resolved differently per entry point:
    from_yaml() keeps the last spec loaded (and logs a warning), while
    from_string() and add() keep the first one.
    """

    def __init__(self) -> None:
        """Empty cache. Use from_yaml() or from_string() to load specs."""
        self._metrics: dict[str, MetricSpec] = {}
        self._slices: dict[str, SliceSpec] = {}
        self._segments: dict[str, SegmentSpec] = {}

    @classmethod
    def from_yaml(
        cls,
        metric_paths: str | list[str] | None = None,
        slice_paths: str | list[str] | None = None,
        segment_paths: str | list[str] | None = None,
    ) -> "SpecCache":
        """Load and validate all specs from YAML files or directories.

        Loading is eager — all specs are loaded and validated before returning.
        Each argument may be a single path, a list of paths, or None (nothing
        to load for that kind). After loading, composite slice
        cross-references are checked.

        Raises:
            FileNotFoundError: if a path does not exist
            SpecValidationError: if any spec is invalid
        """
        cache = cls()
        cache._metrics = cls._load_paths_strict(metric_paths, MetricSpec)  # type: ignore[arg-type, assignment]
        cache._slices = cls._load_paths_strict(slice_paths, SliceSpec)  # type: ignore[arg-type, assignment]
        cache._segments = cls._load_paths_strict(segment_paths, SegmentSpec)  # type: ignore[arg-type, assignment]
        cache._validate_slice_cross_references()
        return cache

    @classmethod
    def from_string(
        cls,
        metric_yaml: str | list[str] | None = None,
        slice_yaml: str | list[str] | None = None,
        segment_yaml: str | list[str] | None = None,
    ) -> "SpecCache":
        """Load specs from YAML strings. Validates eagerly.

        Each argument can be a single YAML string or a list of YAML strings.
        If two strings of the same kind define the same spec name, the first
        one is kept. After loading, composite slice cross-references are
        checked.

        Raises:
            SpecValidationError: if any spec is invalid
        """
        cache = cls()
        for yaml_str in cls._normalize_strings(metric_yaml):
            spec = load_spec_from_string(yaml_str, MetricSpec)
            cache._metrics.setdefault(spec.name, spec)  # type: ignore[arg-type]
        for yaml_str in cls._normalize_strings(slice_yaml):
            spec = load_spec_from_string(yaml_str, SliceSpec)
            cache._slices.setdefault(spec.name, spec)  # type: ignore[arg-type]
        for yaml_str in cls._normalize_strings(segment_yaml):
            spec = load_spec_from_string(yaml_str, SegmentSpec)
            cache._segments.setdefault(spec.name, spec)  # type: ignore[arg-type]
        cache._validate_slice_cross_references()
        return cache

    def add(self, spec: MetricSpec | SliceSpec | SegmentSpec) -> None:
        """Add a spec programmatically. First-write-wins for duplicate names.

        Slice cross-references are not re-validated by this method.

        Raises:
            TypeError: if spec is not a MetricSpec, SliceSpec, or SegmentSpec
        """
        if isinstance(spec, MetricSpec):
            self._metrics.setdefault(spec.name, spec)
        elif isinstance(spec, SliceSpec):
            self._slices.setdefault(spec.name, spec)
        elif isinstance(spec, SegmentSpec):
            self._segments.setdefault(spec.name, spec)
        else:
            # Previously an unsupported object was dropped without any signal,
            # which surfaced later as a confusing "spec not found".
            raise TypeError(
                f"Unsupported spec type: {type(spec).__name__}; "
                "expected MetricSpec, SliceSpec, or SegmentSpec"
            )

    def get_metric(self, name: str) -> MetricSpec:
        """Return MetricSpec for the given name.

        Raises:
            SpecNotFoundError: if name not found
        """
        if name not in self._metrics:
            raise SpecNotFoundError("metric", name, [])
        return self._metrics[name]

    def get_slice(self, name: str) -> SliceSpec:
        """Return SliceSpec for the given name.

        Raises:
            SpecNotFoundError: if name not found
        """
        if name not in self._slices:
            raise SpecNotFoundError("slice", name, [])
        return self._slices[name]

    def get_segment(self, name: str) -> SegmentSpec:
        """Return SegmentSpec for the given name.

        Raises:
            SpecNotFoundError: if name not found
        """
        if name not in self._segments:
            raise SpecNotFoundError("segment", name, [])
        return self._segments[name]

    def clear(self) -> None:
        """Clear all cached specs."""
        # Rebind to fresh dicts rather than clearing in place.
        self._metrics = {}
        self._slices = {}
        self._segments = {}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_paths(paths: str | list[str] | None) -> list[Path]:
        """Return paths as a list of Path objects (empty list for None)."""
        if paths is None:
            return []
        # A single str or Path is wrapped; anything else is iterated.
        if isinstance(paths, (str, Path)):
            return [Path(paths)]
        return [Path(p) for p in paths]

    @staticmethod
    def _normalize_strings(strings: str | list[str] | None) -> list[str]:
        """Return strings as a list of str (empty list for None)."""
        if strings is None:
            return []
        if isinstance(strings, str):
            return [strings]
        return list(strings)

    @classmethod
    def _load_paths_strict(
        cls, paths: str | list[str] | None, spec_type: SpecType
    ) -> dict[str, AnySpec]:
        """Load specs from paths/directories, raising on any error.

        A directory contributes its direct '*.yaml' and '*.yml' children in
        sorted order (no recursion). When two files define the same spec
        name, a warning is logged and the later one replaces the earlier one.

        Raises:
            FileNotFoundError: if a path does not exist
        """
        result: dict[str, AnySpec] = {}
        for path in cls._normalize_paths(paths):
            if not path.exists():
                raise FileNotFoundError(f"Spec path not found: {path}")
            # Reduce both cases to "a list of files" so loading and duplicate
            # handling live in exactly one place.
            if path.is_dir():
                yaml_files = sorted([*path.glob("*.yaml"), *path.glob("*.yml")])
            else:
                yaml_files = [path]
            for yaml_file in yaml_files:
                spec = spec_type.from_yaml(yaml_file)
                if spec.name in result:
                    logger.warning("Duplicate spec name '%s'. Overwriting.", spec.name)
                result[spec.name] = spec
        return result

    def _validate_slice_cross_references(self) -> None:
        """Validate that composite specs' cross-product references resolve.

        Only the first offending reference is reported.

        Raises:
            SpecValidationError: if a referenced name is missing or is itself composite.
        """
        for spec_name, spec in self._slices.items():
            if not spec.is_composite:
                continue
            for ref_name in spec.cross_product:
                if ref_name not in self._slices:
                    raise SpecValidationError(
                        "slice",
                        spec_name,
                        [
                            ValidationError(
                                field="cross_product",
                                message=f"Referenced slice '{ref_name}' not found in loaded slices",
                            )
                        ],
                    )
                ref_spec = self._slices[ref_name]
                if ref_spec.is_composite:
                    raise SpecValidationError(
                        "slice",
                        spec_name,
                        [
                            ValidationError(
                                field="cross_product",
                                message=f"Nested composite slices not supported (Phase 1): "
                                f"'{ref_name}' is also composite",
                            )
                        ],
                    )

__init__

__init__() -> None

Empty cache. Use from_yaml() or from_string() to load specs.

Source code in aitaem/specs/loader.py
def __init__(self) -> None:
    """Create a cache that holds no specs.

    Populate it through from_yaml() / from_string(), which build and return
    a loaded cache.
    """
    # One name -> spec mapping per spec kind.
    self._metrics: dict[str, MetricSpec] = dict()
    self._slices: dict[str, SliceSpec] = dict()
    self._segments: dict[str, SegmentSpec] = dict()

from_yaml classmethod

from_yaml(metric_paths: str | list[str] | None = None, slice_paths: str | list[str] | None = None, segment_paths: str | list[str] | None = None) -> 'SpecCache'

Load and validate all specs from YAML files or directories.

Loading is eager — all specs are loaded and validated before returning.

Raises:

Type Description
FileNotFoundError

if a path does not exist

SpecValidationError

if any spec is invalid

Source code in aitaem/specs/loader.py
@classmethod
def from_yaml(
    cls,
    metric_paths: str | list[str] | None = None,
    slice_paths: str | list[str] | None = None,
    segment_paths: str | list[str] | None = None,
) -> "SpecCache":
    """Build a cache from YAML spec files and/or directories of spec files.

    Nothing is loaded lazily: metrics, slices, and segments are all read
    up front, and composite slice cross-references are validated before
    the cache is returned.

    Raises:
        FileNotFoundError: if a path does not exist
        SpecValidationError: if any spec is invalid
    """
    loaded = cls()
    # (target attribute, caller-supplied paths, spec class), handled in order.
    plan = (
        ("_metrics", metric_paths, MetricSpec),
        ("_slices", slice_paths, SliceSpec),
        ("_segments", segment_paths, SegmentSpec),
    )
    for attr, paths, spec_type in plan:
        setattr(loaded, attr, cls._load_paths_strict(paths, spec_type))  # type: ignore[arg-type]
    loaded._validate_slice_cross_references()
    return loaded

from_string classmethod

from_string(metric_yaml: str | list[str] | None = None, slice_yaml: str | list[str] | None = None, segment_yaml: str | list[str] | None = None) -> 'SpecCache'

Load specs from YAML strings. Validates eagerly.

Each argument can be a single YAML string or a list of YAML strings.

Raises:

Type Description
SpecValidationError

if any spec is invalid

Source code in aitaem/specs/loader.py
@classmethod
def from_string(
    cls,
    metric_yaml: str | list[str] | None = None,
    slice_yaml: str | list[str] | None = None,
    segment_yaml: str | list[str] | None = None,
) -> "SpecCache":
    """Build a cache from in-memory YAML documents, validating eagerly.

    Every argument accepts one YAML string, a list of YAML strings, or
    None. When several documents of one kind share a spec name, only the
    first is kept. Composite slice cross-references are validated before
    the cache is returned.

    Raises:
        SpecValidationError: if any spec is invalid
    """
    loaded = cls()
    # (caller-supplied YAML, spec class, destination dict), handled in order.
    sources = (
        (metric_yaml, MetricSpec, loaded._metrics),
        (slice_yaml, SliceSpec, loaded._slices),
        (segment_yaml, SegmentSpec, loaded._segments),
    )
    for yaml_input, spec_type, bucket in sources:
        for document in cls._normalize_strings(yaml_input):
            parsed = load_spec_from_string(document, spec_type)
            # setdefault keeps an already-registered spec of the same name.
            bucket.setdefault(parsed.name, parsed)  # type: ignore[arg-type]
    loaded._validate_slice_cross_references()
    return loaded

add

add(spec: MetricSpec | SliceSpec | SegmentSpec) -> None

Add a spec programmatically. First-write-wins for duplicate names.

Source code in aitaem/specs/loader.py
def add(self, spec: MetricSpec | SliceSpec | SegmentSpec) -> None:
    """Add a spec programmatically. First-write-wins for duplicate names.

    The spec is stored under spec.name in the dictionary matching its type;
    if that name is already present the existing spec is kept. Slice
    cross-references are not re-validated by this method.

    Raises:
        TypeError: if spec is not a MetricSpec, SliceSpec, or SegmentSpec
    """
    if isinstance(spec, MetricSpec):
        self._metrics.setdefault(spec.name, spec)
    elif isinstance(spec, SliceSpec):
        self._slices.setdefault(spec.name, spec)
    elif isinstance(spec, SegmentSpec):
        self._segments.setdefault(spec.name, spec)
    else:
        # Previously an unsupported object was dropped without any signal,
        # which surfaced later as a confusing "spec not found".
        raise TypeError(
            f"Unsupported spec type: {type(spec).__name__}; "
            "expected MetricSpec, SliceSpec, or SegmentSpec"
        )

get_metric

get_metric(name: str) -> MetricSpec

Return MetricSpec for the given name.

Raises:

Type Description
SpecNotFoundError

if name not found

Source code in aitaem/specs/loader.py
def get_metric(self, name: str) -> MetricSpec:
    """Look up a cached MetricSpec by its name.

    Raises:
        SpecNotFoundError: if name not found
    """
    is_known = name in self._metrics
    if is_known:
        return self._metrics[name]
    raise SpecNotFoundError("metric", name, [])

get_slice

get_slice(name: str) -> SliceSpec

Return SliceSpec for the given name.

Raises:

Type Description
SpecNotFoundError

if name not found

Source code in aitaem/specs/loader.py
def get_slice(self, name: str) -> SliceSpec:
    """Look up a cached SliceSpec by its name.

    Raises:
        SpecNotFoundError: if name not found
    """
    is_known = name in self._slices
    if is_known:
        return self._slices[name]
    raise SpecNotFoundError("slice", name, [])

get_segment

get_segment(name: str) -> SegmentSpec

Return SegmentSpec for the given name.

Raises:

Type Description
SpecNotFoundError

if name not found

Source code in aitaem/specs/loader.py
def get_segment(self, name: str) -> SegmentSpec:
    """Look up a cached SegmentSpec by its name.

    Raises:
        SpecNotFoundError: if name not found
    """
    is_known = name in self._segments
    if is_known:
        return self._segments[name]
    raise SpecNotFoundError("segment", name, [])

clear

clear() -> None

Clear all cached specs.

Source code in aitaem/specs/loader.py
def clear(self) -> None:
    """Drop every cached metric, slice, and segment spec."""
    # Each store is rebound to a brand-new dict (not emptied in place), so a
    # dict object obtained earlier is left untouched.
    for store in ("_metrics", "_slices", "_segments"):
        setattr(self, store, {})

MetricSpec

aitaem.specs.metric.MetricSpec dataclass

Source code in aitaem/specs/metric.py
@dataclass(frozen=True)
class MetricSpec:
    """Immutable metric definition, loadable from a YAML 'metric:' mapping."""

    name: str
    source: str
    numerator: str
    timestamp_col: str
    description: str = ""
    denominator: str | None = None  # None when absent or empty in the YAML
    # None when absent or empty in the YAML.
    # NOTE(review): a list value makes instances unhashable despite frozen=True.
    entities: list[str] | None = None

    @classmethod
    def from_yaml(cls, yaml_input: str | Path) -> "MetricSpec":
        """Load and validate a MetricSpec from a YAML file path or YAML string.

        A Path argument is always read as a file. A str argument is read as a
        file only if it names an existing path; otherwise it is treated as a
        YAML string. The document must contain a top-level 'metric' mapping.
        Keys that are not MetricSpec fields are ignored (logged at debug level).

        Raises:
            SpecValidationError: if validation fails or YAML is malformed
            FileNotFoundError: if path provided but file does not exist
        """
        is_path = isinstance(yaml_input, Path)
        path: Path = yaml_input if isinstance(yaml_input, Path) else Path(str(yaml_input))

        if is_path:
            read_file = True
        else:
            # Probing inline YAML text as a path can fail at the OS level (for
            # example "File name too long" for a large document) or be rejected
            # outright (embedded NUL). Such input cannot name a file, so treat
            # it as YAML text instead of letting the probe error escape.
            try:
                read_file = path.exists()
            except (OSError, ValueError):
                read_file = False

        if read_file:
            if not path.exists():
                raise FileNotFoundError(f"Spec file not found: {path}")
            try:
                raw = path.read_text(encoding="utf-8")
            except OSError as e:
                raise FileNotFoundError(f"Cannot read file: {path}") from e
        else:
            raw = str(yaml_input)

        if not raw or not raw.strip():
            raise SpecValidationError("metric", None, [])

        try:
            data = yaml.safe_load(raw)
        except yaml.YAMLError as e:
            from aitaem.utils.validation import ValidationError

            raise SpecValidationError(
                "metric", None, [ValidationError(field="yaml", message=f"Invalid YAML syntax: {e}")]
            ) from e

        if not isinstance(data, dict) or "metric" not in data:
            from aitaem.utils.validation import ValidationError

            got_keys = list(data.keys()) if isinstance(data, dict) else []
            raise SpecValidationError(
                "metric",
                None,
                [
                    ValidationError(
                        field="yaml", message=f"Expected top-level key 'metric', got: {got_keys}"
                    )
                ],
            )

        spec_dict = data["metric"]
        if not isinstance(spec_dict, dict):
            from aitaem.utils.validation import ValidationError

            raise SpecValidationError(
                "metric",
                None,
                [ValidationError(field="metric", message="'metric' value must be a mapping")],
            )

        result = validate_metric_spec(spec_dict)
        # Only report the name in errors when it is actually a string.
        name = spec_dict.get("name") if isinstance(spec_dict.get("name"), str) else None

        if not result.valid:
            raise SpecValidationError("metric", name, result.errors)

        # Empty/falsy optional values are normalised to None.
        denominator = spec_dict.get("denominator") or None
        entities_raw = spec_dict.get("entities")
        entities = list(entities_raw) if entities_raw else None

        unknown_fields = set(spec_dict.keys()) - {f.name for f in fields(cls)}
        if unknown_fields:
            logger.debug("MetricSpec '%s': ignoring unknown fields: %s", name, unknown_fields)

        return cls(
            name=spec_dict["name"],
            source=spec_dict["source"],
            numerator=spec_dict["numerator"],
            timestamp_col=spec_dict["timestamp_col"],
            description=spec_dict.get("description", ""),
            denominator=denominator,
            entities=entities,
        )

    def validate(self) -> ValidationResult:
        """Validate spec fields and return a ValidationResult (does not raise).

        The optional denominator and entities fields are only passed to the
        validator when they are not None.
        """
        spec_dict: dict[str, object] = {
            "name": self.name,
            "source": self.source,
            "numerator": self.numerator,
            "timestamp_col": self.timestamp_col,
            "description": self.description,
        }
        if self.denominator is not None:
            spec_dict["denominator"] = self.denominator
        if self.entities is not None:
            spec_dict["entities"] = self.entities
        return validate_metric_spec(spec_dict)

from_yaml classmethod

from_yaml(yaml_input: str | Path) -> 'MetricSpec'

Load and validate a MetricSpec from a YAML file path or YAML string.

If yaml_input is a valid file path (exists on disk), it is read as a file. Otherwise, it is treated as a YAML string.

Raises:

Type Description
SpecValidationError

if validation fails or YAML is malformed

FileNotFoundError

if path provided but file does not exist

Source code in aitaem/specs/metric.py
@classmethod
def from_yaml(cls, yaml_input: str | Path) -> "MetricSpec":
    """Load and validate a MetricSpec from a YAML file path or YAML string.

    A Path argument is always read as a file. A str argument is read as a
    file only if it names an existing path; otherwise it is treated as a
    YAML string. The document must contain a top-level 'metric' mapping.
    Keys that are not MetricSpec fields are ignored (logged at debug level).

    Raises:
        SpecValidationError: if validation fails or YAML is malformed
        FileNotFoundError: if path provided but file does not exist
    """
    is_path = isinstance(yaml_input, Path)
    path: Path = yaml_input if isinstance(yaml_input, Path) else Path(str(yaml_input))

    if is_path:
        read_file = True
    else:
        # Probing inline YAML text as a path can fail at the OS level (for
        # example "File name too long" for a large document) or be rejected
        # outright (embedded NUL). Such input cannot name a file, so treat
        # it as YAML text instead of letting the probe error escape.
        try:
            read_file = path.exists()
        except (OSError, ValueError):
            read_file = False

    if read_file:
        if not path.exists():
            raise FileNotFoundError(f"Spec file not found: {path}")
        try:
            raw = path.read_text(encoding="utf-8")
        except OSError as e:
            raise FileNotFoundError(f"Cannot read file: {path}") from e
    else:
        raw = str(yaml_input)

    if not raw or not raw.strip():
        raise SpecValidationError("metric", None, [])

    try:
        data = yaml.safe_load(raw)
    except yaml.YAMLError as e:
        from aitaem.utils.validation import ValidationError

        raise SpecValidationError(
            "metric", None, [ValidationError(field="yaml", message=f"Invalid YAML syntax: {e}")]
        ) from e

    if not isinstance(data, dict) or "metric" not in data:
        from aitaem.utils.validation import ValidationError

        got_keys = list(data.keys()) if isinstance(data, dict) else []
        raise SpecValidationError(
            "metric",
            None,
            [
                ValidationError(
                    field="yaml", message=f"Expected top-level key 'metric', got: {got_keys}"
                )
            ],
        )

    spec_dict = data["metric"]
    if not isinstance(spec_dict, dict):
        from aitaem.utils.validation import ValidationError

        raise SpecValidationError(
            "metric",
            None,
            [ValidationError(field="metric", message="'metric' value must be a mapping")],
        )

    result = validate_metric_spec(spec_dict)
    # Only report the name in errors when it is actually a string.
    name = spec_dict.get("name") if isinstance(spec_dict.get("name"), str) else None

    if not result.valid:
        raise SpecValidationError("metric", name, result.errors)

    # Empty/falsy optional values are normalised to None.
    denominator = spec_dict.get("denominator") or None
    entities_raw = spec_dict.get("entities")
    entities = list(entities_raw) if entities_raw else None

    unknown_fields = set(spec_dict.keys()) - {f.name for f in fields(cls)}
    if unknown_fields:
        logger.debug("MetricSpec '%s': ignoring unknown fields: %s", name, unknown_fields)

    return cls(
        name=spec_dict["name"],
        source=spec_dict["source"],
        numerator=spec_dict["numerator"],
        timestamp_col=spec_dict["timestamp_col"],
        description=spec_dict.get("description", ""),
        denominator=denominator,
        entities=entities,
    )

validate

validate() -> ValidationResult

Validate spec fields and return a ValidationResult (does not raise).

Source code in aitaem/specs/metric.py
def validate(self) -> ValidationResult:
    """Run the metric validator over this spec's fields without raising.

    Returns the validator's ValidationResult. The optional denominator and
    entities fields are included only when they are not None.
    """
    payload: dict[str, object] = {
        "name": self.name,
        "source": self.source,
        "numerator": self.numerator,
        "timestamp_col": self.timestamp_col,
        "description": self.description,
    }
    optional = (("denominator", self.denominator), ("entities", self.entities))
    payload.update({key: value for key, value in optional if value is not None})
    return validate_metric_spec(payload)

SliceSpec

aitaem.specs.slice.SliceSpec dataclass

Source code in aitaem/specs/slice.py
@dataclass(frozen=True)
class SliceSpec:
    """Immutable slice definition, loadable from a YAML 'slice:' mapping.

    A spec loaded from YAML takes one of three forms, chosen by which key is
    present: 'cross_product' (composite), 'where' (wildcard), or 'values'
    (leaf).
    """

    name: str
    values: tuple[SliceValue, ...] = ()  # Leaf spec — direct WHERE-based values
    cross_product: tuple[str, ...] = ()  # Composite spec — names of other SliceSpecs
    column: str = ""  # Wildcard spec — bare column name
    description: str = ""

    @property
    def is_composite(self) -> bool:
        """True if this spec references other SliceSpecs via cross_product."""
        return bool(self.cross_product)

    @property
    def is_wildcard(self) -> bool:
        """True if this spec auto-discovers values from a column at query time."""
        return bool(self.column)

    @classmethod
    def from_yaml(cls, yaml_input: str | Path) -> "SliceSpec":
        """Load and validate a SliceSpec from a YAML file path or YAML string.

        Expects top-level key 'slice:'.

        A Path argument is always read as a file. A str argument is read as a
        file only if it names an existing path; otherwise it is treated as a
        YAML string. Unrecognised keys are ignored (logged at debug level).

        Raises:
            SpecValidationError: if validation fails or YAML is malformed
            FileNotFoundError: if path provided but file does not exist
        """
        is_path = isinstance(yaml_input, Path)
        path: Path = yaml_input if isinstance(yaml_input, Path) else Path(str(yaml_input))

        if is_path:
            read_file = True
        else:
            # Probing inline YAML text as a path can fail at the OS level (for
            # example "File name too long" for a large document) or be rejected
            # outright (embedded NUL). Such input cannot name a file, so treat
            # it as YAML text instead of letting the probe error escape.
            try:
                read_file = path.exists()
            except (OSError, ValueError):
                read_file = False

        if read_file:
            if not path.exists():
                raise FileNotFoundError(f"Spec file not found: {path}")
            try:
                raw = path.read_text(encoding="utf-8")
            except OSError as e:
                raise FileNotFoundError(f"Cannot read file: {path}") from e
        else:
            raw = str(yaml_input)

        if not raw or not raw.strip():
            raise SpecValidationError("slice", None, [])

        try:
            data = yaml.safe_load(raw)
        except yaml.YAMLError as e:
            from aitaem.utils.validation import ValidationError

            raise SpecValidationError(
                "slice", None, [ValidationError(field="yaml", message=f"Invalid YAML syntax: {e}")]
            ) from e

        if not isinstance(data, dict) or "slice" not in data:
            from aitaem.utils.validation import ValidationError

            got_keys = list(data.keys()) if isinstance(data, dict) else []
            raise SpecValidationError(
                "slice",
                None,
                [
                    ValidationError(
                        field="yaml", message=f"Expected top-level key 'slice', got: {got_keys}"
                    )
                ],
            )

        spec_dict = data["slice"]
        if not isinstance(spec_dict, dict):
            from aitaem.utils.validation import ValidationError

            raise SpecValidationError(
                "slice",
                None,
                [ValidationError(field="slice", message="'slice' value must be a mapping")],
            )

        result = validate_slice_spec(spec_dict)
        # Only report the name in errors when it is actually a string.
        name = spec_dict.get("name") if isinstance(spec_dict.get("name"), str) else None

        if not result.valid:
            raise SpecValidationError("slice", name, result.errors)

        raw_cross_product = spec_dict.get("cross_product")
        raw_where = spec_dict.get("where")
        values: tuple[SliceValue, ...] = ()
        cross_product: tuple[str, ...] = ()
        column: str = ""
        # Precedence when several keys are present: cross_product, then where,
        # then values.
        if raw_cross_product is not None:
            # Composite spec
            cross_product = tuple(raw_cross_product)
        elif raw_where is not None:
            # Wildcard spec — the YAML key is 'where'; it is stored as `column`
            column = str(raw_where)
        else:
            # Leaf spec
            values = tuple(
                SliceValue(name=v["name"], where=v["where"]) for v in spec_dict["values"]
            )

        unknown_fields = set(spec_dict.keys()) - {
            "name",
            "values",
            "cross_product",
            "where",
            "description",
        }
        if unknown_fields:
            logger.debug("SliceSpec '%s': ignoring unknown fields: %s", name, unknown_fields)

        return cls(
            name=spec_dict["name"],
            values=values,
            cross_product=cross_product,
            column=column,
            description=spec_dict.get("description", ""),
        )

    def validate(self) -> ValidationResult:
        """Validate spec fields and return a ValidationResult (does not raise).

        The spec is converted back to its YAML-shaped mapping first: a
        composite spec emits 'cross_product', a wildcard spec emits 'where',
        and any other spec emits 'values'.
        """
        spec_dict: dict = {"name": self.name, "description": self.description}
        if self.is_composite:
            spec_dict["cross_product"] = list(self.cross_product)
        elif self.is_wildcard:
            spec_dict["where"] = self.column
        else:
            spec_dict["values"] = [{"name": v.name, "where": v.where} for v in self.values]
        return validate_slice_spec(spec_dict)

is_composite property

is_composite: bool

True if this spec references other SliceSpecs via cross_product.

is_wildcard property

is_wildcard: bool

True if this spec auto-discovers values from a column at query time.

from_yaml classmethod

from_yaml(yaml_input: str | Path) -> 'SliceSpec'

Load and validate a SliceSpec from a YAML file path or YAML string.

Expects top-level key 'slice:'.

Raises:

Type Description
SpecValidationError

if validation fails or YAML is malformed

FileNotFoundError

if path provided but file does not exist

Source code in aitaem/specs/slice.py
@classmethod
def from_yaml(cls, yaml_input: str | Path) -> "SliceSpec":
    """Load and validate a SliceSpec from a YAML file path or YAML string.

    Expects top-level key 'slice:'.

    A Path argument is always read as a file. A str argument is read as a
    file only if it names an existing path; otherwise it is treated as a
    YAML string. Unrecognised keys are ignored (logged at debug level).

    Raises:
        SpecValidationError: if validation fails or YAML is malformed
        FileNotFoundError: if path provided but file does not exist
    """
    is_path = isinstance(yaml_input, Path)
    path: Path = yaml_input if isinstance(yaml_input, Path) else Path(str(yaml_input))

    if is_path:
        read_file = True
    else:
        # Probing inline YAML text as a path can fail at the OS level (for
        # example "File name too long" for a large document) or be rejected
        # outright (embedded NUL). Such input cannot name a file, so treat
        # it as YAML text instead of letting the probe error escape.
        try:
            read_file = path.exists()
        except (OSError, ValueError):
            read_file = False

    if read_file:
        if not path.exists():
            raise FileNotFoundError(f"Spec file not found: {path}")
        try:
            raw = path.read_text(encoding="utf-8")
        except OSError as e:
            raise FileNotFoundError(f"Cannot read file: {path}") from e
    else:
        raw = str(yaml_input)

    if not raw or not raw.strip():
        raise SpecValidationError("slice", None, [])

    try:
        data = yaml.safe_load(raw)
    except yaml.YAMLError as e:
        from aitaem.utils.validation import ValidationError

        raise SpecValidationError(
            "slice", None, [ValidationError(field="yaml", message=f"Invalid YAML syntax: {e}")]
        ) from e

    if not isinstance(data, dict) or "slice" not in data:
        from aitaem.utils.validation import ValidationError

        got_keys = list(data.keys()) if isinstance(data, dict) else []
        raise SpecValidationError(
            "slice",
            None,
            [
                ValidationError(
                    field="yaml", message=f"Expected top-level key 'slice', got: {got_keys}"
                )
            ],
        )

    spec_dict = data["slice"]
    if not isinstance(spec_dict, dict):
        from aitaem.utils.validation import ValidationError

        raise SpecValidationError(
            "slice",
            None,
            [ValidationError(field="slice", message="'slice' value must be a mapping")],
        )

    result = validate_slice_spec(spec_dict)
    # Only report the name in errors when it is actually a string.
    name = spec_dict.get("name") if isinstance(spec_dict.get("name"), str) else None

    if not result.valid:
        raise SpecValidationError("slice", name, result.errors)

    raw_cross_product = spec_dict.get("cross_product")
    raw_where = spec_dict.get("where")
    values: tuple[SliceValue, ...] = ()
    cross_product: tuple[str, ...] = ()
    column: str = ""
    # Precedence when several keys are present: cross_product, then where,
    # then values.
    if raw_cross_product is not None:
        # Composite spec
        cross_product = tuple(raw_cross_product)
    elif raw_where is not None:
        # Wildcard spec — the YAML key is 'where'; it is stored as `column`
        column = str(raw_where)
    else:
        # Leaf spec
        values = tuple(
            SliceValue(name=v["name"], where=v["where"]) for v in spec_dict["values"]
        )

    unknown_fields = set(spec_dict.keys()) - {
        "name",
        "values",
        "cross_product",
        "where",
        "description",
    }
    if unknown_fields:
        logger.debug("SliceSpec '%s': ignoring unknown fields: %s", name, unknown_fields)

    return cls(
        name=spec_dict["name"],
        values=values,
        cross_product=cross_product,
        column=column,
        description=spec_dict.get("description", ""),
    )

validate

validate() -> ValidationResult

Validate spec fields and return a ValidationResult (does not raise).

Source code in aitaem/specs/slice.py
def validate(self) -> ValidationResult:
    """Run the slice validator over this spec's fields without raising.

    The spec is first turned back into a YAML-shaped mapping carrying exactly
    one form-specific key: 'cross_product' for a composite spec, 'where' for
    a wildcard spec, otherwise 'values'.
    """
    payload: dict = {"name": self.name, "description": self.description}

    if self.is_composite:
        payload["cross_product"] = list(self.cross_product)
        return validate_slice_spec(payload)

    if self.is_wildcard:
        payload["where"] = self.column
        return validate_slice_spec(payload)

    payload["values"] = [{"name": entry.name, "where": entry.where} for entry in self.values]
    return validate_slice_spec(payload)

SliceValue

aitaem.specs.slice.SliceValue dataclass

Source code in aitaem/specs/slice.py
@dataclass(frozen=True)
class SliceValue:
    """One named entry in the `values` of a leaf SliceSpec (immutable)."""

    # Label of this entry; read from the 'name' key of a YAML `values` item.
    name: str
    # Read from the 'where' key of a YAML `values` item — presumably a SQL
    # WHERE-style condition; TODO confirm against the query builder.
    where: str

SegmentSpec

aitaem.specs.segment.SegmentSpec dataclass

Source code in aitaem/specs/segment.py
@dataclass(frozen=True)
class SegmentSpec:
    """Immutable segment specification loaded from a ``segment:`` YAML document.

    Fields mirror the YAML keys read by ``from_yaml``: ``name``, ``source``,
    ``values`` (each entry a ``SegmentValue``) and the optional ``description``.
    """

    name: str
    source: str
    values: tuple[SegmentValue, ...]
    description: str = ""

    @classmethod
    def from_yaml(cls, yaml_input: str | Path) -> "SegmentSpec":
        """Load and validate a SegmentSpec from a YAML file path or YAML string.

        A ``Path`` argument is always treated as a file. A ``str`` argument is
        treated as a file only when it names an existing path; otherwise it is
        parsed as inline YAML text.

        Expects top-level key 'segment:'.

        Raises:
            SpecValidationError: if validation fails or YAML is malformed
            FileNotFoundError: if path provided but file does not exist
        """
        is_path = isinstance(yaml_input, Path)
        path: Path = yaml_input if isinstance(yaml_input, Path) else Path(str(yaml_input))

        # Probing the filesystem with inline YAML text can itself fail:
        # Path.exists() re-raises OSError for errors such as ENAMETOOLONG,
        # which any YAML string longer than the filename limit triggers.
        # Such input cannot be an existing file, so treat it as "not found".
        try:
            path_exists = path.exists()
        except OSError:
            path_exists = False

        if is_path or path_exists:
            if not path_exists:
                raise FileNotFoundError(f"Spec file not found: {path}")
            try:
                raw = path.read_text(encoding="utf-8")
            except OSError as e:
                raise FileNotFoundError(f"Cannot read file: {path}") from e
        else:
            raw = str(yaml_input)

        if not raw or not raw.strip():
            raise SpecValidationError("segment", None, [])

        try:
            data = yaml.safe_load(raw)
        except yaml.YAMLError as e:
            from aitaem.utils.validation import ValidationError

            raise SpecValidationError(
                "segment",
                None,
                [ValidationError(field="yaml", message=f"Invalid YAML syntax: {e}")],
            ) from e

        if not isinstance(data, dict) or "segment" not in data:
            from aitaem.utils.validation import ValidationError

            got_keys = list(data.keys()) if isinstance(data, dict) else []
            raise SpecValidationError(
                "segment",
                None,
                [
                    ValidationError(
                        field="yaml", message=f"Expected top-level key 'segment', got: {got_keys}"
                    )
                ],
            )

        spec_dict = data["segment"]
        if not isinstance(spec_dict, dict):
            from aitaem.utils.validation import ValidationError

            raise SpecValidationError(
                "segment",
                None,
                [ValidationError(field="segment", message="'segment' value must be a mapping")],
            )

        result = validate_segment_spec(spec_dict)
        # Only report the name in errors/logs when it is actually a string.
        name = spec_dict.get("name") if isinstance(spec_dict.get("name"), str) else None

        if not result.valid:
            raise SpecValidationError("segment", name, result.errors)

        values = tuple(SegmentValue(name=v["name"], where=v["where"]) for v in spec_dict["values"])

        # Unknown keys are tolerated; they are only mentioned at debug level.
        unknown_fields = set(spec_dict.keys()) - {"name", "source", "values", "description"}
        if unknown_fields:
            logger.debug("SegmentSpec '%s': ignoring unknown fields: %s", name, unknown_fields)

        return cls(
            name=spec_dict["name"],
            source=spec_dict["source"],
            values=values,
            description=spec_dict.get("description", ""),
        )

    def validate(self) -> ValidationResult:
        """Validate spec fields and return a ValidationResult (does not raise)."""
        spec_dict = {
            "name": self.name,
            "source": self.source,
            "values": [{"name": v.name, "where": v.where} for v in self.values],
            "description": self.description,
        }
        return validate_segment_spec(spec_dict)

from_yaml classmethod

from_yaml(yaml_input: str | Path) -> 'SegmentSpec'

Load and validate a SegmentSpec from a YAML file path or YAML string.

Expects top-level key 'segment:'.

Raises:

| Type | Description |
| --- | --- |
| SpecValidationError | if validation fails or YAML is malformed |
| FileNotFoundError | if path provided but file does not exist |

Source code in aitaem/specs/segment.py
@classmethod
def from_yaml(cls, yaml_input: str | Path) -> "SegmentSpec":
    """Load and validate a SegmentSpec from a YAML file path or YAML string.

    A ``Path`` argument is always treated as a file. A ``str`` argument is
    treated as a file only when it names an existing path; otherwise it is
    parsed as inline YAML text.

    Expects top-level key 'segment:'.

    Raises:
        SpecValidationError: if validation fails or YAML is malformed
        FileNotFoundError: if path provided but file does not exist
    """
    is_path = isinstance(yaml_input, Path)
    path: Path = yaml_input if isinstance(yaml_input, Path) else Path(str(yaml_input))

    # Probing the filesystem with inline YAML text can itself fail:
    # Path.exists() re-raises OSError for errors such as ENAMETOOLONG,
    # which any YAML string longer than the filename limit triggers.
    # Such input cannot be an existing file, so treat it as "not found".
    try:
        path_exists = path.exists()
    except OSError:
        path_exists = False

    if is_path or path_exists:
        if not path_exists:
            raise FileNotFoundError(f"Spec file not found: {path}")
        try:
            raw = path.read_text(encoding="utf-8")
        except OSError as e:
            raise FileNotFoundError(f"Cannot read file: {path}") from e
    else:
        raw = str(yaml_input)

    if not raw or not raw.strip():
        raise SpecValidationError("segment", None, [])

    try:
        data = yaml.safe_load(raw)
    except yaml.YAMLError as e:
        from aitaem.utils.validation import ValidationError

        raise SpecValidationError(
            "segment",
            None,
            [ValidationError(field="yaml", message=f"Invalid YAML syntax: {e}")],
        ) from e

    if not isinstance(data, dict) or "segment" not in data:
        from aitaem.utils.validation import ValidationError

        got_keys = list(data.keys()) if isinstance(data, dict) else []
        raise SpecValidationError(
            "segment",
            None,
            [
                ValidationError(
                    field="yaml", message=f"Expected top-level key 'segment', got: {got_keys}"
                )
            ],
        )

    spec_dict = data["segment"]
    if not isinstance(spec_dict, dict):
        from aitaem.utils.validation import ValidationError

        raise SpecValidationError(
            "segment",
            None,
            [ValidationError(field="segment", message="'segment' value must be a mapping")],
        )

    result = validate_segment_spec(spec_dict)
    # Only report the name in errors/logs when it is actually a string.
    name = spec_dict.get("name") if isinstance(spec_dict.get("name"), str) else None

    if not result.valid:
        raise SpecValidationError("segment", name, result.errors)

    values = tuple(SegmentValue(name=v["name"], where=v["where"]) for v in spec_dict["values"])

    # Unknown keys are tolerated; they are only mentioned at debug level.
    unknown_fields = set(spec_dict.keys()) - {"name", "source", "values", "description"}
    if unknown_fields:
        logger.debug("SegmentSpec '%s': ignoring unknown fields: %s", name, unknown_fields)

    return cls(
        name=spec_dict["name"],
        source=spec_dict["source"],
        values=values,
        description=spec_dict.get("description", ""),
    )

validate

validate() -> ValidationResult

Validate spec fields and return a ValidationResult (does not raise).

Source code in aitaem/specs/segment.py
def validate(self) -> ValidationResult:
    """Re-check this spec's fields and report the outcome without raising.

    The spec is converted back to its plain-dict form (``name``, ``source``,
    ``values``, ``description``) and passed to ``validate_segment_spec``.

    Returns:
        The ValidationResult produced by ``validate_segment_spec``.
    """
    payload: dict = {"name": self.name, "source": self.source}
    # Each value is serialised back to a {"name", "where"} mapping.
    payload["values"] = [
        {"name": entry.name, "where": entry.where} for entry in self.values
    ]
    payload["description"] = self.description
    return validate_segment_spec(payload)

SegmentValue

aitaem.specs.segment.SegmentValue dataclass

Source code in aitaem/specs/segment.py
@dataclass(frozen=True)
class SegmentValue:
    """One named entry from a segment spec's ``values`` list.

    Frozen dataclass: instances are immutable, compare by field values and
    are hashable.
    """

    # Name given to this entry in the spec.
    name: str
    # Condition string paired with the name
    # (presumably a SQL-style filter expression; confirm against the consumer).
    where: str