# Source: neuroconv.tools.nwb_helpers._configuration_models._base_backend
"""Base Pydantic models for DatasetInfo and DatasetConfiguration."""
from typing import Any, ClassVar, Literal, Type
from hdmf.container import DataIO
from pydantic import BaseModel, ConfigDict, Field
from pynwb import NWBFile
from typing_extensions import Self
from ._base_dataset_io import DatasetIOConfiguration
from ._pydantic_pure_json_schema_generator import PureJSONSchemaGenerator
from .._dataset_configuration import get_default_dataset_io_configurations
class BackendConfiguration(BaseModel):
    """A model for matching collections of DatasetConfigurations to a specific backend."""

    backend: ClassVar[Literal["hdf5", "zarr"]]  # short-form backend key
    pretty_backend_name: ClassVar[Literal["HDF5", "Zarr"]]  # display name used by __str__
    data_io_class: ClassVar[Type[DataIO]]  # hdmf DataIO subclass used to wrap datasets for this backend

    model_config = ConfigDict(validate_assignment=True)  # Re-validate model on mutation
    dataset_configurations: dict[str, DatasetIOConfiguration] = Field(
        description=(
            "A mapping from object locations (e.g. `acquisition/TestElectricalSeriesAP/data`) "
            "to their DatasetConfiguration specification that contains all information "
            "for writing the datasets to disk using the specific backend."
        )
    )

    def __str__(self) -> str:
        """Not overriding __repr__ as this is intended to render only when wrapped in print()."""
        # 23 == len(" dataset configurations"), so the underline spans the full title.
        parts = [
            f"\n{self.pretty_backend_name} dataset configurations",
            f"\n{'-' * (len(self.pretty_backend_name) + 23)}",
        ]
        parts.extend(f"\n{dataset_configuration}" for dataset_configuration in self.dataset_configurations.values())
        return "".join(parts)

    # Pydantic models have several API calls for retrieving the schema - override all of them to work
    @classmethod
    def schema(cls, **kwargs) -> dict[str, Any]:
        """Legacy (Pydantic v1) alias; dispatches to `model_json_schema`."""
        return cls.model_json_schema(**kwargs)

    @classmethod
    def schema_json(cls, **kwargs) -> dict[str, Any]:
        """Legacy (Pydantic v1) alias; dispatches to `model_json_schema`."""
        return cls.model_json_schema(**kwargs)

    @classmethod
    def model_json_schema(cls, **kwargs) -> dict[str, Any]:
        """Generate the JSON schema, pinning 'validation' mode and the pure-JSON schema generator."""
        # Raise (rather than assert) so these guards survive `python -O`.
        if "mode" in kwargs:
            raise ValueError("The 'mode' of this method is fixed to be 'validation' and cannot be changed.")
        if "schema_generator" in kwargs:
            raise ValueError("The 'schema_generator' of this method cannot be changed.")
        return super().model_json_schema(mode="validation", schema_generator=PureJSONSchemaGenerator, **kwargs)

    @classmethod
    def from_nwbfile(cls, nwbfile: NWBFile) -> Self:
        """
        Build a backend configuration from an NWBFile using the default dataset configurations.

        Parameters
        ----------
        nwbfile : pynwb.NWBFile
            The in-memory NWBFile whose datasets should be configured for this backend.

        Returns
        -------
        Self
            A new configuration instance keyed by each dataset's location in the file.
        """
        default_dataset_configurations = get_default_dataset_io_configurations(nwbfile=nwbfile, backend=cls.backend)
        dataset_configurations = {
            default_dataset_configuration.location_in_file: default_dataset_configuration
            for default_dataset_configuration in default_dataset_configurations
        }
        return cls(dataset_configurations=dataset_configurations)

    def find_locations_requiring_remapping(self, nwbfile: NWBFile) -> dict[str, DatasetIOConfiguration]:
        """
        Find locations of objects with mismatched IDs in the file.

        This function identifies neurodata objects in the `nwbfile` that have matching locations
        with the current configuration but different object IDs. It returns a dictionary of
        remapped `DatasetIOConfiguration` objects for these mismatched locations.

        Parameters
        ----------
        nwbfile : pynwb.NWBFile
            The NWBFile object to check for mismatched object IDs.

        Returns
        -------
        dict[str, DatasetIOConfiguration]
            A dictionary where:
              * Keys: Locations in the NWB of objects with mismatched IDs.
              * Values: New `DatasetIOConfiguration` objects corresponding to the updated object IDs.

        Raises
        ------
        ValueError
            If the file contains a different number of datasets than this configuration.
        KeyError
            If a dataset location in the file is absent from this configuration.

        Notes
        -----
        * This function only checks for objects with the same location but different IDs.
        * It does not identify objects missing from the current configuration.
        * The returned `DatasetIOConfiguration` objects are copies of the original configurations
          with updated `object_id` fields.
        """
        # Use a fresh default configuration to get mapping of object IDs to locations in file
        default_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=self.backend))
        if len(default_configurations) != len(self.dataset_configurations):
            raise ValueError(
                f"The number of default configurations ({len(default_configurations)}) does not match the number of "
                f"specified configurations ({len(self.dataset_configurations)})!"
            )

        objects_requiring_remapping = {}
        for dataset_configuration in default_configurations:
            location_in_file = dataset_configuration.location_in_file
            object_id = dataset_configuration.object_id

            if location_in_file not in self.dataset_configurations:
                raise KeyError(
                    f"Unable to remap the object IDs for object at location '{location_in_file}'! This "
                    "usually occurs if you are attempting to configure the backend for two files of "
                    "non-equivalent structure."
                )

            former_configuration = self.dataset_configurations[location_in_file]
            if former_configuration.object_id == object_id:
                continue  # Same object ID at this location — nothing to remap.

            # Copy the user's configuration, swapping in the new file's object ID.
            remapped_configuration = former_configuration.model_copy(update={"object_id": object_id})
            objects_requiring_remapping[location_in_file] = remapped_configuration

        return objects_requiring_remapping

    def build_remapped_backend(
        self,
        locations_to_remap: dict[str, DatasetIOConfiguration],
    ) -> Self:
        """
        Build a remapped backend configuration by updating mismatched object IDs.

        This function takes a dictionary of new `DatasetIOConfiguration` objects
        (as returned by `find_locations_requiring_remapping`) and updates a copy of the current configuration
        with these new configurations.

        Parameters
        ----------
        locations_to_remap : dict[str, DatasetIOConfiguration]
            A dictionary mapping locations in the NWBFile to their corresponding new
            `DatasetIOConfiguration` objects with updated IDs.

        Returns
        -------
        Self
            A new instance of the backend configuration class with updated object IDs for
            the specified locations.
        """
        # Deep-copy so the caller's original configuration is left untouched.
        new_backend_configuration = self.model_copy(deep=True)
        new_backend_configuration.dataset_configurations.update(locations_to_remap)
        return new_backend_configuration