Source code for dapla_metadata.datasets.model_validation

"""Handle validation for metadata with pydantic validators and custom warnings."""

from __future__ import annotations

import logging
import warnings
from typing import TYPE_CHECKING
from typing import TextIO

from datadoc_model.all_optional import model
from pydantic import model_validator
from typing_extensions import Self

from dapla_metadata.datasets.utility.constants import DATE_VALIDATION_MESSAGE
from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
from dapla_metadata.datasets.utility.constants import OBLIGATORY_METADATA_WARNING
from dapla_metadata.datasets.utility.utils import get_missing_obligatory_dataset_fields
from dapla_metadata.datasets.utility.utils import (
    get_missing_obligatory_variables_fields,
)
from dapla_metadata.datasets.utility.utils import (
    get_missing_obligatory_variables_pseudo_fields,
)
from dapla_metadata.datasets.utility.utils import get_timestamp_now
from dapla_metadata.datasets.utility.utils import incorrect_date_order
from dapla_metadata.datasets.utility.utils import (
    num_obligatory_dataset_fields_completed,
)
from dapla_metadata.datasets.utility.utils import set_variables_inherit_from_dataset

if TYPE_CHECKING:
    from datetime import datetime

logger = logging.getLogger(__name__)


[docs] class ValidateDatadocMetadata(model.DatadocMetadata): """Class that inherits from DatadocMetadata, providing additional validation."""
[docs] @model_validator(mode="after") def check_date_order(self) -> Self: """Validate the order of date fields. Check that dataset and variable date fields `contains_data_from` and `contains_data_until` are in chronological order. Mode: This validator runs after other validation. Returns: The instance of the model after validation. Raises: ValueError: If `contains_data_until` date is earlier than `contains_data_from date`. """ if self.dataset is not None and incorrect_date_order( self.dataset.contains_data_from, self.dataset.contains_data_until, ): raise ValueError(DATE_VALIDATION_MESSAGE) if self.variables is not None: for v in self.variables: if incorrect_date_order(v.contains_data_from, v.contains_data_until): raise ValueError(DATE_VALIDATION_MESSAGE) return self
[docs] @model_validator(mode="after") def check_metadata_created_date(self) -> Self: """Ensure `metadata_created_date` is set for the dataset. Sets the current timestamp if `metadata_created_date` is None. Mode: This validator runs after other validation. Returns: The instance of the model after validation. """ timestamp: datetime = get_timestamp_now() # --check-untyped-defs if self.dataset is not None and self.dataset.metadata_created_date is None: self.dataset.metadata_created_date = timestamp return self
[docs] @model_validator(mode="after") def check_inherit_values(self) -> Self: """Inherit values from dataset to variables if not set. Sets values for 'data source', 'temporality type', 'contains data from', and 'contains data until' if they are None. Mode: This validator runs after other validation. Returns: The instance of the model after validation. """ if self.variables and self.dataset is not None: set_variables_inherit_from_dataset(self.dataset, self.variables) return self
[docs] @model_validator(mode="after") def check_obligatory_dataset_metadata(self) -> Self: """Check obligatory dataset fields and issue a warning if any are missing. Mode: This validator runs after other validation. Returns: The instance of the model after validation. Raises: ObligatoryDatasetWarning: If not all obligatory dataset metadata fields are filled in. """ if ( self.dataset is not None and num_obligatory_dataset_fields_completed( self.dataset, ) != NUM_OBLIGATORY_DATASET_FIELDS ): warnings.warn( f"{OBLIGATORY_METADATA_WARNING} {get_missing_obligatory_dataset_fields(self.dataset)}", ObligatoryDatasetWarning, stacklevel=2, ) logger.warning( "Type warning: %s.%s %s", ObligatoryDatasetWarning, OBLIGATORY_METADATA_WARNING, get_missing_obligatory_dataset_fields(self.dataset), ) return self
[docs] @model_validator(mode="after") def check_obligatory_variables_metadata(self) -> Self: """Check obligatory variable fields and issue a warning if any are missing. Mode: This validator runs after other validation. Returns: The instance of the model after validation. Raises: ObligatoryVariableWarning: If not all obligatory variable metadata fields are filled in. """ if self.variables is not None: missing_fields_dict = {} for d in get_missing_obligatory_variables_fields(self.variables): for var, fields in d.items(): missing_fields_dict[var] = fields.copy() for d in get_missing_obligatory_variables_pseudo_fields(self.variables): for var, fields in d.items(): if var in missing_fields_dict: missing_fields_dict[var].extend(fields) else: missing_fields_dict[var] = fields.copy() missing_fields = [ {var: fields} for var, fields in missing_fields_dict.items() ] if missing_fields: message = f"{OBLIGATORY_METADATA_WARNING} {missing_fields}" warnings.warn(message, ObligatoryVariableWarning, stacklevel=2) logger.warning( "Type warning: %s.%s %s", ObligatoryVariableWarning, OBLIGATORY_METADATA_WARNING, missing_fields, ) return self
[docs] class ValidationWarning(UserWarning): """Custom warning for validation purposes."""
[docs] class ObligatoryDatasetWarning(UserWarning): """Custom warning for checking obligatory metadata for dataset."""
[docs] class ObligatoryVariableWarning(UserWarning): """Custom warning for checking obligatory metadata for variables."""
[docs] def custom_warning_handler( message: Warning | str, category: type[Warning], filename: str, lineno: int, file: TextIO | None = None, # noqa: ARG001 remove causes incompatible types line: str | None = None, # noqa: ARG001 remove causes incompatible types ) -> None: """Handle warnings.""" print( # noqa: T201 f"Warning: {message}, Category: {category.__name__}, Filename: {filename}, Line: {lineno}", )
warnings.showwarning = custom_warning_handler warnings.simplefilter("always")