# Source code for nomenclature.testing

import logging
from pathlib import Path

import yaml
from yaml import parser, scanner

from nomenclature.definition import DataStructureDefinition
from nomenclature.exceptions import (
    NoTracebackExceptionGroup,
    ProcessorErrorGroup,
    YamlErrorGroup,
)
from nomenclature.processor import (
    DataValidator,
    Processor,
    RegionProcessor,
    RequiredDataValidator,
)

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Characters that must not occur in any yaml file checked by `assert_valid_yaml`;
# "\u202f" is the narrow no-break space (U+202F), easily mistaken for a regular space
ILLEGAL_CHARS = ["\u202f"]


def assert_valid_yaml(path: Path):
    """Assert that all yaml files in `path` can be parsed without errors

    Every file with a ".yaml" or ".yml" suffix in `path` and its sub-folders is
    loaded with :func:`yaml.safe_load` and scanned for the characters listed in
    `ILLEGAL_CHARS`.

    Parameters
    ----------
    path : Path
        Directory that is searched recursively for yaml files

    Raises
    ------
    YamlErrorGroup
        If any file cannot be parsed or contains an illegal character. The group
        holds one scanner/parser error per unparsable file and, if applicable, a
        single :class:`AssertionError` listing the location of every illegal
        character.
    """
    yaml_suffixes = {".yaml", ".yml"}
    exceptions: list[Exception] = []
    # One report entry per illegal character, collected across all files
    special_characters: list[str] = []

    # Iterate over the yaml files in all sub-folders and try loading each
    for file in path.glob("**/*"):
        # The glob also yields directories; a folder that happens to carry a yaml
        # suffix cannot be opened as a file and is skipped
        if file.suffix not in yaml_suffixes or not file.is_file():
            continue

        try:
            with open(file, "r", encoding="utf-8") as stream:
                yaml.safe_load(stream)
        except (scanner.ScannerError, parser.ParserError) as error:
            exceptions.append(error)

        # Scan the raw text independently of the parser result, so that illegal
        # characters are also reported for files that could not be parsed
        with open(file, "r", encoding="utf-8") as stream:
            for index, line in enumerate(stream, start=1):
                special_characters.extend(
                    f"\n - {file.name}, line {index}, col {col}. "
                    for col, char in enumerate(line, start=1)
                    if char in ILLEGAL_CHARS
                )

    if special_characters:
        listing = "".join(special_characters)
        exceptions.append(
            AssertionError(f"Unexpected special character(s): {listing}")
        )

    # Test fails if any file cannot be parsed, raise error with list of these files
    if exceptions:
        raise YamlErrorGroup("Parsing yaml files failed", exceptions)
def _check_mappings(
    path: Path,
    dsd: DataStructureDefinition,
    mappings: str | None = None,
) -> None:
    """Check that the mappings folder in `path` can be read as a `RegionProcessor`

    Parameters
    ----------
    path : Path
        Project directory
    dsd : DataStructureDefinition
        Definition passed on to `RegionProcessor.from_directory`
    mappings : str, optional
        Name of the mappings folder. If not given, the folder "mappings" is
        checked if it exists and skipped otherwise.

    Raises
    ------
    FileNotFoundError
        If `mappings` is given but is not a directory in `path`
    """
    if mappings is None:
        # The default folder is optional, its absence is not an error
        if (path / "mappings").is_dir():
            RegionProcessor.from_directory(path / "mappings", dsd)
    elif (path / mappings).is_dir():
        RegionProcessor.from_directory(path / mappings, dsd)
    else:
        raise FileNotFoundError(f"Mappings directory not found: {path / mappings}")


def _collect_processor_errors(
    path: Path,
    processor: type[RequiredDataValidator] | type[DataValidator],
    dsd: DataStructureDefinition,
) -> None:
    """Validate each entry of `path` with `processor` and report all failures at once

    Parameters
    ----------
    path : Path
        Directory whose entries are passed one by one to `processor.from_file`
    processor : type[RequiredDataValidator] | type[DataValidator]
        Processor class used to load and validate each entry
    dsd : DataStructureDefinition
        Definition passed to `validate_with_definition`

    Raises
    ------
    ProcessorErrorGroup
        If loading or validating at least one entry raised an exception; the
        group holds all collected exceptions
    """
    errors: list[Exception] = []
    # `iterdir` yields entries in arbitrary order, sort them so that the errors
    # are reported in a reproducible order
    for file in sorted(path.iterdir()):
        try:
            processor.from_file(file).validate_with_definition(dsd)
        # Deliberately broad: every failure is collected and raised as one group
        except Exception as exception:
            errors.append(exception)
    if errors:
        raise ProcessorErrorGroup(f"Error(s) checking '{processor.__name__}'", errors)


def _check_processor_directory(
    path: Path,
    dsd: DataStructureDefinition,
    processor: type[RequiredDataValidator] | type[DataValidator],
    processor_arg: str,
    folder: str | None = None,
) -> None:
    """Check the files in a processor folder of `path` against `dsd`

    Parameters
    ----------
    path : Path
        Project directory
    dsd : DataStructureDefinition
        Definition against which the files are validated
    processor : type[RequiredDataValidator] | type[DataValidator]
        Processor class used to load and validate the files
    processor_arg : str
        Default folder name, also used to identify the check in the error message
    folder : str, optional
        Name of the folder. If not given, the folder `processor_arg` is checked
        if it exists and skipped otherwise.

    Raises
    ------
    FileNotFoundError
        If `folder` is given but is not a directory in `path`
    ProcessorErrorGroup
        If any file in the folder fails to load or validate
    """
    if folder is None:
        # The default folder is optional, its absence is not an error
        if (path / processor_arg).is_dir():
            _collect_processor_errors(path / processor_arg, processor, dsd)
    elif (path / folder).is_dir():
        _collect_processor_errors(path / folder, processor, dsd)
    else:
        raise FileNotFoundError(
            f"Directory for '{processor_arg}' not found at: {path / folder}"
        )
[docs] def assert_valid_structure( path: Path, definitions: str = "definitions", mappings: str | None = None, required_data: str | None = None, validate_data: str | None = None, dimensions: list[str] | None = None, ) -> None: """Assert that `path` can be initialized as a :class:`DataStructureDefinition` Parameters ---------- path : Path Project directory to be validated definitions : str, optional Name of the definitions folder, defaults to "definitions" mappings : str, optional Name of the mappings folder, defaults to "mappings" (if this folder exists) required_data : str, optional Name of the folder for required data, defaults to "required_data" (if this folder exists) validate_data : str, optional Name of the folder for data validation criteria, defaults to "validate_data" (if this folder exists) dimensions : list[str], optional Dimensions to be checked, defaults to all sub-folders of `definitions` Notes ----- Folder structure of `path`: - A `definitions` folder is required and must be a valid :class:`DataStructureDefinition` - The `definitions` folder must contain sub-folder(s) to validate - If a `mappings` folder exists, it must be a valid :class:`RegionProcessor` """ if not (path / definitions).is_dir(): raise NotADirectoryError( f"Definitions directory not found: {path / definitions}" ) dsd = DataStructureDefinition(path / definitions, dimensions) if "variable" in dsd.dimensions: dsd.variable.data_validator _check_mappings(path, dsd, mappings) _check_processor_directory( path, dsd, RequiredDataValidator, "required_data", required_data ) _check_processor_directory(path, dsd, DataValidator, "validate_data", validate_data)