Source code for nomenclature.core

import logging

import pyam
from pydantic import validate_call

from nomenclature.definition import DataStructureDefinition
from nomenclature.processor import Processor, RegionProcessor
from nomenclature.processor.nuts import NutsProcessor

logger = logging.getLogger(__name__)


[docs] @validate_call(config={"arbitrary_types_allowed": True}) def process( df: pyam.IamDataFrame, dsd: DataStructureDefinition, dimensions: list[str] | str | None = None, processor: Processor | list[Processor] | None = None, ) -> pyam.IamDataFrame: """Function for validation and region aggregation in one step This function is the recommended way of using the nomenclature package. It performs the following operations: * Validation against the codelists and criteria of a :class:`DataStructureDefinition` * Region processing, which can occur via one or more :class:`Processor` instances. This can be: * Region aggregation (via :class:`RegionProcessor`), which renames and aggregates based on user-provided mappings. 1. Model native regions not listed in the model mapping will be dropped 2. Model native regions can be renamed 3. Aggregation from model native regions to "common regions" * NUTS aggregation (via :class:`NutsProcessor`), which aggregates NUTS3 -> NUTS2 -> NUTS1 -> Country -> EU27(+UK) * Validation of consistency across the variable hierarchy Parameters ---------- df : :class:`pyam.IamDataFrame` Scenario data to be validated and aggregated. dsd : :class:`DataStructureDefinition` Codelists that are used for validation. dimensions : list of str, str, optional Dimensions to be used in the validation, defaults to all dimensions defined in ``dsd``. processor : :class:`Processor` or list of :class:`Processor`, optional One or more processors to apply. Runs before any config-declared processors. Returns ------- :class:`pyam.IamDataFrame` Processed scenario data Raises ------ ValueError If the :class:`pyam.IamDataFrame` fails the validation. """ processor = processor or [] processor = processor if isinstance(processor, list) else [processor] dimensions = ( [dimensions] if isinstance(dimensions, str) else dimensions ) or dsd.dimensions # Auto-instantiate processors declared in nomenclature.yaml under 'processors' # Raise error if both explicit and config-based processors exist. if getattr(dsd.config.processor, "region_processor", False): if any(isinstance(p, RegionProcessor) for p in processor): raise ValueError( "Config declares 'region-processor: true' but an explicit " "RegionProcessor was provided. Please specify only one source of " "RegionProcessor (either via config or explicitly)." ) processor.append( RegionProcessor.from_directory(dsd.project_folder / "mappings", dsd) ) if dsd.config.processor.nuts: if any(isinstance(p, NutsProcessor) for p in processor): raise ValueError( "Config declares 'nuts' processor but an explicit NutsProcessor " "was provided. Please specify only one source of NutsProcessor " "(either via config or explicitly)." ) processor.append(NutsProcessor.from_definition(dsd)) if ( any(isinstance(p, (RegionProcessor, NutsProcessor)) for p in processor) and "region" in dimensions ): dimensions.remove("region") # Validate against the codelists dsd.validate(df, dimensions=dimensions) # Run the processors for p in processor: try: df = p.apply(df) except Exception as error: if p.fail_ok: logger.warning( f"Processor {p.__class__.__name__} failed with error: {error}. " "Continuing with processing as fail_ok=True." ) else: raise # Check consistency across the variable hierarchy error = dsd.check_aggregate(df) if not error.empty: raise ValueError( f"These variables are not the sum of their components:\n{error}" ) return df