# Source code for nomenclature.core
import logging
import pyam
from pydantic import validate_call
from nomenclature.definition import DataStructureDefinition
from nomenclature.processor import Processor, RegionProcessor
from nomenclature.processor.nuts import NutsProcessor
logger = logging.getLogger(__name__)
# [docs]
@validate_call(config={"arbitrary_types_allowed": True})
def process(
    df: pyam.IamDataFrame,
    dsd: DataStructureDefinition,
    dimensions: list[str] | str | None = None,
    processor: Processor | list[Processor] | None = None,
) -> pyam.IamDataFrame:
    """Function for validation and region aggregation in one step

    This function is the recommended way of using the nomenclature package. It
    performs the following operations:

    * Validation against the codelists and criteria of a
      :class:`DataStructureDefinition`
    * Region processing, which can occur via one or more :class:`Processor`
      instances. This can be:

      * Region aggregation (via :class:`RegionProcessor`), which renames and
        aggregates based on user-provided mappings.

        1. Model native regions not listed in the model mapping will be dropped
        2. Model native regions can be renamed
        3. Aggregation from model native regions to "common regions"

      * NUTS aggregation (via :class:`NutsProcessor`), which aggregates
        NUTS3 -> NUTS2 -> NUTS1 -> Country -> EU27(+UK)

    * Validation of consistency across the variable hierarchy

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`
        Scenario data to be validated and aggregated.
    dsd : :class:`DataStructureDefinition`
        Codelists that are used for validation.
    dimensions : list of str, str, optional
        Dimensions to be used in the validation, defaults to all dimensions
        defined in ``dsd``. The "region" dimension is skipped whenever a
        :class:`RegionProcessor` or :class:`NutsProcessor` is applied.
    processor : :class:`Processor` or list of :class:`Processor`, optional
        One or more processors to apply. Runs before any config-declared
        processors.

    Returns
    -------
    :class:`pyam.IamDataFrame`
        Processed scenario data

    Raises
    ------
    ValueError
        If the :class:`pyam.IamDataFrame` fails the validation, if a processor
        is declared both in the config and explicitly, or if variables are not
        the sum of their components.

    Notes
    -----
    Neither the ``processor`` list nor the ``dimensions`` list (including
    ``dsd.dimensions``) is modified by this function; both are copied first.
    """
    # Normalise to a private list so that appending config-declared processors
    # below never alters a list owned by the caller.
    if processor is None:
        processors = []
    elif isinstance(processor, list):
        processors = list(processor)
    else:
        processors = [processor]

    # Copy the selected dimensions: the fallback is the list held by ``dsd``,
    # and dropping "region" from it in place would change the definition itself.
    selected = [dimensions] if isinstance(dimensions, str) else dimensions
    dimensions = list(selected or dsd.dimensions)

    # Auto-instantiate processors declared in nomenclature.yaml under 'processors'
    # Raise error if both explicit and config-based processors exist.
    # NOTE(review): the region check tolerates a missing attribute via getattr
    # while the nuts check accesses the attribute directly - confirm that this
    # asymmetry is intended.
    if getattr(dsd.config.processor, "region_processor", False):
        if any(isinstance(p, RegionProcessor) for p in processors):
            raise ValueError(
                "Config declares 'region-processor: true' but an explicit "
                "RegionProcessor was provided. Please specify only one source of "
                "RegionProcessor (either via config or explicitly)."
            )
        processors.append(
            RegionProcessor.from_directory(dsd.project_folder / "mappings", dsd)
        )
    if dsd.config.processor.nuts:
        if any(isinstance(p, NutsProcessor) for p in processors):
            raise ValueError(
                "Config declares 'nuts' processor but an explicit NutsProcessor "
                "was provided. Please specify only one source of NutsProcessor "
                "(either via config or explicitly)."
            )
        processors.append(NutsProcessor.from_definition(dsd))

    # Regions are handled by the region/NUTS processors, so they are excluded
    # from the codelist validation in that case.
    if any(isinstance(p, (RegionProcessor, NutsProcessor)) for p in processors):
        dimensions = [dim for dim in dimensions if dim != "region"]

    # Validate against the codelists
    dsd.validate(df, dimensions=dimensions)

    # Run the processors
    for p in processors:
        try:
            df = p.apply(df)
        except Exception as exc:
            # Processors flagged with ``fail_ok`` are best-effort: log and carry
            # on with the data as it was before the failing processor.
            if not p.fail_ok:
                raise
            logger.warning(
                "Processor %s failed with error: %s. "
                "Continuing with processing as fail_ok=True.",
                p.__class__.__name__,
                exc,
            )

    # Check consistency across the variable hierarchy
    inconsistent = dsd.check_aggregate(df)
    if not inconsistent.empty:
        raise ValueError(
            f"These variables are not the sum of their components:\n{inconsistent}"
        )
    return df