Source code for nomenclature.core
import logging
from typing import Optional, Union, List
import pyam
from pydantic import validate_call
from nomenclature.definition import DataStructureDefinition
from nomenclature.processor import Processor, RegionProcessor
logger = logging.getLogger(__name__)
[docs]
@validate_call(config={"arbitrary_types_allowed": True})
def process(
df: pyam.IamDataFrame,
dsd: DataStructureDefinition,
dimensions: Optional[List[str]] = None,
processor: Optional[Union[Processor, List[Processor]]] = None,
) -> pyam.IamDataFrame:
"""Function for validation and region aggregation in one step
This function is the recommended way of using the nomenclature package. It performs
the following operations:
* Validation against the codelists of a DataStructureDefinition
* Region-processing, which can consist of three parts:
1. Model native regions not listed in the model mapping will be dropped
2. Model native regions can be renamed
3. Aggregation from model native regions to "common regions"
* Validation of consistency across the variable hierarchy
Parameters
----------
df : :class:`pyam.IamDataFrame`
Scenario data to be validated and aggregated.
dsd : :class:`DataStructureDefinition`
Codelists that are used for validation.
dimensions : list, optional
Dimensions to be used in the validation, defaults to all dimensions defined in
`dsd`
processor : :class:`RegionProcessor`, optional
Region processor to perform region renaming and aggregation (if given)
Returns
-------
:class:`pyam.IamDataFrame`
Processed scenario data
Raises
------
ValueError
If the :class:`pyam.IamDataFrame` fails the validation.
"""
processor = processor or []
processor = processor if isinstance(processor, list) else [processor]
dimensions = dimensions or dsd.dimensions
if (
any(isinstance(p, RegionProcessor) for p in processor)
and "region" in dimensions
):
dimensions.remove("region")
dsd.validate(df, dimensions=dimensions)
for p in processor:
df = p.apply(df)
# check consistency across the variable hierarchy
error = dsd.check_aggregate(df)
if error is not None:
logger.error(f"These variables are not the sum of their components:\n{error}")
raise ValueError("The validation failed. Please check the log for details.")
return df