# Source code for nomenclature.config
from pathlib import Path
from typing import Annotated, Optional
import yaml
from git import Repo
from pydantic import (
BaseModel,
Field,
ValidationInfo,
field_validator,
model_validator,
ConfigDict,
BeforeValidator,
)
def convert_to_set(v: str | list[str] | set[str]) -> set[str]:
match v:
case set(v):
return v
case list(v):
return set(v)
case str(v):
return {v}
case _:
raise TypeError("`repositories` must be of type str, list or set.")
class CodeListConfig(BaseModel):
    """Configuration of the codelist of a single dimension.

    Attributes
    ----------
    dimension : str
        Name of the dimension.
    repositories : set of str, optional
        Names of the repositories used for this dimension. May also be given
        under the alias `repository`, as a single name or a list of names.
    """

    # Accept both the field name `repositories` and its alias `repository`.
    model_config = ConfigDict(populate_by_name=True)

    dimension: str
    repositories: Annotated[
        Optional[set[str]], BeforeValidator(convert_to_set)
    ] = Field(default=None, alias="repository")

    @property
    def repository_dimension_path(self) -> str:
        """Return the string ``definitions/<dimension>``."""
        return f"definitions/{self.dimension}"
class RegionCodeListConfig(CodeListConfig):
    """Codelist configuration for regions with an additional `country` flag.

    Attributes
    ----------
    country : bool
        Defaults to False.
        NOTE(review): presumably toggles adding countries to the region
        codelist -- confirm against the code consuming this flag.
    """

    country: bool = Field(default=False)
class Repository(BaseModel):
    """A git repository referenced in the configuration.

    Attributes
    ----------
    url : str
        URL the repository is cloned from.
    hash : str, optional
        Commit hash to check out; mutually exclusive with `release`.
    release : str, optional
        Release to check out (passed to ``git checkout``); mutually exclusive
        with `hash`.
    local_path : Path, optional
        Location of the local clone. Must not be given in the configuration;
        it is assigned by :meth:`fetch_repo`.
    """

    url: str
    hash: str | None = None
    release: str | None = None
    # defined via the `repository` name in the configuration
    # (`validate_default=True` makes `check_path_empty` run on the default too)
    local_path: Path | None = Field(default=None, validate_default=True)

    # "after" model validators receive the validated instance, so this is an
    # instance method; pydantic deprecates the classmethod form.
    @model_validator(mode="after")
    def check_hash_and_release(self) -> "Repository":
        """Reject configurations that specify both `hash` and `release`.

        Raises
        ------
        ValueError
            If both `hash` and `release` are set.
        """
        if self.hash and self.release:
            raise ValueError("Either `hash` or `release` can be provided, not both.")
        return self

    @field_validator("local_path")
    @classmethod
    def check_path_empty(cls, v):
        """Ensure `local_path` is not supplied as part of the configuration.

        Raises
        ------
        ValueError
            If `local_path` is anything other than None.
        """
        if v is not None:
            raise ValueError("The `local_path` must not be set as part of the config.")
        return v

    @property
    def revision(self) -> str:
        """Revision to check out: `hash`, else `release`, else ``"main"``."""
        return self.hash or self.release or "main"

    def fetch_repo(self, to_path):
        """Clone or update the repository and check out :attr:`revision`.

        Parameters
        ----------
        to_path : :class:`pathlib.Path` or path-like
            Folder of the local clone. If it is not an existing directory the
            repository is cloned into it, otherwise the existing clone is
            opened and its `origin` remote is fetched.

        Notes
        -----
        Local modifications are discarded: the working tree is hard-reset
        before and after the checkout and cleaned with ``git clean -xdf``.
        Sets :attr:`local_path` to `to_path`.
        """
        to_path = to_path if isinstance(to_path, Path) else Path(to_path)

        if not to_path.is_dir():
            repo = Repo.clone_from(self.url, to_path)
        else:
            repo = Repo(to_path)
            repo.remotes.origin.fetch()
        self.local_path = to_path

        # Reset first so that local changes cannot block the checkout.
        repo.git.reset("--hard")
        repo.git.checkout(self.revision)
        repo.git.reset("--hard")
        repo.git.clean("-xdf")
        # Only the "main" revision is additionally pulled from origin.
        if self.revision == "main":
            repo.remotes.origin.pull()
# [docs]
class DataStructureConfig(BaseModel):
    """A class for configuration of a DataStructureDefinition

    Attributes
    ----------
    region : RegionCodeListConfig
        Attributes for configuring the RegionCodeList
    variable : CodeListConfig
        Attributes for configuring the codelist of the `variable` dimension
    """

    region: Optional[RegionCodeListConfig] = None
    variable: Optional[CodeListConfig] = None

    @field_validator("region", "variable", mode="before")
    @classmethod
    def add_dimension(cls, v, info: ValidationInfo):
        """Inject the field name as `dimension` into the raw field value.

        A `dimension` key already present in `v` takes precedence over the
        field name.
        """
        # Both fields are optional, so an explicit null is a legitimate value;
        # pass it through instead of failing on the `**v` unpacking below.
        if v is None:
            return v
        return {"dimension": info.field_name, **v}

    @property
    def repos(self) -> dict[str, set[str]]:
        """Map each configured dimension to its non-empty set of repositories.

        Dimensions that are not configured, or that have no repositories, are
        left out.
        """
        return {
            dimension: getattr(self, dimension).repositories
            for dimension in ("region", "variable")
            if getattr(self, dimension) and getattr(self, dimension).repositories
        }
class RegionMappingConfig(BaseModel):
    """Configuration of the repositories used for mappings.

    Attributes
    ----------
    repositories : set of str
        Required. Names of the repositories; may also be given under the
        alias `repository`, as a single name or a list of names.
    """

    # Accept both the field name `repositories` and its alias `repository`.
    model_config = ConfigDict(populate_by_name=True)

    repositories: Annotated[set[str], BeforeValidator(convert_to_set)] = Field(
        default=..., alias="repository"
    )
class NomenclatureConfig(BaseModel):
    """Top-level configuration combining repositories, definitions and mappings.

    Attributes
    ----------
    repositories : dict of str to Repository
        Available repositories, keyed by the name used to reference them.
    definitions : DataStructureConfig, optional
        Configuration of the definitions.
    mappings : RegionMappingConfig, optional
        Configuration of the mappings.
    """

    repositories: dict[str, Repository] = {}
    definitions: Optional[DataStructureConfig] = None
    mappings: Optional[RegionMappingConfig] = None

    # "after" model validators receive the validated instance, so this is an
    # instance method; pydantic deprecates the classmethod form.
    @model_validator(mode="after")
    def check_definitions_repository(self) -> "NomenclatureConfig":
        """Check that every referenced repository is defined in `repositories`.

        Raises
        ------
        ValueError
            If `definitions` or `mappings` reference a repository name that is
            not a key of `repositories`.
        """
        definitions_repos = self.definitions.repos if self.definitions else {}
        mapping_repos = (
            {"mappings": self.mappings.repositories} if self.mappings else {}
        )
        repos = {**definitions_repos, **mapping_repos}
        for use, repositories in repos.items():
            # Report only the names that are actually missing.
            unknown = repositories - self.repositories.keys()
            if unknown:
                raise ValueError(f"Unknown repository {unknown} in '{use}'.")
        return self

    def fetch_repos(self, target_folder: Path):
        """Fetch every configured repository into ``target_folder / <name>``.

        Parameters
        ----------
        target_folder : :class:`pathlib.Path`
            Folder that holds one sub-folder per repository name.
        """
        for repo_name, repo in self.repositories.items():
            repo.fetch_repo(target_folder / repo_name)

    @classmethod
    def from_file(cls, file: Path):
        """Read a NomenclatureConfig from a file and fetch its repositories

        The repositories are fetched into the folder containing `file`.

        Parameters
        ----------
        file : :class:`pathlib.Path` or path-like
            Path to config file

        Returns
        -------
        NomenclatureConfig
        """
        # Coerce so that plain strings work with `file.parent` below.
        file = Path(file)
        with open(file, "r", encoding="utf-8") as stream:
            config = yaml.safe_load(stream)
        # An empty YAML document loads as None; treat it as an empty config.
        if config is None:
            config = {}
        instance = cls(**config)
        instance.fetch_repos(file.parent)
        return instance