Source code for econuy.utils.operations

import inspect
import json
import os
from pathlib import Path
from typing import Optional, Dict, List, Tuple

import pandas as pd

from econuy.utils import get_project_root
from econuy.base import Dataset, DatasetMetadata


class DatasetRegistry:
    """
    Read-only access to the dataset registry stored in ``datasets.json``.

    The registry is a mapping from dataset name to a dictionary of dataset
    information. The filtering helpers in this class read the ``"disabled"``,
    ``"auxiliary"``, ``"custom"`` and ``"area"`` keys of each entry.
    """

    def __init__(self):
        """
        Initialize the DatasetRegistry by loading the dataset information from a JSON file.
        """
        registry_path = get_project_root() / "retrieval" / "datasets.json"
        with open(registry_path, "r", encoding="utf-8") as f:
            self.registry = json.load(f)

    def __getitem__(self, name: str) -> Dict:
        """
        Retrieve a dataset by its name.

        Parameters
        ----------
        name : str
            The name of the dataset to retrieve.

        Returns
        -------
        dict
            The dataset information.

        Raises
        ------
        KeyError
            If no dataset with that name exists in the registry.
        """
        return self.registry[name]

    def get_multiple(self, names: List[str]) -> Dict:
        """
        Retrieve multiple datasets by their names.

        Names that are not present in the registry are silently ignored, and
        the result follows the registry's own ordering rather than the order
        of ``names``.

        Parameters
        ----------
        names : List[str]
            A list of dataset names to retrieve. A single string is treated as
            one dataset name.

        Returns
        -------
        dict
            A dictionary containing the requested datasets.
        """
        # A bare string is an iterable of characters; treat it as one name so
        # it is not matched character-by-character (or by substring).
        if isinstance(names, str):
            names = [names]
        # Build the lookup set once: O(1) membership per registry key, and
        # one-shot iterables are consumed exactly once.
        wanted = set(names)
        return {k: v for k, v in self.registry.items() if k in wanted}

    def get_available(self) -> Dict:
        """
        Retrieve all available datasets, i.e. those that are neither disabled
        nor auxiliary.

        Returns
        -------
        dict
            A dictionary containing all available datasets.
        """
        return {
            k: v
            for k, v in self.registry.items()
            if not v["disabled"] and not v["auxiliary"]
        }

    def get_custom(self) -> Dict:
        """
        Retrieve all custom datasets.

        Returns
        -------
        dict
            A dictionary containing all custom datasets.
        """
        return {k: v for k, v in self.registry.items() if v["custom"]}

    def get_by_area(
        self, area: str, keep_disabled: bool = False, keep_auxiliary: bool = False
    ) -> Dict:
        """
        Retrieve datasets by a specific area, with options to include disabled and
        auxiliary datasets.

        Parameters
        ----------
        area : str
            The area to filter datasets by.
        keep_disabled : bool, optional
            Whether to include disabled datasets (default is False).
        keep_auxiliary : bool, optional
            Whether to include auxiliary datasets (default is False).

        Returns
        -------
        dict
            A dictionary containing the datasets that match the specified area and options.
        """
        return {
            k: v
            for k, v in self.registry.items()
            if v["area"] == area
            and (keep_disabled or not v["disabled"])
            and (keep_auxiliary or not v["auxiliary"])
        }

    def list_available(self) -> List[str]:
        """
        List the names of all available datasets.

        Returns
        -------
        List[str]
            A list of names of all available datasets.
        """
        return list(self.get_available())

    def list_custom(self) -> List[str]:
        """
        List the names of all custom datasets.

        Returns
        -------
        List[str]
            A list of names of all custom datasets.
        """
        return list(self.get_custom())

    def list_by_area(
        self, area: str, keep_disabled: bool = False, keep_auxiliary: bool = False
    ) -> List[str]:
        """
        List the names of datasets by a specific area, with options to include disabled
        and auxiliary datasets.

        Parameters
        ----------
        area : str
            The area to filter datasets by.
        keep_disabled : bool, optional
            Whether to include disabled datasets (default is False).
        keep_auxiliary : bool, optional
            Whether to include auxiliary datasets (default is False).

        Returns
        -------
        List[str]
            A list of names of datasets that match the specified area and options.
        """
        return list(self.get_by_area(area, keep_disabled, keep_auxiliary))
# Shared registry instance used by the helpers below. Instantiating it reads
# the registry JSON file, so this happens once, at import time.
REGISTRY = DatasetRegistry()


def get_name_from_function() -> str:
    """
    Return the name of the function that called this helper.

    Returns
    -------
    str
        The ``co_name`` of the caller's code object.
    """
    # f_back is the frame of whoever called get_name_from_function().
    return inspect.currentframe().f_back.f_code.co_name


def get_download_sources(name: str) -> Dict:
    """
    Return the download sources registered for a dataset.

    Parameters
    ----------
    name : str
        The name of the dataset.

    Returns
    -------
    dict
        The ``["sources"]["downloads"]`` entry of the dataset's registry record.
    """
    return REGISTRY[name]["sources"]["downloads"]


def get_base_metadata(name: str) -> Dict:
    """
    Return the base metadata registered for a dataset.

    Parameters
    ----------
    name : str
        The name of the dataset.

    Returns
    -------
    dict
        The ``"base_metadata"`` entry of the dataset's registry record.
    """
    return REGISTRY[name]["base_metadata"]


def get_names_and_ids(name: str, language: str = "es") -> Tuple[List[str], List[Dict]]:
    """
    Return the indicator ids of a dataset and their names in one language.

    Parameters
    ----------
    name : str
        The name of the dataset.
    language : str, optional
        The language key used to pick each indicator's name (default is "es").

    Returns
    -------
    Tuple[List[str], List[Dict]]
        The indicator ids, each prefixed with ``"{name}_"``, followed by one
        single-entry dictionary per indicator mapping ``language`` to the
        indicator's name in that language. Both lists share the same order.

    Raises
    ------
    KeyError
        If the dataset is unknown or an indicator has no name for ``language``.
    """
    indicators = REGISTRY[name]["indicator_ids"]
    ids = [f"{name}_{indicator_id}" for indicator_id in indicators]
    # Key each name by the language it was actually selected for, rather than
    # always labelling it "es".
    language_names = [{language: names[language]} for names in indicators.values()]
    return ids, language_names


def get_data_dir() -> Path:
    """
    Resolve the data directory and make sure it exists.

    The directory is taken from the ``ECONUY_DATA_DIR`` environment variable;
    when that is unset or empty, ``~/.cache/econuy`` is used. As side effects,
    the resolved path is written back to ``ECONUY_DATA_DIR`` and the directory
    (including parents) is created if missing.

    Returns
    -------
    Path
        The data directory.
    """
    data_dir = os.getenv("ECONUY_DATA_DIR", "") or Path.home() / ".cache" / "econuy"
    data_dir = Path(data_dir)
    os.environ["ECONUY_DATA_DIR"] = data_dir.as_posix()
    data_dir.mkdir(parents=True, exist_ok=True, mode=0o755)
    return data_dir


def read_dataset(name: str, data_dir: Path) -> Optional[Dataset]:  # noqa: F821
    """
    Load a dataset and its metadata from ``data_dir``.

    Parameters
    ----------
    name : str
        The name of the dataset.
    data_dir : Path
        The directory holding ``{name}.csv`` and ``{name}_metadata.json``.

    Returns
    -------
    Optional[Dataset]
        The dataset built from both files, or None when either file is missing.
    """
    # NOTE(review): with_suffix() replaces anything after the last dot, so a
    # name containing "." maps to a truncated file name. Kept as-is on the
    # assumption that the writing side builds paths the same way; confirm.
    dataset_path = (data_dir / name).with_suffix(".csv")
    metadata_path = (data_dir / f"{name}_metadata").with_suffix(".json")
    if not dataset_path.exists() or not metadata_path.exists():
        return None

    # The first CSV column is used as the index and parsed as dates.
    data = pd.read_csv(dataset_path, index_col=0, parse_dates=True)
    metadata = DatasetMetadata.from_json(metadata_path)
    return Dataset(name, data, metadata)