Source code for egon.data.datasets.mastr

"""
Download unit registry datasets from the Marktstammdatenregister (MaStR).
It incorporates two different datasets:

Dump 2021-05-03
* Source: https://sandbox.zenodo.org/record/808086
* Used technologies: PV plants, wind turbines, biomass, hydro plants,
  combustion, nuclear, gsgk, storage
* Data is further processed in dataset
  :py:class:`egon.data.datasets.power_plants.PowerPlants`

Dump 2022-11-17
* Source: https://sandbox.zenodo.org/record/1132839
* Used technologies: PV plants, wind turbines, biomass, hydro plants
* Data is further processed in module
  :py:mod:`egon.data.datasets.power_plants.mastr` `PowerPlants`

Todo: Finish docstring
TBD
"""

from functools import partial
from pathlib import Path
from urllib.request import urlretrieve
import os

from egon.data.datasets import Dataset
import egon.data.config

WORKING_DIR_MASTR_OLD = Path(".", "bnetza_mastr", "dump_2021-05-03")
WORKING_DIR_MASTR_NEW = Path(".", "bnetza_mastr", "dump_2022-11-17")


def download_mastr_data():
    """Download MaStR data from Zenodo.

    Fetches the CSV files of both MaStR dumps (configuration entries
    ``mastr`` and ``mastr_new``) into ``WORKING_DIR_MASTR_OLD`` and
    ``WORKING_DIR_MASTR_NEW`` respectively. The directories are created if
    they do not exist yet. Files already present in the target directory
    are not downloaded again.
    """

    def download(dataset_name, download_dir):
        """Download all files of a single MaStR dump.

        Parameters
        ----------
        dataset_name : str
            Key of the dataset in the datasets configuration. The entry
            must provide ``deposit_id``, ``technologies`` and
            ``file_basename``.
        download_dir : pathlib.Path
            Existing directory the files are written to.
        """
        print(f"Downloading dataset {dataset_name} to {download_dir} ...")

        # Get parameters from config and set download URL
        data_config = egon.data.config.datasets()[dataset_name]
        zenodo_files_url = (
            f"https://sandbox.zenodo.org/record/"
            f"{data_config['deposit_id']}/files/"
        )

        # One cleaned CSV per technology plus the shared locations file
        files = [
            f"{data_config['file_basename']}_{technology}_cleaned.csv"
            for technology in data_config["technologies"]
        ]
        files.append("location_elec_generation_raw.csv")

        # Retrieve specified files
        for filename in files:
            target = download_dir / filename

            # Look for the file where it is actually stored. Checking the
            # bare filename would test the current working directory and
            # never detect a previous download.
            if target.is_file():
                continue

            # Download to a temporary name and rename on success so that
            # an interrupted transfer is not mistaken for a complete file
            # on the next run.
            tmp_target = target.with_name(f"{filename}.part")
            urlretrieve(zenodo_files_url + filename, tmp_target)
            tmp_target.replace(target)

    # mkdir() with exist_ok=True is a no-op for existing directories, so no
    # separate existence check is needed.
    WORKING_DIR_MASTR_OLD.mkdir(exist_ok=True, parents=True)
    WORKING_DIR_MASTR_NEW.mkdir(exist_ok=True, parents=True)

    download(dataset_name="mastr", download_dir=WORKING_DIR_MASTR_OLD)
    download(dataset_name="mastr_new", download_dir=WORKING_DIR_MASTR_NEW)
# Factory for the "MastrData" dataset: calling ``mastr_data_setup()`` builds
# a ``Dataset`` with the keyword arguments below, whose single task is
# ``download_mastr_data``. Extra keyword arguments can be supplied at call
# time via ``functools.partial``.
_MASTR_DATASET_KWARGS = {
    "name": "MastrData",
    "version": "0.0.1",
    "dependencies": [],
    "tasks": (download_mastr_data,),
}
mastr_data_setup = partial(Dataset, **_MASTR_DATASET_KWARGS)