Source code for egon.data.datasets.storages.pumped_hydro

"""
The module containing code allocating pumped hydro plants based on
data from MaStR and NEP.
"""

from geopy.geocoders import Nominatim
from sqlalchemy.orm import sessionmaker
import geopandas as gpd
import pandas as pd

from egon.data import config, db
from egon.data.datasets.chp.match_nep import match_nep_chp
from egon.data.datasets.chp.small_chp import assign_use_case
from egon.data.datasets.mastr import WORKING_DIR_MASTR_OLD
from egon.data.datasets.power_plants import (
    assign_bus_id,
    assign_voltage_level,
    filter_mastr_geometry,
    select_target,
)
import egon.data.config


def select_nep_pumped_hydro():
    """Read the pumped hydro plants from the NEP list of power plants.

    The query is restricted to rows with carrier ``pumped_hydro``, a
    positive ``c2035_capacity`` and a postcode different from ``'None'``.
    Afterwards, rows whose postcode contains ``"A"``, ``"L"`` or ``"nan"``
    are discarded and ``bnetza_id`` is cut to its first seven characters.

    Returns
    -------
    pandas.DataFrame
        Pumped hydro plants from NEP list

    """
    sources = egon.data.config.datasets()["power_plants"]["sources"]
    carrier = "pumped_hydro"

    # Select plants with geolocation from list of conventional power plants
    nep_ph = db.select_dataframe(
        f"""
        SELECT bnetza_id, name, carrier, postcode, capacity, city,
        federal_state, c2035_capacity
        FROM {sources['nep_conv']}
        WHERE carrier = '{carrier}'
        AND c2035_capacity > 0
        AND postcode != 'None';
        """
    )

    # Remove plants outside of Germany. The markers are matched as
    # substrings of the postcode (presumably country prefixes such as
    # "A-1234" -- TODO confirm against the NEP data); "nan" removes
    # missing postcodes that were turned into text by the cast below.
    nep_ph["postcode"] = nep_ph["postcode"].astype(str)
    for marker in ("A", "L", "nan"):
        has_marker = nep_ph["postcode"].str.contains(marker)
        nep_ph = nep_ph[~has_marker]

    # Keep only the leading seven characters of the id, which removes the
    # subunit part of the bnetza_id
    nep_ph["bnetza_id"] = nep_ph["bnetza_id"].str.slice(0, 7)

    return nep_ph
def select_mastr_pumped_hydro():
    """Select pumped hydro plants from MaStR

    Reads the MaStR storage table configured under
    ``power_plants.sources.mastr_storage``, keeps the units whose
    technology is ``"Pumpspeicher"`` and whose status is ``"InBetrieb"``,
    and returns them as a GeoDataFrame with point geometries in EPSG:4326.

    Rows are dropped when the longitude, the latitude, the postcode or the
    federal state is missing. The capacity column is multiplied by 1e-3
    (kW to MW according to the original comment).

    Returns
    -------
    geopandas.GeoDataFrame
        Pumped hydro plants from MaStR, restricted by
        ``filter_mastr_geometry``

    """
    sources = egon.data.config.datasets()["power_plants"]["sources"]

    # Read-in data from MaStR
    mastr_ph = pd.read_csv(
        WORKING_DIR_MASTR_OLD / sources["mastr_storage"],
        delimiter=",",
        usecols=[
            "Nettonennleistung",
            "EinheitMastrNummer",
            "Kraftwerksnummer",
            "Technologie",
            "Postleitzahl",
            "Laengengrad",
            "Breitengrad",
            "EinheitBetriebsstatus",
            "LokationMastrNummer",
            "Ort",
            "Bundesland",
        ],
    )

    # Rename columns
    mastr_ph = mastr_ph.rename(
        columns={
            "Kraftwerksnummer": "bnetza_id",
            "Technologie": "carrier",
            "Postleitzahl": "plz",
            "Ort": "city",
            "Bundesland": "federal_state",
            "Nettonennleistung": "el_capacity",
        }
    )

    # Select only pumped hydro units
    mastr_ph = mastr_ph[mastr_ph.carrier == "Pumpspeicher"]

    # Select only pumped hydro units which are in operation
    mastr_ph = mastr_ph[mastr_ph.EinheitBetriebsstatus == "InBetrieb"]

    # A point geometry needs both coordinates; a row with only one of them
    # would otherwise end up as a point with a NaN coordinate.
    has_coordinates = (
        mastr_ph["Laengengrad"].notna() & mastr_ph["Breitengrad"].notna()
    )
    mastr_ph = mastr_ph[has_coordinates]

    # Insert geometry column
    mastr_ph = gpd.GeoDataFrame(
        mastr_ph,
        geometry=gpd.points_from_xy(
            mastr_ph["Laengengrad"], mastr_ph["Breitengrad"]
        ),
    )

    # Drop rows without post code and update datatype of postcode.
    # The explicit copy avoids assigning into a view of the filtered frame.
    mastr_ph = mastr_ph[mastr_ph["plz"].notna()].copy()
    mastr_ph["plz"] = mastr_ph["plz"].astype(int)

    # Calculate power in MW
    mastr_ph.loc[:, "el_capacity"] *= 1e-3

    mastr_ph = mastr_ph.set_crs(4326)

    mastr_ph = mastr_ph[mastr_ph["federal_state"].notna()]

    # Drop pumped hydro units outside of Germany / outside the test mode
    # area
    mastr_ph = filter_mastr_geometry(mastr_ph, federal_state=None)

    return mastr_ph
def match_storage_units(
    nep,
    mastr,
    matched,
    buffer_capacity=0.1,
    consider_location="plz",
    consider_carrier=True,
    consider_capacity=True,
):
    """Match storage units (in this case only pumped hydro) from MaStR
    to the list of power plants from NEP

    For every NEP row with carrier ``pumped_hydro`` and a postcode other
    than ``"None"``, the MaStR units are filtered by capacity, location and
    carrier. If at least one unit remains, the first one provides the
    MaStR number, geometry and voltage level of the new entry in
    ``matched``, while the capacity is taken from ``c2035_capacity`` of the
    NEP row.

    Parameters
    ----------
    nep : pandas.DataFrame
        Storage units from NEP which are not matched to MaStR
    mastr : pandas.DataFrame
        Storage units from MaStR which are not matched to NEP. Needs the
        columns ``el_capacity``, ``plz``, ``city``, ``federal_state``,
        ``carrier``, ``EinheitMastrNummer``, ``geometry`` and
        ``voltage_level``.
    matched : pandas.DataFrame
        Already matched storage units
    buffer_capacity : float, optional
        Maximum difference in capacity in p.u. The default is 0.1.
    consider_location : str, optional
        Geographic criterion: ``"plz"``, ``"city"`` or ``"federal_state"``.
        Any other value disables the geographic filter.
        The default is ``"plz"``.
    consider_carrier : bool, optional
        Only accept MaStR units whose carrier equals ``"pumped_hydro"``.
        The default is True.
    consider_capacity : bool, optional
        Only accept MaStR units whose capacity lies within
        ``buffer_capacity`` of the NEP capacity. The default is True.

    Returns
    -------
    matched : pandas.DataFrame
        Matched storage units
    mastr : pandas.DataFrame
        Storage units from MaStR which are not matched to NEP
    nep : pandas.DataFrame
        Storage units from NEP which are not matched to MaStR

    """
    # MaStR spells out the federal states, NEP uses the two-letter codes
    federal_state_codes = {
        "Hamburg": "HH",
        "Sachsen": "SN",
        "MecklenburgVorpommern": "MV",
        "Thueringen": "TH",
        "SchleswigHolstein": "SH",
        "Bremen": "HB",
        "Saarland": "SL",
        "Bayern": "BY",
        "BadenWuerttemberg": "BW",
        "Brandenburg": "BB",
        "Hessen": "HE",
        "NordrheinWestfalen": "NW",
        "Berlin": "BE",
        "Niedersachsen": "NI",
        "SachsenAnhalt": "ST",
        "RheinlandPfalz": "RP",
    }

    carrier = "pumped_hydro"

    nep_candidates = nep[
        (nep["carrier"] == carrier) & (nep["postcode"] != "None")
    ]

    for index, row in nep_candidates.iterrows():
        # Start from a copy of all MaStR units that are still unmatched
        selected = mastr.copy()

        # Set capacity constraint using buffer
        if consider_capacity:
            lower_bound = row["capacity"] * (1 - buffer_capacity)
            upper_bound = row["capacity"] * (1 + buffer_capacity)
            selected = selected[
                (selected.el_capacity <= upper_bound)
                & (selected.el_capacity >= lower_bound)
            ]

        # Set geographic constraint, either choose power plants
        # with the same postcode, city or federal state
        if consider_location == "plz":
            selected = selected[
                selected.plz.astype(int).astype(str) == row["postcode"]
            ]
        elif consider_location == "city":
            selected = selected[selected.city == row.city.replace("\n", " ")]
        elif consider_location == "federal_state":
            # Unknown state names map to NaN and therefore never match,
            # instead of raising a KeyError for the whole run.
            state_codes = selected["federal_state"].map(federal_state_codes)
            selected = selected[state_codes == row.federal_state]

        # Set carrier constraint if selected
        if consider_carrier:
            selected = selected[selected.carrier == carrier]

        if len(selected) == 0:
            continue

        # A plant could be matched: the first candidate provides id,
        # geometry and voltage level, the capacity comes from NEP.
        first_candidate = selected.head(1)
        new_entry = gpd.GeoDataFrame(
            data={
                "source": "MaStR scaled with NEP 2021 list",
                "MaStRNummer": first_candidate.EinheitMastrNummer,
                "carrier": carrier,
                "el_capacity": row.c2035_capacity,
                "scenario": "eGon2035",
                "geometry": first_candidate.geometry,
                "voltage_level": first_candidate.voltage_level,
            }
        )
        # DataFrame.append was removed in pandas 2.0; concat is equivalent
        matched = pd.concat([matched, new_entry])

        # Drop matched storage units from nep
        nep = nep.drop(index)

        # Drop matched storage units from MaStR list if the location is
        # accurate.
        # NOTE(review): this removes every remaining candidate, not only
        # the one that was used above -- confirm that this is intended.
        if consider_capacity and consider_carrier:
            mastr = mastr.drop(selected.index)

    return matched, mastr, nep
def get_location(unmatched):
    """Gets a geolocation for units which couldn't be matched using MaStR
    data. Uses geolocator and the city name from NEP data to create
    longitude and latitude for a list of unmatched units.

    Every distinct city name is sent to Nominatim once. Cities for which
    the service returns no result keep missing coordinates and stay in the
    returned ``unmatched`` frame.

    Note that the columns ``lon``, ``lat`` and ``geometry`` are written to
    the frame passed in as ``unmatched``.

    Parameters
    ----------
    unmatched : pandas.DataFrame
        storage units from NEP which are not matched to MaStR but
        containing a city information

    Returns
    -------
    located : pandas.DataFrame
        Units with a geolocation based on their city information
    unmatched : pandas.DataFrame
        Units for which no geolocation could be identified

    """
    geolocator = Nominatim(user_agent="egon_data")

    # Make sure both coordinate columns exist even if no city is resolved
    unmatched["lon"] = float("nan")
    unmatched["lat"] = float("nan")

    # Identify longitude and latitude once per distinct city name
    for city in unmatched.city.dropna().unique():
        location = geolocator.geocode(city)

        # geocode returns None if the city could not be found
        if location is None:
            continue

        # Write information on lon and lat to all units of this city
        in_city = unmatched.city == city
        unmatched.loc[in_city, "lon"] = location.longitude
        unmatched.loc[in_city, "lat"] = location.latitude

    # Get a point geometry from lon and lat information
    unmatched["geometry"] = gpd.points_from_xy(unmatched.lon, unmatched.lat)
    # NOTE(review): kept from the original; this only sets a CRS if
    # ``unmatched`` is a GeoDataFrame -- confirm against the caller.
    unmatched.crs = "EPSG:4326"

    # Copy units with lon and lat to a new dataframe. Selecting by
    # coordinate presence is used because a point built from NaN
    # coordinates is not necessarily treated as a missing geometry.
    has_coordinates = unmatched["lon"].notna() & unmatched["lat"].notna()
    located = unmatched.loc[
        has_coordinates,
        ["bnetza_id", "name", "carrier", "city", "c2035_capacity", "geometry"],
    ].copy()

    # Rename columns for compatibility reasons
    located = located.rename(
        columns={"c2035_capacity": "el_capacity", "bnetza_id": "MaStRNummer"}
    )
    located["scenario"] = "eGon2035"
    located["source"] = "NEP power plants geolocated using city"

    unmatched = unmatched.drop(located.index.values)

    return located, unmatched
def apply_voltage_level_thresholds(power_plants):
    """Assigns voltage level to power plants based on thresholds defined
    for the egon project.

    The column ``voltage_level`` of the passed frame is written in place;
    existing entries are overwritten for every row with a capacity value.
    Rows with a missing ``el_capacity`` are left untouched.

    ================== =============
    el_capacity        voltage_level
    ================== =============
    <= 0.1             7
    > 0.1 and <= 0.2   6
    > 0.2 and <= 5.5   5
    > 5.5 and <= 20    4
    > 20 and <= 120    3
    > 120              1
    ================== =============

    Parameters
    ----------
    power_plants : pandas.DataFrame
        Power plants and their electrical capacity

    Returns
    -------
    pandas.DataFrame
        Power plants including voltage_level

    """
    capacity = power_plants["el_capacity"]

    # Lowest bucket includes the boundary value, otherwise a capacity of
    # exactly 0.1 would not get any voltage level.
    power_plants.loc[capacity <= 0.1, "voltage_level"] = 7

    # Each higher threshold overwrites the level set by the previous one,
    # so the bounds have to be applied in ascending order.
    ascending_thresholds = ((0.1, 6), (0.2, 5), (5.5, 4), (20, 3), (120, 1))
    for lower_bound, level in ascending_thresholds:
        power_plants.loc[capacity > lower_bound, "voltage_level"] = level

    return power_plants