"""
Helpers: constants and functions for motorized individual travel
"""
from pathlib import Path
import json
import numpy as np
import pandas as pd
import egon.data.config
TESTMODE_OFF = (
egon.data.config.settings()["egon-data"]["--dataset-boundary"]
== "Everything"
)
WORKING_DIR = Path(".", "emobility")
DATA_BUNDLE_DIR = Path(
".",
"data_bundle_egon_data",
"emobility",
)
DATASET_CFG = egon.data.config.datasets()["emobility_mit"]
COLUMNS_KBA = [
"reg_district",
"total",
"mini",
"medium",
"luxury",
"unknown",
]
CONFIG_EV = {
"bev_mini": {
"column": "mini",
"tech_share": "bev_mini_share",
"share": "mini_share",
"factor": "mini_factor",
},
"bev_medium": {
"column": "medium",
"tech_share": "bev_medium_share",
"share": "medium_share",
"factor": "medium_factor",
},
"bev_luxury": {
"column": "luxury",
"tech_share": "bev_luxury_share",
"share": "luxury_share",
"factor": "luxury_factor",
},
"phev_mini": {
"column": "mini",
"tech_share": "phev_mini_share",
"share": "mini_share",
"factor": "mini_factor",
},
"phev_medium": {
"column": "medium",
"tech_share": "phev_medium_share",
"share": "medium_share",
"factor": "medium_factor",
},
"phev_luxury": {
"column": "luxury",
"tech_share": "phev_luxury_share",
"share": "luxury_share",
"factor": "luxury_factor",
},
}
TRIP_COLUMN_MAPPING = {
"location": "location",
"use_case": "use_case",
"nominal_charging_capacity_kW": "charging_capacity_nominal",
"grid_charging_capacity_kW": "charging_capacity_grid",
"battery_charging_capacity_kW": "charging_capacity_battery",
"soc_start": "soc_start",
"soc_end": "soc_end",
"chargingdemand_kWh": "charging_demand",
"park_start_timesteps": "park_start",
"park_end_timesteps": "park_end",
"drive_start_timesteps": "drive_start",
"drive_end_timesteps": "drive_end",
"consumption_kWh": "consumption",
}
MVGD_MIN_COUNT = 3700 if TESTMODE_OFF else 150
[docs]def read_kba_data():
"""Read KBA data from CSV"""
return pd.read_csv(
WORKING_DIR
/ egon.data.config.datasets()["emobility_mit"]["original_data"][
"sources"
]["KBA"]["file_processed"]
)
[docs]def read_rs7_data():
"""Read RegioStaR7 data from CSV"""
return pd.read_csv(
WORKING_DIR
/ egon.data.config.datasets()["emobility_mit"]["original_data"][
"sources"
]["RS7"]["file_processed"]
)
[docs]def reduce_mem_usage(
df: pd.DataFrame, show_reduction: bool = False
) -> pd.DataFrame:
"""Function to automatically check if columns of a pandas DataFrame can
be reduced to a smaller data type. Source:
https://www.mikulskibartosz.name/how-to-reduce-memory-usage-in-pandas/
Parameters
----------
df: pd.DataFrame
DataFrame to reduce memory usage on
show_reduction : bool
If True, print amount of memory reduced
Returns
-------
pd.DataFrame
DataFrame with memory usage decreased
"""
start_mem = df.memory_usage().sum() / 1024 ** 2
for col in df.columns:
col_type = df[col].dtype
if col_type != object and str(col_type) != "category":
c_min = df[col].min()
c_max = df[col].max()
if str(col_type)[:3] == "int":
if (
c_min > np.iinfo(np.int16).min
and c_max < np.iinfo(np.int16).max
):
df[col] = df[col].astype("int16")
elif (
c_min > np.iinfo(np.int32).min
and c_max < np.iinfo(np.int32).max
):
df[col] = df[col].astype("int32")
else:
df[col] = df[col].astype("int64")
else:
if (
c_min > np.finfo(np.float32).min
and c_max < np.finfo(np.float32).max
):
df[col] = df[col].astype("float32")
else:
df[col] = df[col].astype("float64")
else:
df[col] = df[col].astype("category")
end_mem = df.memory_usage().sum() / 1024 ** 2
if show_reduction is True:
print(
"Reduced memory usage of DataFrame by "
f"{(1 - end_mem/start_mem) * 100:.2f} %."
)
return df