Source code for egon.data.datasets.zensus_vg250
import datetime
import json
import time
from geoalchemy2 import Geometry
from sqlalchemy import (
BigInteger,
Column,
Float,
Integer,
SmallInteger,
String,
func,
select,
)
from sqlalchemy.ext.declarative import declarative_base
import geopandas as gpd
from egon.data import db
from egon.data.datasets import Dataset
from egon.data.datasets.vg250 import vg250_metadata_resources_fields
from egon.data.metadata import (
context,
licenses_datenlizenz_deutschland,
meta_metadata,
)
import egon.data.config
Base = declarative_base()
[docs]class ZensusVg250(Dataset):
def __init__(self, dependencies):
super().__init__(
name="ZensusVg250",
version="0.0.2",
dependencies=dependencies,
tasks=(
map_zensus_vg250,
inside_germany,
add_metadata_zensus_inside_ger,
population_in_municipalities,
add_metadata_vg250_gem_pop,
),
)
[docs]class Vg250Sta(Base):
__tablename__ = "vg250_sta"
__table_args__ = {"schema": "boundaries"}
id = Column(BigInteger, primary_key=True, index=True)
ade = Column(BigInteger)
gf = Column(BigInteger)
bsg = Column(BigInteger)
ars = Column(String)
ags = Column(String)
sdv_ars = Column(String)
gen = Column(String)
bez = Column(String)
ibz = Column(BigInteger)
bem = Column(String)
nbd = Column(String)
sn_l = Column(String)
sn_r = Column(String)
sn_k = Column(String)
sn_v1 = Column(String)
sn_v2 = Column(String)
sn_g = Column(String)
fk_s3 = Column(String)
nuts = Column(String)
ars_0 = Column(String)
ags_0 = Column(String)
wsk = Column(String)
debkg_id = Column(String)
rs = Column(String)
sdv_rs = Column(String)
rs_0 = Column(String)
geometry = Column(Geometry(srid=4326), index=True)
[docs]class Vg250Gem(Base):
__tablename__ = "vg250_gem"
__table_args__ = {"schema": "boundaries"}
id = Column(BigInteger, primary_key=True, index=True)
ade = Column(BigInteger)
gf = Column(BigInteger)
bsg = Column(BigInteger)
ars = Column(String)
ags = Column(String)
sdv_ars = Column(String)
gen = Column(String)
bez = Column(String)
ibz = Column(BigInteger)
bem = Column(String)
nbd = Column(String)
sn_l = Column(String)
sn_r = Column(String)
sn_k = Column(String)
sn_v1 = Column(String)
sn_v2 = Column(String)
sn_g = Column(String)
fk_s3 = Column(String)
nuts = Column(String)
ars_0 = Column(String)
ags_0 = Column(String)
wsk = Column(String)
debkg_id = Column(String)
rs = Column(String)
sdv_rs = Column(String)
rs_0 = Column(String)
geometry = Column(Geometry(srid=4326), index=True)
[docs]class DestatisZensusPopulationPerHa(Base):
__tablename__ = "destatis_zensus_population_per_ha"
__table_args__ = {"schema": "society"}
id = Column(Integer, primary_key=True, index=True)
grid_id = Column(String(254), nullable=False)
x_mp = Column(Integer)
y_mp = Column(Integer)
population = Column(SmallInteger)
geom_point = Column(Geometry("POINT", 3035), index=True)
geom = Column(Geometry("POLYGON", 3035), index=True)
[docs]class DestatisZensusPopulationPerHaInsideGermany(Base):
__tablename__ = "destatis_zensus_population_per_ha_inside_germany"
__table_args__ = {"schema": "society"}
id = Column(Integer, primary_key=True, index=True)
grid_id = Column(String(254), nullable=False)
population = Column(SmallInteger)
geom_point = Column(Geometry("POINT", 3035), index=True)
geom = Column(Geometry("POLYGON", 3035), index=True)
[docs]class Vg250GemPopulation(Base):
__tablename__ = "vg250_gem_population"
__table_args__ = {"schema": "boundaries"}
id = Column(Integer, primary_key=True, index=True)
gen = Column(String)
bez = Column(String)
bem = Column(String)
nuts = Column(String)
ags_0 = Column(String)
rs_0 = Column(String)
area_ha = Column(Float)
area_km2 = Column(Float)
population_total = Column(Integer)
cell_count = Column(Integer)
population_density = Column(Integer)
geom = Column(Geometry(srid=3035))
[docs]class MapZensusVg250(Base):
__tablename__ = "egon_map_zensus_vg250"
__table_args__ = {"schema": "boundaries"}
zensus_population_id = Column(Integer, primary_key=True, index=True)
zensus_geom = Column(Geometry("POINT", 3035))
vg250_municipality_id = Column(Integer)
vg250_nuts3 = Column(String)
[docs]def map_zensus_vg250():
"""Perform mapping between municipalities and zensus grid"""
MapZensusVg250.__table__.drop(bind=db.engine(), checkfirst=True)
MapZensusVg250.__table__.create(bind=db.engine(), checkfirst=True)
# Get information from data configuration file
cfg = egon.data.config.datasets()["map_zensus_vg250"]
local_engine = db.engine()
db.execute_sql(
f"""DELETE FROM
{cfg['targets']['map']['schema']}.{cfg['targets']['map']['table']}"""
)
gdf = db.select_geodataframe(
f"""SELECT * FROM
{cfg['sources']['zensus_population']['schema']}.
{cfg['sources']['zensus_population']['table']}""",
geom_col="geom_point",
)
gdf_boundaries = db.select_geodataframe(
f"""SELECT * FROM {cfg['sources']['vg250_municipalities']['schema']}.
{cfg['sources']['vg250_municipalities']['table']}""",
geom_col="geometry",
epsg=3035,
)
# Join vg250 with zensus cells
join = gpd.sjoin(gdf, gdf_boundaries, how="inner", op="intersects")
# Deal with cells that don't interect with boundaries (e.g. at borders)
missing_cells = gdf[(~gdf.id.isin(join.id_left)) & (gdf.population > 0)]
# start with buffer
buffer = 0
# increase buffer until every zensus cell is matched to a nuts3 region
while len(missing_cells) > 0:
buffer += 100
boundaries_buffer = gdf_boundaries.copy()
boundaries_buffer.geometry = boundaries_buffer.geometry.buffer(buffer)
join_missing = gpd.sjoin(
missing_cells, boundaries_buffer, how="inner", op="intersects"
)
join = join.append(join_missing)
missing_cells = gdf[
(~gdf.id.isin(join.id_left)) & (gdf.population > 0)
]
print(f"Maximal buffer to match zensus points to vg250: {buffer}m")
# drop duplicates
join = join.drop_duplicates(subset=["id_left"])
# Insert results to database
join.rename(
{
"id_left": "zensus_population_id",
"geom_point": "zensus_geom",
"nuts": "vg250_nuts3",
"id_right": "vg250_municipality_id",
},
axis=1,
)[
[
"zensus_population_id",
"zensus_geom",
"vg250_municipality_id",
"vg250_nuts3",
]
].set_geometry(
"zensus_geom"
).to_postgis(
cfg["targets"]["map"]["table"],
schema=cfg["targets"]["map"]["schema"],
con=local_engine,
if_exists="replace",
)
[docs]def inside_germany():
"""
Filter zensus data by data inside Germany and population > 0
"""
# Get database engine
engine_local_db = db.engine()
# Create new table
db.execute_sql(
f"""
DROP TABLE IF EXISTS {DestatisZensusPopulationPerHaInsideGermany.__table__.schema}.{DestatisZensusPopulationPerHaInsideGermany.__table__.name} CASCADE;
"""
)
DestatisZensusPopulationPerHaInsideGermany.__table__.create(
bind=engine_local_db, checkfirst=True
)
with db.session_scope() as s:
# Query zensus cells in German boundaries from vg250
cells_in_germany = s.query(MapZensusVg250.zensus_population_id)
# Query relevant data from zensus population table
q = (
s.query(
DestatisZensusPopulationPerHa.id,
DestatisZensusPopulationPerHa.grid_id,
DestatisZensusPopulationPerHa.population,
DestatisZensusPopulationPerHa.geom_point,
DestatisZensusPopulationPerHa.geom,
)
.filter(DestatisZensusPopulationPerHa.population > 0)
.filter(DestatisZensusPopulationPerHa.id.in_(cells_in_germany))
)
# Insert above queried data into new table
insert = DestatisZensusPopulationPerHaInsideGermany.__table__.insert().from_select(
(
DestatisZensusPopulationPerHaInsideGermany.id,
DestatisZensusPopulationPerHaInsideGermany.grid_id,
DestatisZensusPopulationPerHaInsideGermany.population,
DestatisZensusPopulationPerHaInsideGermany.geom_point,
DestatisZensusPopulationPerHaInsideGermany.geom,
),
q,
)
# Execute and commit (trigger transactions in database)
s.execute(insert)
s.commit()
[docs]def population_in_municipalities():
"""
Create table of municipalities with information about population
"""
engine_local_db = db.engine()
Vg250GemPopulation.__table__.drop(bind=engine_local_db, checkfirst=True)
Vg250GemPopulation.__table__.create(bind=engine_local_db, checkfirst=True)
srid = 3035
gem = db.select_geodataframe(
"SELECT * FROM boundaries.vg250_gem",
geom_col="geometry",
epsg=srid,
index_col="id",
)
gem["area_ha"] = gem.area / 10000
gem["area_km2"] = gem.area / 1000000
population = db.select_dataframe(
"""SELECT id, population, vg250_municipality_id
FROM society.destatis_zensus_population_per_ha
INNER JOIN boundaries.egon_map_zensus_vg250 ON (
society.destatis_zensus_population_per_ha.id =
boundaries.egon_map_zensus_vg250.zensus_population_id)
WHERE population > 0"""
)
gem["population_total"] = (
population.groupby("vg250_municipality_id").population.sum().fillna(0)
)
gem["cell_count"] = population.groupby(
"vg250_municipality_id"
).population.count()
gem["population_density"] = gem["population_total"] / gem["area_km2"]
gem.reset_index().to_postgis(
"vg250_gem_population",
schema="boundaries",
con=db.engine(),
if_exists="replace",
)
[docs]def add_metadata_zensus_inside_ger():
"""
Create metadata JSON for DestatisZensusPopulationPerHaInsideGermany
Creates a metdadata JSON string and writes it to the database table comment
"""
schema_table = ".".join(
[
DestatisZensusPopulationPerHaInsideGermany.__table__.schema,
DestatisZensusPopulationPerHaInsideGermany.__table__.name,
]
)
metadata = {
"name": schema_table,
"title": "DESTATIS - Zensus 2011 - Population per hectar",
"id": "WILL_BE_SET_AT_PUBLICATION",
"description": (
"National census in Germany in 2011 with the bounds on Germanys "
"borders."
),
"language": ["en-EN", "de-DE"],
"publicationDate": datetime.date.today().isoformat(),
"context": context(),
"spatial": {
"location": None,
"extent": "Germany",
"resolution": "1 ha",
},
"temporal": {
"reference_date": "2011-12-31",
"timeseries": {
"start": None,
"end": None,
"resolution": None,
"alignment": None,
"aggregationType": None,
},
},
"sources": [
{
"title": "Statistisches Bundesamt (Destatis) - Ergebnisse des "
"Zensus 2011 zum Download",
"description": (
"Als Download bieten wir Ihnen auf dieser Seite "
"zusätzlich zur Zensusdatenbank CSV- und "
"teilweise Excel-Tabellen mit umfassenden "
"Personen-, Haushalts- und Familien- sowie "
"Gebäude- und Wohnungsmerkmalen. Die "
"Ergebnisse liegen auf Bundes-, Länder-, Kreis- "
"und Gemeindeebene vor. Außerdem sind einzelne "
"Ergebnisse für Gitterzellen verfügbar."
),
"path": "https://www.zensus2011.de/DE/Home/Aktuelles/"
"DemografischeGrunddaten.html",
"licenses": [
licenses_datenlizenz_deutschland(
attribution="© Statistische Ämter des Bundes und der "
"Länder 2014"
)
],
},
{
"title": "Dokumentation - Zensus 2011 - Methoden und Verfahren",
"description": (
"Diese Publikation beschreibt ausführlich die "
"Methoden und Verfahren des registergestützten "
"Zensus 2011; von der Datengewinnung und "
"-aufbereitung bis hin zur Ergebniserstellung"
" und Geheimhaltung. Der vorliegende Band wurde "
"von den Statistischen Ämtern des Bundes und "
"der Länder im Juni 2015 veröffentlicht."
),
"path": "https://www.destatis.de/DE/Publikationen/Thematisch/Be"
"voelkerung/Zensus/ZensusBuLaMethodenVerfahren51211051"
"19004.pdf?__blob=publicationFile",
"licenses": [
licenses_datenlizenz_deutschland(
attribution="© Statistisches Bundesamt, Wiesbaden "
"2015 (im Auftrag der "
"Herausgebergemeinschaft)"
)
],
},
],
"licenses": [
licenses_datenlizenz_deutschland(
attribution="© Statistische Ämter des Bundes und der Länder "
"2014; © Statistisches Bundesamt, Wiesbaden 2015 "
"(Daten verändert)"
)
],
"contributors": [
{
"title": "Guido Pleßmann",
"email": "http://github.com/gplssm",
"date": time.strftime("%Y-%m-%d"),
"object": None,
"comment": "Imported data",
},
{
"title": "Jonathan Amme",
"email": "http://github.com/nesnoj",
"date": time.strftime("%Y-%m-%d"),
"object": None,
"comment": "Metadata extended",
},
],
"resources": [
{
"profile": "tabular-data-resource",
"name": schema_table,
"path": None,
"format": "PostgreSQL",
"encoding": "UTF-8",
"schema": {
"fields": [
{
"name": "id",
"description": "Unique identifier",
"type": "none",
"unit": "integer",
},
{
"name": "grid_id",
"description": "Grid number of source",
"type": "string",
"unit": "none",
},
{
"name": "population",
"description": "Number of registred residents",
"type": "integer",
"unit": "resident",
},
{
"name": "geom_point",
"description": "Geometry centroid",
"type": "Geometry",
"unit": "none",
},
{
"name": "geom",
"description": "Geometry",
"type": "Geometry",
"unit": "",
},
],
"primaryKey": ["id"],
"foreignKeys": [],
},
"dialect": {"delimiter": None, "decimalSeparator": "."},
}
],
"metaMetadata": meta_metadata(),
}
meta_json = "'" + json.dumps(metadata) + "'"
db.submit_comment(
meta_json,
DestatisZensusPopulationPerHaInsideGermany.__table__.schema,
DestatisZensusPopulationPerHaInsideGermany.__table__.name,
)
[docs]def add_metadata_vg250_gem_pop():
"""
Create metadata JSON for Vg250GemPopulation
Creates a metdadata JSON string and writes it to the database table comment
"""
vg250_config = egon.data.config.datasets()["vg250"]
schema_table = ".".join(
[
Vg250GemPopulation.__table__.schema,
Vg250GemPopulation.__table__.name,
]
)
licenses = [
licenses_datenlizenz_deutschland(
attribution="© Bundesamt für Kartographie und Geodäsie "
"2020 (Daten verändert)"
)
]
vg250_source = {
"title": "Verwaltungsgebiete 1:250 000 (Ebenen)",
"description": "Der Datenbestand umfasst sämtliche Verwaltungseinheiten der "
"hierarchischen Verwaltungsebenen vom Staat bis zu den Gemeinden "
"mit ihren Grenzen, statistischen Schlüsselzahlen, Namen der "
"Verwaltungseinheit sowie die spezifische Bezeichnung der "
"Verwaltungsebene des jeweiligen Landes.",
"path": vg250_config["original_data"]["source"]["url"],
"licenses": licenses,
}
resources_fields = vg250_metadata_resources_fields()
resources_fields.extend(
[
{
"name": "area_ha",
"description": "Area in ha",
"type": "float",
"unit": "ha",
},
{
"name": "area_km2",
"description": "Area in km2",
"type": "float",
"unit": "km2",
},
{
"name": "population_total",
"description": "Number of inhabitants",
"type": "integer",
"unit": "none",
},
{
"name": "cell_count",
"description": "Number of Zensus cells",
"type": "integer",
"unit": "none",
},
{
"name": "population_density",
"description": "Number of inhabitants per km2",
"type": "float",
"unit": "inhabitants/km²",
},
]
)
metadata = {
"name": schema_table,
"title": (
"Municipalities (BKG Verwaltungsgebiete 250) and population "
"(Destatis Zensus)"
),
"id": "WILL_BE_SET_AT_PUBLICATION",
"description": "Municipality data enriched by population data",
"language": ["de-DE"],
"publicationDate": datetime.date.today().isoformat(),
"context": context(),
"spatial": {
"location": None,
"extent": "Germany",
"resolution": "1:250000",
},
"temporal": {
"referenceDate": "2020-01-01",
"timeseries": {
"start": None,
"end": None,
"resolution": None,
"alignment": None,
"aggregationType": None,
},
},
"sources": [vg250_source],
"licenses": licenses,
"contributors": [
{
"title": "Guido Pleßmann",
"email": "http://github.com/gplssm",
"date": time.strftime("%Y-%m-%d"),
"object": None,
"comment": "Imported data",
},
{
"title": "Jonathan Amme",
"email": "http://github.com/nesnoj",
"date": time.strftime("%Y-%m-%d"),
"object": None,
"comment": "Metadata extended",
},
],
"resources": [
{
"profile": "tabular-data-resource",
"name": schema_table,
"path": None,
"format": "PostgreSQL",
"encoding": "UTF-8",
"schema": {
"fields": resources_fields,
"primaryKey": ["id"],
"foreignKeys": [],
},
"dialect": {"delimiter": None, "decimalSeparator": "."},
}
],
"metaMetadata": meta_metadata(),
}
meta_json = "'" + json.dumps(metadata) + "'"
db.submit_comment(
meta_json,
Vg250GemPopulation.__table__.schema,
Vg250GemPopulation.__table__.name,
)