Create and adjust interim tables for time series

IlkaCu · IlkaCu · commit 14bd30ecc6fa · 2022-11-07T13:48:57.000+01:00
diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml
@@ -738,6 +738,9 @@ electrical_load_curves_industry:
     sites_load:
       schema: 'demand'
       table: 'egon_sites_ind_load_curves'
+    sites_load_individual:
+      schema: 'demand'
+      table: 'egon_sites_ind_load_curves_individual'
 
 etrago_electricity:
   sources:
diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py
@@ -212,7 +212,7 @@ def calc_ind_site_timeseries(scenario):
                 data=curves_bus["bus_id"], index=curves_bus["id"].astype(int)
             )
             ts["scenario_name"] = scenario
-            curves_bus.drop({"id", "bus_id"}, axis=1, inplace=True)
+            curves_bus.drop({"id", "bus_id", "geom"}, axis=1, inplace=True)
             ts["p_set"] = curves_bus.values.tolist()
 
             # add subsector to relate to Schmidt's tables afterwards
diff --git a/src/egon/data/datasets/industry/__init__.py b/src/egon/data/datasets/industry/__init__.py
@@ -60,6 +60,9 @@ class DemandCurvesOsmIndustryIndividual(Base):
     bus_id = Column(Integer)
     scn_name = Column(String, primary_key= True)
     p_set = Column(ARRAY(Float))
+    peak_load = Column(Float)
+    demand = Column(Float)
+    voltage_level = Column(Integer)
 
 
 
@@ -78,11 +81,12 @@ class DemandCurvesSitesIndustryIndividual(Base):
     __table_args__ = {"schema": "demand"}
 
     site_id = Column(Integer, primary_key = True)
-    bus = Column(Integer)
+    bus_id = Column(Integer)
     scn_name = Column(String, primary_key=True)
-    wz = Column(Integer)
     p_set = Column(ARRAY(Float))
-    geom= Column(Geometry("POINT", 4326))
+    peak_load = Column(Float)
+    demand = Column(Float)
+    voltage_level = Column(Integer)
 
 
 def create_tables():
@@ -135,6 +139,12 @@ def create_tables():
             {targets_temporal['sites_load']['table']} CASCADE;"""
     )
 
+    db.execute_sql(
+        f"""DROP TABLE IF EXISTS
+            {targets_temporal['sites_load_individual']['schema']}.
+            {targets_temporal['sites_load_individual']['table']} CASCADE;"""
+    )
+
     engine = db.engine()
 
     EgonDemandRegioSitesIndElectricity.__table__.create(
@@ -153,6 +163,10 @@ def create_tables():
 
     DemandCurvesSitesIndustry.__table__.create(bind=engine, checkfirst=True)
 
+    DemandCurvesSitesIndustryIndividual.__table__.create(
+        bind=engine, checkfirst=True
+    )
+
 
 def industrial_demand_distr():
     """ Distribute electrical demands for industry to osm landuse polygons
@@ -386,7 +400,7 @@ class IndustrialDemandCurves(Dataset):
     def __init__(self, dependencies):
         super().__init__(
             name="Industrial_demand_curves",
-            version="0.0.4",
+            version="0.0.5",
             dependencies=dependencies,
             tasks=(
                 create_tables,
diff --git a/src/egon/data/datasets/industry/temporal.py b/src/egon/data/datasets/industry/temporal.py
@@ -6,6 +6,7 @@
 import egon.data.config
 import geopandas as gpd
 import pandas as pd
+import numpy as np
 from egon.data import db
 from egon.data.datasets.electricity_demand.temporal import calc_load_curve
 from sqlalchemy import ARRAY, Column, Float, Integer, String
@@ -14,6 +15,38 @@
 Base = declarative_base()
 
 
+def identify_voltage_level(df):
+    """Assign a voltage level to each row based on its peak load.
+
+    The column ``voltage_level`` is added to (or overwritten in) ``df`` in
+    place and the same object is returned. Rows whose ``peak_load`` is NaN
+    match none of the thresholds and keep NaN as voltage level.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Data frame with a numeric column ``peak_load``
+
+    Returns
+    -------
+    pandas.DataFrame
+        The input data frame with the additional column ``voltage_level``
+    """
+    df["voltage_level"] = np.nan
+
+    # Thresholds as defined in the eGon project. Later assignments overwrite
+    # earlier ones, so every row ends up with the level of the highest
+    # threshold it exceeds; a peak load of exactly 0.1 belongs to level 7.
+    df.loc[df["peak_load"] <= 0.1, "voltage_level"] = 7
+    df.loc[df["peak_load"] > 0.1, "voltage_level"] = 6
+    df.loc[df["peak_load"] > 0.2, "voltage_level"] = 5
+    df.loc[df["peak_load"] > 5.5, "voltage_level"] = 4
+    df.loc[df["peak_load"] > 20, "voltage_level"] = 3
+    df.loc[df["peak_load"] > 120, "voltage_level"] = 1
+
+    return df
+
+
 def identify_bus(load_curves, demand_area):
     """Identify the grid connection point for a consumer by determining its grid level
     based on the time series' peak load and the spatial intersection to mv
@@ -50,17 +83,11 @@ def identify_bus(load_curves, demand_area):
     )
 
     # Initialize dataframe to identify peak load per demand area (e.g. osm landuse area or industrial site)
-    peak = pd.DataFrame(columns=["id", "peak_load", "voltage_level"])
+    peak = pd.DataFrame(columns=["id", "peak_load"])
     peak["id"] = load_curves.max(axis=0).index
     peak["peak_load"] = load_curves.max(axis=0).values
 
-    # Identify voltage_level for every demand area taking thresholds into account which were defined in the eGon project
-    peak.loc[peak["peak_load"] < 0.1, "voltage_level"] = 7
-    peak.loc[peak["peak_load"] > 0.1, "voltage_level"] = 6
-    peak.loc[peak["peak_load"] > 0.2, "voltage_level"] = 5
-    peak.loc[peak["peak_load"] > 5.5, "voltage_level"] = 4
-    peak.loc[peak["peak_load"] > 20, "voltage_level"] = 3
-    peak.loc[peak["peak_load"] > 120, "voltage_level"] = 1
+    peak = identify_voltage_level(peak)
 
     # Assign bus_id to demand area by merging landuse and peak df
     peak = pd.merge(demand_area, peak, right_on="id", left_index=True)
@@ -210,6 +237,14 @@ def insert_osm_ind_load():
             """
         )
 
+        db.execute_sql(
+            f"""
+            DELETE FROM
+            {targets['osm_load_individual']['schema']}.{targets['osm_load_individual']['table']}
+            WHERE scn_name = '{scenario}'
+            """
+        )
+
         # Calculate cts load curves per mv substation (hvmv bus)
         data, curves_individual = calc_load_curves_ind_osm(scenario)
         data.index = data.index.rename("bus")
@@ -225,6 +260,9 @@ def insert_osm_ind_load():
             if_exists="append",
         )
 
+        curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
+        curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
+        curves_individual = identify_voltage_level(curves_individual)
 
         curves_individual.to_sql(
             targets["osm_load_individual"]["table"],
@@ -321,7 +359,16 @@ def calc_load_curves_ind_sites(scenario):
     # Insert data for pf load timeseries table
     load_ts_df.p_set = curves_bus.values.tolist()
 
-    return load_ts_df
+    # Create a DataFrame that stores one time series per individual site
+    curves_individual_interim = (
+        curves_da.drop(["bus_id", "geom", "wz"], axis=1).fillna(0)
+    ).set_index("id")
+    curves_individual = curves_da[["id", "bus_id"]]
+    curves_individual["p_set"] = curves_individual_interim.values.tolist()
+    curves_individual["scn_name"]= scenario
+    curves_individual = curves_individual.rename(columns={"id": "site_id"}).set_index(['site_id', 'scn_name'])
+
+    return load_ts_df, curves_individual
 
 
 def insert_sites_ind_load():
@@ -348,8 +395,18 @@ def insert_sites_ind_load():
             """
         )
 
+        # Delete existing data from database
+        db.execute_sql(
+            f"""
+            DELETE FROM
+            {targets['sites_load_individual']['schema']}.
+            {targets['sites_load_individual']['table']}
+            WHERE scn_name = '{scenario}'
+            """
+        )
+
         # Calculate industrial load curves per bus
-        data = calc_load_curves_ind_sites(scenario)
+        data, curves_individual = calc_load_curves_ind_sites(scenario)
         data.index = data.index.rename(["bus", "wz"])
         data["scn_name"] = scenario
 
@@ -362,3 +419,14 @@ def insert_sites_ind_load():
             con=db.engine(),
             if_exists="append",
         )
+
+        curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
+        curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
+        curves_individual = identify_voltage_level(curves_individual)
+
+        curves_individual.to_sql(
+            targets["sites_load_individual"]["table"],
+            schema=targets["sites_load_individual"]["schema"],
+            con=db.engine(),
+            if_exists="append",
+        )

Original file line number	Diff line number	Diff line change
`@@ -212,7 +212,7 @@ def calc_ind_site_timeseries(scenario):`
`212`	`212`	`data=curves_bus["bus_id"], index=curves_bus["id"].astype(int)`
`213`	`213`	`)`
`214`	`214`	`ts["scenario_name"] = scenario`
`215`		`- curves_bus.drop({"id", "bus_id"}, axis=1, inplace=True)`
	`215`	`+ curves_bus.drop({"id", "bus_id", "geom"}, axis=1, inplace=True)`
`216`	`216`	`ts["p_set"] = curves_bus.values.tolist()`
`217`	`217`
`218`	`218`	`# add subsector to relate to Schmidt's tables afterwards`