Virtually complete WIP checkin

With this code we successfully complete most of the V2 test case (some of the expected numbers still have to be recalculated because of the uncertainties math). This also fixes fundamentally wrong assumptions/coding from MichaelTiemannOSC's first attempt at using Pydantic and Pint together. Finally! Signed-off-by: [email protected]
os-climate · Oct 14, 2022 · b8f8915 · b8f8915
1 parent 867f327
commit b8f8915
Show file tree

Hide file tree

Showing 12 changed files with 896 additions and 1,501 deletions.
diff --git a/ITR/configs.py b/ITR/configs.py
@@ -10,7 +10,7 @@
 from ITR.data.osc_units import ureg, Q_
 from typing import List
 
-from .interfaces import TemperatureScoreControls
+from ITR.interfaces import TemperatureScoreControls
 
 class ColumnsConfig:
     # Define a constant for each column used in the
@@ -166,9 +166,9 @@ class TemperatureScoreConfig(PortfolioAggregationConfig):
         target_end_year=2050,
         projection_start_year=2010,
         projection_end_year=2019,
-        tcre=Q_(2.2, 'delta_degC'),
-        carbon_conversion=Q_(3664.0, 'Gt CO2'),
-        scenario_target_temperature=Q_(1.5, 'delta_degC')
+        tcre='2.2 delta_degC',
+        carbon_conversion=Q_('3664.0 Gt CO2'),
+        scenario_target_temperature='1.5 delta_degC'
     )
 
 

diff --git a/ITR/data/__init__.py b/ITR/data/__init__.py
@@ -9,8 +9,8 @@
 
 # openscm_units doesn't make it easy to set preprocessors.  This is one way to do it.
 unit_registry.preprocessors=[
-     lambda s1: re.sub(r'passenger.km', 'pkm', s1),
-     lambda s2: s2.replace('BoE', 'boe'),
+    lambda s1: re.sub(r'passenger.km', 'pkm', s1),
+    lambda s2: s2.replace('BoE', 'boe'),
 ]
 
 ureg = unit_registry

diff --git a/ITR/data/base_providers.py b/ITR/data/base_providers.py
@@ -1,9 +1,9 @@
 import warnings  # needed until quantile behaves better with Pint quantities in arrays
 import numpy as np
 import pandas as pd
+# import uncertainties
 # from uncertainties import ufloat
 # from uncertainties.core import Variable as utype
-import uncertainties
 from uncertainties import unumpy as unp
 import pint
 
@@ -19,7 +19,8 @@
 from ITR.interfaces import ICompanyData, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes, \
     IBenchmark, IProjection, ICompanyEIProjections, ICompanyEIProjectionsScopes, IHistoricEIScopes, \
     IHistoricEmissionsScopes, IProductionRealization, ITargetData, IHistoricData, ICompanyEIProjection, \
-    IEmissionRealization, IntensityMetric, ProjectionControls
+    IEmissionRealization, ProjectionControls
+from ITR.interfaces import EI_Quantity
 
 # TODO handling of scopes in benchmarks
 
@@ -51,7 +52,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark, scope: EScope) ->
         :return: pd.Series
         """
         return pd.Series({r.year: r.value for r in benchmark.projections}, name=(benchmark.region, benchmark.sector, scope),
-                         dtype=f'pint[{benchmark.benchmark_metric.units}]')
+                         dtype=f'pint[{str(benchmark.benchmark_metric)}]')
 
     # Production benchmarks are dimensionless.  S1S2 has nothing to do with any company data.
     # It's a label in the top-level of benchmark data.  Currently S1S2 is the only label with any data.
@@ -158,7 +159,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark, scope: EScope) ->
         :return: pd.Series
         """
         s = pd.Series({p.year: p.value for p in benchmark.projections}, name=(benchmark.region, benchmark.sector, scope),
-                      dtype=f'pint[{benchmark.benchmark_metric.units}]')
+                      dtype=f'pint[{str(benchmark.benchmark_metric)}]')
         return s
 
     def _get_projected_intensities(self, scope: EScope = EScope.S1S2) -> pd.DataFrame:
@@ -248,11 +249,15 @@ def _convert_projections_to_series(self, company: ICompanyData, feature: str,
         :return: pd.Series
         """
         company_dict = company.dict()
+        # FIXME--this should be fixed with latest ProductionMetric stuff!
         try:
             production_units = company_dict[self.column_config.PRODUCTION_METRIC]['units']
         except TypeError:
-            breakpoint()
-        emissions_units = company_dict[self.column_config.EMISSIONS_METRIC]['units']
+            production_units = str(company_dict[self.column_config.PRODUCTION_METRIC])
+        try:
+            emissions_units = company_dict[self.column_config.EMISSIONS_METRIC]['units']
+        except TypeError:
+            emissions_units = str(company_dict[self.column_config.EMISSIONS_METRIC])
         if company_dict[feature][scope.name]:
             projections = company_dict[feature][scope.name]['projections']
         else:
@@ -298,7 +303,7 @@ def _calculate_target_projections(self, production_bm: BaseProviderProductionBen
                     warnings.simplefilter("ignore")
                     company_sector_region_info = pd.DataFrame({
                         self.column_config.COMPANY_ID: [c.company_id],
-                        self.column_config.BASE_YEAR_PRODUCTION: [base_year_production.to(c.production_metric.units)],
+                        self.column_config.BASE_YEAR_PRODUCTION: [base_year_production.to(str(c.production_metric))],
                         self.column_config.GHG_SCOPE12: [c.ghg_s1s2],
                         self.column_config.GHG_SCOPE3: [c.ghg_s3],
                         self.column_config.SECTOR: [c.sector],
@@ -308,7 +313,7 @@ def _calculate_target_projections(self, production_bm: BaseProviderProductionBen
                             production_bm.get_company_projected_production(company_sector_region_info)
                             # We transpose the data so that we get a pd.Series that will accept the pint units as a whole (not element-by-element)
                             .iloc[0].T
-                            .astype(f'pint[{str(base_year_production.units)}]')
+                            .astype(f'pint[{str(base_year_production.u)}]')
                             )
                 c.projected_targets = EITargetProjector().project_ei_targets(c, bm_production_data)
 
@@ -537,20 +542,20 @@ def _add_projections_to_companies(self, companies: List[ICompanyData], extrapola
                 scope_dfs[scope] = results.astype(f"pint[{units}]")
                 projections = [IProjection(year=year, value=value) for year, value in results.items()
                                if year in range(self.projection_controls.BASE_YEAR, self.projection_controls.TARGET_YEAR+1)]
-                scope_projections[scope] = ICompanyEIProjections(ei_metric={'units': units}, projections=projections)
+                scope_projections[scope] = ICompanyEIProjections(ei_metric=units, projections=projections)
             if scope_projections['S1'] and scope_projections['S2'] and not scope_projections['S1S2']:
                 results = scope_dfs['S1'] + scope_dfs['S2']
                 units = f"{results.values[0].u:~P}"
                 projections = [IProjection(year=year, value=value) for year, value in results.items()
                                if year in range(self.projection_controls.BASE_YEAR, self.projection_controls.TARGET_YEAR+1)]
-                scope_projections['S1S2'] = ICompanyEIProjections(ei_metric={'units': units}, projections=projections)
+                scope_projections['S1S2'] = ICompanyEIProjections(ei_metric=units, projections=projections)
             # FIXME: do we really need to do this?  We're going to migrate S3 to S1S2 and ignore S1S2S3...
             if scope_projections['S1S2'] and scope_projections['S3'] and not scope_projections['S1S2S3']:
                 results = scope_dfs['S1S2'] + scope_dfs['S3']
                 units = f"{results.values[0].u:~P}"
                 projections = [IProjection(year=year, value=value) for year, value in results.items()
                                if year in range(self.projection_controls.BASE_YEAR, self.projection_controls.TARGET_YEAR+1)]
-                scope_projections['S1S2S3'] = ICompanyEIProjections(ei_metric={'units': units}, projections=projections)
+                scope_projections['S1S2S3'] = ICompanyEIProjections(ei_metric=units, projections=projections)
             company.projected_intensities = ICompanyEIProjectionsScopes(**scope_projections)
 
     def _standardize(self, intensities: pd.DataFrame) -> pd.DataFrame:
@@ -766,7 +771,7 @@ def project_ei_targets(self, company: ICompanyData, production_bm: pd.Series) ->
                         ei_projection_scopes[scope].projections.extend(ei_projections)
                     else:
                         ei_projection_scopes[scope] = ICompanyEIProjections(projections=ei_projections,
-                                                                            ei_metric=IntensityMetric.parse_obj({'units': target.target_base_year_unit}))
+                                                                            ei_metric=EI_Quantity(target.target_base_year_unit))
                 elif target.target_type == "absolute":
                     # Complicated case, the target must be switched from absolute value to intensity.
                     # We use the benchmark production data
@@ -821,8 +826,7 @@ def project_ei_targets(self, company: ICompanyData, production_bm: pd.Series) ->
                         ei_projection_scopes[scope].projections.extend(ei_projections)
                     else:
                         ei_projection_scopes[scope] = ICompanyEIProjections(projections=ei_projections,
-                                                                            ei_metric=IntensityMetric.parse_obj(
-                                                                                {'units': f"{target_value.u:~P}"}))
+                                                                            ei_metric=EI_Quantity (f"{target_value.u:~P}"))
                 else:
                     # No target (type) specified
                     ei_projection_scopes[scope] = None

diff --git a/ITR/data/data_warehouse.py b/ITR/data/data_warehouse.py
@@ -2,6 +2,8 @@
 import logging
 import pandas as pd
 import numpy as np
+from uncertainties import unumpy as unp
+
 from abc import ABC
 from typing import List, Type
 from pydantic import ValidationError
@@ -96,15 +98,8 @@ def get_preprocessed_company_data(self, company_ids: List[str]) -> List[ICompany
             pd.Series([self.benchmarks_projected_ei.benchmark_global_budget] * len(df_company_data),
                       dtype='pint[Gt CO2]',
                       index=df_company_data.index)
-        df_company_data[self.column_config.BENCHMARK_TEMP] = \
-            pd.Series([self.benchmarks_projected_ei.benchmark_temperature] * len(df_company_data),
-                      dtype='pint[delta_degC]',
-                      index=df_company_data.index)
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            # See https://github.com/hgrecco/pint-pandas/issues/114
-            for col in [self.column_config.CUMULATIVE_TRAJECTORY, self.column_config.CUMULATIVE_TARGET, self.column_config.CUMULATIVE_BUDGET]:
-                df_company_data[col] = df_company_data[col].apply(lambda x: str(x))
+        # ICompanyAggregates wants this Quantity as a `str`
+        df_company_data[self.column_config.BENCHMARK_TEMP] = [str(self.benchmarks_projected_ei.benchmark_temperature)] * len(df_company_data)
         companies = df_company_data.to_dict(orient="records")
         aggregate_company_data = [ICompanyAggregates.parse_obj(company) for company in companies]
         return aggregate_company_data
@@ -138,7 +133,7 @@ def _get_cumulative_emissions(self, projected_ei: pd.DataFrame, projected_produc
         :return: cumulative emissions based on weighted sum of emissions intensity * production
         """
         projected_emissions = projected_ei.multiply(projected_production)
-        projected_emissions = projected_emissions.applymap(lambda x: x if isinstance(x,float) else x if np.isfinite(x.m) else np.nan)
+        projected_emissions = projected_emissions.applymap(lambda x: np.nan if unp.isnan(x) else x)
         null_idx = projected_emissions.index[projected_emissions.isnull().all(axis=1)]
         return pd.concat([projected_emissions.loc[null_idx, projected_emissions.columns[0]],
                           projected_emissions.loc[projected_emissions.index.difference(null_idx)].sum(axis=1)]).astype('pint[Mt CO2]')
diff --git a/ITR/data/template.py b/ITR/data/template.py
@@ -13,7 +13,7 @@
 
 from ITR.data.base_providers import BaseCompanyDataProvider
 from ITR.configs import ColumnsConfig, TemperatureScoreConfig, VariablesConfig, TabsConfig, SectorsConfig, LoggingConfig
-from ITR.interfaces import ICompanyData, EScope, \
+from ITR.interfaces import ICompanyData, EScope, ProductionQuantity, EmissionsQuantity, EI_Quantity, \
     IHistoricEmissionsScopes, \
     IProductionRealization, IHistoricEIScopes, IHistoricData, ITargetData, IEmissionRealization, IEIRealization, \
     IProjection, ProjectionControls
@@ -413,7 +413,7 @@ def _validate_target_data(self, target_data: pd.DataFrame) -> pd.DataFrame:
         c_ids_invalid_netzero_year = list(target_data[target_data['netzero_year'] > ProjectionControls.TARGET_YEAR].index)
         if c_ids_invalid_netzero_year:
             error_message = f"Invalid net-zero target years (>{ProjectionControls.TARGET_YEAR}) are entered for companies with ID: " \
-                            f"{c_ids_without_netzero_year}"
+                            f"{c_ids_invalid_netzero_year}"
             logger.error(error_message)
             raise ValueError(error_message)
         target_data.loc[target_data.netzero_year.isna(), 'netzero_year'] = ProjectionControls.TARGET_YEAR
@@ -473,13 +473,6 @@ def _company_df_to_model(self, df_fundamentals: pd.DataFrame,
                 else:
                     company_data[ColumnsConfig.TARGET_DATA] = None
 
-                if company_data[ColumnsConfig.PRODUCTION_METRIC]:
-                    company_data[ColumnsConfig.PRODUCTION_METRIC] = {
-                        'units': company_data[ColumnsConfig.PRODUCTION_METRIC]}
-                if company_data[ColumnsConfig.EMISSIONS_METRIC]:
-                    company_data[ColumnsConfig.EMISSIONS_METRIC] = {
-                        'units': company_data[ColumnsConfig.EMISSIONS_METRIC]}
-
                 # handling of missing market cap data is mainly done in _convert_from_template_company_data()
                 if company_data[ColumnsConfig.COMPANY_MARKET_CAP] is pd.NA:
                     company_data[ColumnsConfig.COMPANY_MARKET_CAP] = np.nan
@@ -547,7 +540,7 @@ def _convert_to_historic_emissions(self, emissions: pd.DataFrame) -> Optional[IH
             results = emissions.loc[emissions[ColumnsConfig.SCOPE] == scope]
             emissions_scopes[scope] = [] \
                 if results.empty \
-                else [IEmissionRealization(year=year, value=results[year].squeeze()) for year in self.historic_years]
+                else [IEmissionRealization(year=year, value=EmissionsQuantity(results[year].squeeze())) for year in self.historic_years]
         return IHistoricEmissionsScopes(**emissions_scopes)
 
     def _convert_to_historic_productions(self, productions: pd.DataFrame) -> Optional[List[IProductionRealization]]:
@@ -557,7 +550,7 @@ def _convert_to_historic_productions(self, productions: pd.DataFrame) -> Optiona
         """
         if productions.empty:
             return None
-        return [IProductionRealization(year=year, value=productions[year].squeeze()) for year in self.historic_years]
+        return [IProductionRealization(year=year, value=ProductionQuantity(productions[year].squeeze())) for year in self.historic_years]
 
     def _convert_to_historic_ei(self, intensities: pd.DataFrame) -> Optional[IHistoricEIScopes]:
         """
@@ -574,5 +567,5 @@ def _convert_to_historic_ei(self, intensities: pd.DataFrame) -> Optional[IHistor
             results = intensities.loc[intensities[ColumnsConfig.SCOPE] == scope]
             intensity_scopes[scope] = [] \
                 if results.empty \
-                else [IEIRealization(year=year, value=results[year].squeeze()) for year in self.historic_years]
+                else [IEIRealization(year=year, value=EI_Quantity(results[year].squeeze())) for year in self.historic_years]
         return IHistoricEIScopes(**intensity_scopes)