Skip to content

Commit

Permalink
Virtually complete WIP checkin
Browse files Browse the repository at this point in the history
With this code we successfully complete most of the V2 test case (some numbers have to be recalculated because of the uncertainties math).  This also fixes fundamentally wrong assumptions/coding from MichaelTiemannOSC's first attempt at using Pydantic and Pint together.  Finally!

Signed-off-by: [email protected]
  • Loading branch information
MichaelTiemannOSC committed Oct 14, 2022
1 parent 867f327 commit b8f8915
Show file tree
Hide file tree
Showing 12 changed files with 896 additions and 1,501 deletions.
8 changes: 4 additions & 4 deletions ITR/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ITR.data.osc_units import ureg, Q_
from typing import List

from .interfaces import TemperatureScoreControls
from ITR.interfaces import TemperatureScoreControls

class ColumnsConfig:
# Define a constant for each column used in the
Expand Down Expand Up @@ -166,9 +166,9 @@ class TemperatureScoreConfig(PortfolioAggregationConfig):
target_end_year=2050,
projection_start_year=2010,
projection_end_year=2019,
tcre=Q_(2.2, 'delta_degC'),
carbon_conversion=Q_(3664.0, 'Gt CO2'),
scenario_target_temperature=Q_(1.5, 'delta_degC')
tcre='2.2 delta_degC',
carbon_conversion=Q_('3664.0 Gt CO2'),
scenario_target_temperature='1.5 delta_degC'
)


Expand Down
4 changes: 2 additions & 2 deletions ITR/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

# openscm_units doesn't make it easy to set preprocessors. This is one way to do it.
unit_registry.preprocessors=[
lambda s1: re.sub(r'passenger.km', 'pkm', s1),
lambda s2: s2.replace('BoE', 'boe'),
lambda s1: re.sub(r'passenger.km', 'pkm', s1),
lambda s2: s2.replace('BoE', 'boe'),
]

ureg = unit_registry
Expand Down
32 changes: 18 additions & 14 deletions ITR/data/base_providers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import warnings # needed until quantile behaves better with Pint quantities in arrays
import numpy as np
import pandas as pd
# import uncertainties
# from uncertainties import ufloat
# from uncertainties.core import Variable as utype
import uncertainties
from uncertainties import unumpy as unp
import pint

Expand All @@ -19,7 +19,8 @@
from ITR.interfaces import ICompanyData, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes, \
IBenchmark, IProjection, ICompanyEIProjections, ICompanyEIProjectionsScopes, IHistoricEIScopes, \
IHistoricEmissionsScopes, IProductionRealization, ITargetData, IHistoricData, ICompanyEIProjection, \
IEmissionRealization, IntensityMetric, ProjectionControls
IEmissionRealization, ProjectionControls
from ITR.interfaces import EI_Quantity

# TODO handling of scopes in benchmarks

Expand Down Expand Up @@ -51,7 +52,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark, scope: EScope) ->
:return: pd.Series
"""
return pd.Series({r.year: r.value for r in benchmark.projections}, name=(benchmark.region, benchmark.sector, scope),
dtype=f'pint[{benchmark.benchmark_metric.units}]')
dtype=f'pint[{str(benchmark.benchmark_metric)}]')

# Production benchmarks are dimensionless. S1S2 has nothing to do with any company data.
# It's a label in the top-level of benchmark data. Currently S1S2 is the only label with any data.
Expand Down Expand Up @@ -158,7 +159,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark, scope: EScope) ->
:return: pd.Series
"""
s = pd.Series({p.year: p.value for p in benchmark.projections}, name=(benchmark.region, benchmark.sector, scope),
dtype=f'pint[{benchmark.benchmark_metric.units}]')
dtype=f'pint[{str(benchmark.benchmark_metric)}]')
return s

def _get_projected_intensities(self, scope: EScope = EScope.S1S2) -> pd.DataFrame:
Expand Down Expand Up @@ -248,11 +249,15 @@ def _convert_projections_to_series(self, company: ICompanyData, feature: str,
:return: pd.Series
"""
company_dict = company.dict()
# FIXME--this should be fixed with latest ProductionMetric stuff!
try:
production_units = company_dict[self.column_config.PRODUCTION_METRIC]['units']
except TypeError:
breakpoint()
emissions_units = company_dict[self.column_config.EMISSIONS_METRIC]['units']
production_units = str(company_dict[self.column_config.PRODUCTION_METRIC])
try:
emissions_units = company_dict[self.column_config.EMISSIONS_METRIC]['units']
except TypeError:
emissions_units = str(company_dict[self.column_config.EMISSIONS_METRIC])
if company_dict[feature][scope.name]:
projections = company_dict[feature][scope.name]['projections']
else:
Expand Down Expand Up @@ -298,7 +303,7 @@ def _calculate_target_projections(self, production_bm: BaseProviderProductionBen
warnings.simplefilter("ignore")
company_sector_region_info = pd.DataFrame({
self.column_config.COMPANY_ID: [c.company_id],
self.column_config.BASE_YEAR_PRODUCTION: [base_year_production.to(c.production_metric.units)],
self.column_config.BASE_YEAR_PRODUCTION: [base_year_production.to(str(c.production_metric))],
self.column_config.GHG_SCOPE12: [c.ghg_s1s2],
self.column_config.GHG_SCOPE3: [c.ghg_s3],
self.column_config.SECTOR: [c.sector],
Expand All @@ -308,7 +313,7 @@ def _calculate_target_projections(self, production_bm: BaseProviderProductionBen
production_bm.get_company_projected_production(company_sector_region_info)
# We transpose the data so that we get a pd.Series that will accept the pint units as a whole (not element-by-element)
.iloc[0].T
.astype(f'pint[{str(base_year_production.units)}]')
.astype(f'pint[{str(base_year_production.u)}]')
)
c.projected_targets = EITargetProjector().project_ei_targets(c, bm_production_data)

Expand Down Expand Up @@ -537,20 +542,20 @@ def _add_projections_to_companies(self, companies: List[ICompanyData], extrapola
scope_dfs[scope] = results.astype(f"pint[{units}]")
projections = [IProjection(year=year, value=value) for year, value in results.items()
if year in range(self.projection_controls.BASE_YEAR, self.projection_controls.TARGET_YEAR+1)]
scope_projections[scope] = ICompanyEIProjections(ei_metric={'units': units}, projections=projections)
scope_projections[scope] = ICompanyEIProjections(ei_metric=units, projections=projections)
if scope_projections['S1'] and scope_projections['S2'] and not scope_projections['S1S2']:
results = scope_dfs['S1'] + scope_dfs['S2']
units = f"{results.values[0].u:~P}"
projections = [IProjection(year=year, value=value) for year, value in results.items()
if year in range(self.projection_controls.BASE_YEAR, self.projection_controls.TARGET_YEAR+1)]
scope_projections['S1S2'] = ICompanyEIProjections(ei_metric={'units': units}, projections=projections)
scope_projections['S1S2'] = ICompanyEIProjections(ei_metric=units, projections=projections)
# FIXME: do we really need to do this? We're going to migrate S3 to S1S2 and ignore S1S2S3...
if scope_projections['S1S2'] and scope_projections['S3'] and not scope_projections['S1S2S3']:
results = scope_dfs['S1S2'] + scope_dfs['S3']
units = f"{results.values[0].u:~P}"
projections = [IProjection(year=year, value=value) for year, value in results.items()
if year in range(self.projection_controls.BASE_YEAR, self.projection_controls.TARGET_YEAR+1)]
scope_projections['S1S2S3'] = ICompanyEIProjections(ei_metric={'units': units}, projections=projections)
scope_projections['S1S2S3'] = ICompanyEIProjections(ei_metric=units, projections=projections)
company.projected_intensities = ICompanyEIProjectionsScopes(**scope_projections)

def _standardize(self, intensities: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -766,7 +771,7 @@ def project_ei_targets(self, company: ICompanyData, production_bm: pd.Series) ->
ei_projection_scopes[scope].projections.extend(ei_projections)
else:
ei_projection_scopes[scope] = ICompanyEIProjections(projections=ei_projections,
ei_metric=IntensityMetric.parse_obj({'units': target.target_base_year_unit}))
ei_metric=EI_Quantity(target.target_base_year_unit))
elif target.target_type == "absolute":
# Complicated case, the target must be switched from absolute value to intensity.
# We use the benchmark production data
Expand Down Expand Up @@ -821,8 +826,7 @@ def project_ei_targets(self, company: ICompanyData, production_bm: pd.Series) ->
ei_projection_scopes[scope].projections.extend(ei_projections)
else:
ei_projection_scopes[scope] = ICompanyEIProjections(projections=ei_projections,
ei_metric=IntensityMetric.parse_obj(
{'units': f"{target_value.u:~P}"}))
ei_metric=EI_Quantity (f"{target_value.u:~P}"))
else:
# No target (type) specified
ei_projection_scopes[scope] = None
Expand Down
15 changes: 5 additions & 10 deletions ITR/data/data_warehouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import logging
import pandas as pd
import numpy as np
from uncertainties import unumpy as unp

from abc import ABC
from typing import List, Type
from pydantic import ValidationError
Expand Down Expand Up @@ -96,15 +98,8 @@ def get_preprocessed_company_data(self, company_ids: List[str]) -> List[ICompany
pd.Series([self.benchmarks_projected_ei.benchmark_global_budget] * len(df_company_data),
dtype='pint[Gt CO2]',
index=df_company_data.index)
df_company_data[self.column_config.BENCHMARK_TEMP] = \
pd.Series([self.benchmarks_projected_ei.benchmark_temperature] * len(df_company_data),
dtype='pint[delta_degC]',
index=df_company_data.index)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# See https://github.com/hgrecco/pint-pandas/issues/114
for col in [self.column_config.CUMULATIVE_TRAJECTORY, self.column_config.CUMULATIVE_TARGET, self.column_config.CUMULATIVE_BUDGET]:
df_company_data[col] = df_company_data[col].apply(lambda x: str(x))
# ICompanyAggregates wants this Quantity as a `str`
df_company_data[self.column_config.BENCHMARK_TEMP] = [str(self.benchmarks_projected_ei.benchmark_temperature)] * len(df_company_data)
companies = df_company_data.to_dict(orient="records")
aggregate_company_data = [ICompanyAggregates.parse_obj(company) for company in companies]
return aggregate_company_data
Expand Down Expand Up @@ -138,7 +133,7 @@ def _get_cumulative_emissions(self, projected_ei: pd.DataFrame, projected_produc
:return: cumulative emissions based on weighted sum of emissions intensity * production
"""
projected_emissions = projected_ei.multiply(projected_production)
projected_emissions = projected_emissions.applymap(lambda x: x if isinstance(x,float) else x if np.isfinite(x.m) else np.nan)
projected_emissions = projected_emissions.applymap(lambda x: np.nan if unp.isnan(x) else x)
null_idx = projected_emissions.index[projected_emissions.isnull().all(axis=1)]
return pd.concat([projected_emissions.loc[null_idx, projected_emissions.columns[0]],
projected_emissions.loc[projected_emissions.index.difference(null_idx)].sum(axis=1)]).astype('pint[Mt CO2]')
17 changes: 5 additions & 12 deletions ITR/data/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from ITR.data.base_providers import BaseCompanyDataProvider
from ITR.configs import ColumnsConfig, TemperatureScoreConfig, VariablesConfig, TabsConfig, SectorsConfig, LoggingConfig
from ITR.interfaces import ICompanyData, EScope, \
from ITR.interfaces import ICompanyData, EScope, ProductionQuantity, EmissionsQuantity, EI_Quantity, \
IHistoricEmissionsScopes, \
IProductionRealization, IHistoricEIScopes, IHistoricData, ITargetData, IEmissionRealization, IEIRealization, \
IProjection, ProjectionControls
Expand Down Expand Up @@ -413,7 +413,7 @@ def _validate_target_data(self, target_data: pd.DataFrame) -> pd.DataFrame:
c_ids_invalid_netzero_year = list(target_data[target_data['netzero_year'] > ProjectionControls.TARGET_YEAR].index)
if c_ids_invalid_netzero_year:
error_message = f"Invalid net-zero target years (>{ProjectionControls.TARGET_YEAR}) are entered for companies with ID: " \
f"{c_ids_without_netzero_year}"
f"{c_ids_invalid_netzero_year}"
logger.error(error_message)
raise ValueError(error_message)
target_data.loc[target_data.netzero_year.isna(), 'netzero_year'] = ProjectionControls.TARGET_YEAR
Expand Down Expand Up @@ -473,13 +473,6 @@ def _company_df_to_model(self, df_fundamentals: pd.DataFrame,
else:
company_data[ColumnsConfig.TARGET_DATA] = None

if company_data[ColumnsConfig.PRODUCTION_METRIC]:
company_data[ColumnsConfig.PRODUCTION_METRIC] = {
'units': company_data[ColumnsConfig.PRODUCTION_METRIC]}
if company_data[ColumnsConfig.EMISSIONS_METRIC]:
company_data[ColumnsConfig.EMISSIONS_METRIC] = {
'units': company_data[ColumnsConfig.EMISSIONS_METRIC]}

# handling of missing market cap data is mainly done in _convert_from_template_company_data()
if company_data[ColumnsConfig.COMPANY_MARKET_CAP] is pd.NA:
company_data[ColumnsConfig.COMPANY_MARKET_CAP] = np.nan
Expand Down Expand Up @@ -547,7 +540,7 @@ def _convert_to_historic_emissions(self, emissions: pd.DataFrame) -> Optional[IH
results = emissions.loc[emissions[ColumnsConfig.SCOPE] == scope]
emissions_scopes[scope] = [] \
if results.empty \
else [IEmissionRealization(year=year, value=results[year].squeeze()) for year in self.historic_years]
else [IEmissionRealization(year=year, value=EmissionsQuantity(results[year].squeeze())) for year in self.historic_years]
return IHistoricEmissionsScopes(**emissions_scopes)

def _convert_to_historic_productions(self, productions: pd.DataFrame) -> Optional[List[IProductionRealization]]:
Expand All @@ -557,7 +550,7 @@ def _convert_to_historic_productions(self, productions: pd.DataFrame) -> Optiona
"""
if productions.empty:
return None
return [IProductionRealization(year=year, value=productions[year].squeeze()) for year in self.historic_years]
return [IProductionRealization(year=year, value=ProductionQuantity(productions[year].squeeze())) for year in self.historic_years]

def _convert_to_historic_ei(self, intensities: pd.DataFrame) -> Optional[IHistoricEIScopes]:
"""
Expand All @@ -574,5 +567,5 @@ def _convert_to_historic_ei(self, intensities: pd.DataFrame) -> Optional[IHistor
results = intensities.loc[intensities[ColumnsConfig.SCOPE] == scope]
intensity_scopes[scope] = [] \
if results.empty \
else [IEIRealization(year=year, value=results[year].squeeze()) for year in self.historic_years]
else [IEIRealization(year=year, value=EI_Quantity(results[year].squeeze())) for year in self.historic_years]
return IHistoricEIScopes(**intensity_scopes)
Loading

0 comments on commit b8f8915

Please sign in to comment.