Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Template v2 (plus Pint/Pydantic cleanups) #159

Closed
Closed
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
14746a8
Initial commit of notebook to convert V1 data templates to V2.
MichaelTiemannOSC Oct 3, 2022
133ff59
V2 Template and uncertainties
MichaelTiemannOSC Oct 10, 2022
bd36f43
WIP can now read V2 data; now Pint+uncertainties work begins...
MichaelTiemannOSC Oct 10, 2022
1c5d7ec
WIP checkin (almost through to temp score)
MichaelTiemannOSC Oct 12, 2022
cc8b284
Virtually complete WIP checkin
MichaelTiemannOSC Oct 14, 2022
9e64c82
Fix long-standing problems from last year's Pint/Pydantic integration
MichaelTiemannOSC Oct 21, 2022
50b6f3a
This completes the first smoke-test of the ITR_UI tool working with u…
MichaelTiemannOSC Oct 23, 2022
40d245d
WIP test code. Not finished, but moving forward.
MichaelTiemannOSC Oct 23, 2022
c5ac785
Get excel.py working, fixing and harmonizing as we go...
MichaelTiemannOSC Oct 24, 2022
655f60d
Fix calculation and use of epsilon in umean function.
MichaelTiemannOSC Oct 24, 2022
c99b805
Remove print statement used for debugging.
MichaelTiemannOSC Oct 24, 2022
af7fa70
Remove unconditional breakpoint used for debugging.
MichaelTiemannOSC Oct 24, 2022
780a015
Plot temperature scores with std_dev from uncertainties.
MichaelTiemannOSC Oct 25, 2022
01b316d
Support using GHG Protocol labels for S3 categories in template.
MichaelTiemannOSC Oct 25, 2022
e244331
Fixup code so it works without uncertainties package
MichaelTiemannOSC Oct 30, 2022
150a00f
co2budget.ipynb works with both TPI and OECM
MichaelTiemannOSC Nov 7, 2022
b267aa5
Fix minor breakage caused by making ITR work w/o uncertainties.
MichaelTiemannOSC Nov 17, 2022
d91ab20
Minor fixes so that ITR demos can work with vanilla Pint, Pint-Pandas…
MichaelTiemannOSC Nov 18, 2022
7cbdf76
Update requirements to use latest Pint, Pint-Pandas, Pandas, etc.
MichaelTiemannOSC Nov 18, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions ITR/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
This file defines the constants used throughout the different classes. In order to redefine these settings whilst using
the module, extend the respective config class and pass it to the class as the "constants" parameter.
"""
import logging
from __future__ import annotations

from .interfaces import TemperatureScoreControls
import logging

import pint
import pint_pandas
from ITR.data.osc_units import ureg, Q_
from typing import List

from ITR.interfaces import TemperatureScoreControls

class ColumnsConfig:
# Define a constant for each column used in the
Expand Down Expand Up @@ -148,6 +148,8 @@ class TabsConfig:
PROJECTED_TARGET = "projected_target"
HISTORIC_DATA = "historic_data"
TEMPLATE_INPUT_DATA = 'ITR input data'
TEMPLATE_INPUT_DATA_V2 = 'ITR V2 input data'
TEMPLATE_ESG_DATA_V2 = 'ITR V2 esg data'
TEMPLATE_TARGET_DATA = 'ITR target input data'


Expand All @@ -164,9 +166,9 @@ class TemperatureScoreConfig(PortfolioAggregationConfig):
target_end_year=2050,
projection_start_year=2010,
projection_end_year=2019,
tcre=Q_(2.2, ureg.delta_degC),
carbon_conversion=Q_(3664.0, ureg('Gt CO2')),
scenario_target_temperature=Q_(1.5, ureg.delta_degC)
tcre='2.2 delta_degC',
carbon_conversion=Q_(3664.0, 'Gt CO2'),
scenario_target_temperature='1.5 delta_degC'
)


Expand Down
40 changes: 36 additions & 4 deletions ITR/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,39 @@
"""
This module contains classes that create connections to data providers.
This module contains classes that create connections to data providers, and it initializes our system of units.
"""

from .osc_units import ureg
from .data_providers import CompanyDataProvider
from .excel import ExcelProviderCompany
import pint
from pint import set_application_registry
from openscm_units import unit_registry
import re

import numpy as np
from uncertainties import ufloat

# openscm_units doesn't make it easy to set preprocessors. This is one way to do it.
unit_registry.preprocessors=[
lambda s1: re.sub(r'passenger.km', 'pkm', s1),
lambda s2: s2.replace('BoE', 'boe'),
kmarinushkin marked this conversation as resolved.
Show resolved Hide resolved
]

ureg = unit_registry
set_application_registry(ureg)

# Overwrite what pint/pint/__init__.py initializes
# # Default Quantity, Unit and Measurement are the ones
# # built in the default registry.
# Quantity = UnitRegistry.Quantity
# Unit = UnitRegistry.Unit
# Measurement = UnitRegistry.Measurement
# Context = UnitRegistry.Context

pint.Quantity = ureg.Quantity
pint.Unit = ureg.Unit
pint.Measurement = ureg.Measurement
pint.Context = ureg.Context

# FIXME: delay loading of pint_pandas until after we've initialized ourselves
from pint_pandas import PintType
PintType.ureg = ureg

_ufloat_nan = ufloat(np.nan, 0.0)
241 changes: 154 additions & 87 deletions ITR/data/base_providers.py

Large diffs are not rendered by default.

11 changes: 7 additions & 4 deletions ITR/data/data_providers.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List, Dict, Union
import pandas as pd

import numpy as np

from ITR.configs import TabsConfig, ColumnsConfig, VariablesConfig, TemperatureScoreConfig
from ITR.interfaces import ICompanyData, EScope, IHistoricData, IProductionRealization, IHistoricEmissionsScopes, \
IHistoricEIScopes, ICompanyEIProjection, ICompanyEIProjectionsScopes, ICompanyEIProjections

import pint
from pint import Quantity
from ITR.data.osc_units import ureg

from ITR.interfaces import ICompanyData, EScope, IHistoricData, IProductionRealization, IHistoricEmissionsScopes, \
IHistoricEIScopes, ICompanyEIProjection, ICompanyEIProjectionsScopes, ICompanyEIProjections, TemperatureScoreControls

from ITR.interfaces import ICompanyData

from ITR.configs import TabsConfig, ColumnsConfig, VariablesConfig, TemperatureScoreConfig

class CompanyDataProvider(ABC):
"""
Expand Down
51 changes: 37 additions & 14 deletions ITR/data/data_warehouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,19 @@
import logging
import pandas as pd
import numpy as np
from uncertainties import unumpy as unp

from abc import ABC
from typing import List, Type
from pydantic import ValidationError

from ITR.data.osc_units import ureg, Q_
from ITR.interfaces import IEmissionRealization, IEIRealization, ICompanyAggregates, ICompanyEIProjection
from ITR.data.data_providers import CompanyDataProvider, ProductionBenchmarkDataProvider, IntensityBenchmarkDataProvider
from ITR.configs import ColumnsConfig, TemperatureScoreConfig, LoggingConfig

import pint

logger = logging.getLogger(__name__)
LoggingConfig.add_config_to_logger(logger)

Expand Down Expand Up @@ -41,10 +46,10 @@ def __init__(self, company_data: CompanyDataProvider,
# After projections have been made, shift S3 data into S1S2. If we shift before we project,
# then S3 targets will not be projected correctly.
for c in self.company_data._companies:
if c.ghg_s3:
if c.ghg_s3 and not unp.isnan(c.ghg_s3.m):
# For Production-centric and energy-only data (except for Cement), convert all S3 numbers to S1 numbers
c.ghg_s1s2 = c.ghg_s1s2 + c.ghg_s3
c.ghg_s3 = 0
c.ghg_s3 = Q_(0.0, c.ghg_s3.u)
if c.historic_data:
if c.historic_data.emissions and c.historic_data.emissions.S3:
c.historic_data.emissions.S1S2 = list( map(IEmissionRealization.add, c.historic_data.emissions.S1S2, c.historic_data.emissions.S3) )
Expand Down Expand Up @@ -79,12 +84,29 @@ def get_preprocessed_company_data(self, company_ids: List[str]) -> List[ICompany
company_info_at_base_year).sort_index()

# trajectories are projected from historic data and we are careful to fill all gaps between historic and projections
# FIXME: we just computed ALL company data above into a dataframe. Why not use that?
projected_trajectories = self.company_data.get_company_projected_trajectories(company_ids)
df_trajectory = self._get_cumulative_emissions(
projected_ei=projected_trajectories,
projected_production=projected_production).rename(self.column_config.CUMULATIVE_TRAJECTORY)

def fix_ragged_projected_targets(x):
    """Return the leading projected-target value for one row, backfilled from history when it is NaN.

    `x` is a single row (it is applied with ``axis=1``) whose first index label
    is treated as the year of interest.  If the value stored under that year
    has a NaN magnitude, the S1S2 historic emissions-intensity records of the
    matching row in ``df_company_data`` (looked up via ``x.name``) are searched
    for the same year; when found, that historic value is returned instead.
    Otherwise the original (possibly NaN) value is returned unchanged.

    NOTE(review): assumes the cell value exposes a ``.m`` magnitude (i.e. is a
    Pint Quantity) and that ``historic_data`` is a nested dict of
    ``{'year': ..., 'value': ...}`` records -- confirm against the caller.
    """
    first_year = x.index[0]
    target = x[first_year]
    # Nothing to repair when the projected target is already a real number.
    if not unp.isnan(target.m):
        return target
    s1s2_history = df_company_data.loc[x.name].historic_data['emissions_intensities']['S1S2']
    ei_by_year = {record['year']: record['value'] for record in s1s2_history}
    # No historic value for this year (or no history at all): leave as NaN.
    return ei_by_year.get(first_year, target)

projected_targets = self.company_data.get_company_projected_targets(company_ids)
# Fill in ragged left edge of projected_targets with historic data, interpolating where we need to
projected_targets[projected_targets.columns[0]] = (
projected_targets[[projected_targets.columns[0]]].apply(fix_ragged_projected_targets, axis=1)
)
df_target = self._get_cumulative_emissions(
projected_ei=projected_targets,
projected_production=projected_production).rename(self.column_config.CUMULATIVE_TARGET)
Expand All @@ -96,15 +118,8 @@ def get_preprocessed_company_data(self, company_ids: List[str]) -> List[ICompany
pd.Series([self.benchmarks_projected_ei.benchmark_global_budget] * len(df_company_data),
dtype='pint[Gt CO2]',
index=df_company_data.index)
df_company_data[self.column_config.BENCHMARK_TEMP] = \
pd.Series([self.benchmarks_projected_ei.benchmark_temperature] * len(df_company_data),
dtype='pint[delta_degC]',
index=df_company_data.index)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# See https://github.com/hgrecco/pint-pandas/issues/114
for col in [self.column_config.CUMULATIVE_TRAJECTORY, self.column_config.CUMULATIVE_TARGET, self.column_config.CUMULATIVE_BUDGET]:
df_company_data[col] = df_company_data[col].apply(lambda x: str(x))
# ICompanyAggregates wants this Quantity as a `str`
df_company_data[self.column_config.BENCHMARK_TEMP] = [str(self.benchmarks_projected_ei.benchmark_temperature)] * len(df_company_data)
companies = df_company_data.to_dict(orient="records")
aggregate_company_data = [ICompanyAggregates.parse_obj(company) for company in companies]
return aggregate_company_data
Expand Down Expand Up @@ -138,7 +153,15 @@ def _get_cumulative_emissions(self, projected_ei: pd.DataFrame, projected_produc
:return: cumulative emissions based on weighted sum of emissions intensity * production
"""
projected_emissions = projected_ei.multiply(projected_production)
projected_emissions = projected_emissions.applymap(lambda x: x if isinstance(x,float) else x if np.isfinite(x.m) else np.nan)
null_idx = projected_emissions.index[projected_emissions.isnull().all(axis=1)]
return pd.concat([projected_emissions.loc[null_idx, projected_emissions.columns[0]],
return projected_emissions.sum(axis=1).astype('pint[Mt CO2]')

# The following code is broken, due to the way unp.isnan strips away Quantity from scalars
# It was written to rescue data from automotive, but maybe not needed anymore?
nan_emissions = projected_emissions.applymap(lambda x: np.nan if unp.isnan(x) else x)
if nan_emissions.isnull().any(axis=0).any():
breakpoint()
null_idx = nan_emissions.index[nan_emissions.isnull().all(axis=1)]
# FIXME: this replaces the quantified NaNs in projected_emissions with straight-up NaNs,
# while also converting the remaining emissions to a consistent unit of 'Mt CO2'
return pd.concat([nan_emissions.loc[null_idx, nan_emissions.columns[0]],
projected_emissions.loc[projected_emissions.index.difference(null_idx)].sum(axis=1)]).astype('pint[Mt CO2]')
Loading