-
Notifications
You must be signed in to change notification settings - Fork 9
Support scenario s3 only #158
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e52d393
73bf593
1cb2711
7f1c2c8
7e4f329
0adc9d0
b05d2a8
274bc0c
d083989
a9a24d7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,30 +47,30 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark, scope: EScope) -> | |
return pd.Series({r.year: r.value for r in benchmark.projections}, name=(benchmark.region, benchmark.sector, scope), | ||
dtype=f'pint[{benchmark.benchmark_metric.units}]') | ||
|
||
# Production benchmarks are dimensionless. S1S2 has nothing to do with any company data. | ||
# It's a label in the top-level of benchmark data. Currently S1S2 is the only label with any data. | ||
# Production benchmarks are dimensionless, relevant for AnyScope | ||
def _get_projected_production(self, scope: EScope = EScope.S1S2) -> pd.DataFrame: | ||
""" | ||
Converts IProductionBenchmarkScopes into dataframe for a scope | ||
:param scope: a scope | ||
:return: pd.DataFrame | ||
""" | ||
result = [] | ||
for bm in self._productions_benchmarks.dict()[str(scope)]['benchmarks']: | ||
for bm in self._productions_benchmarks.dict()['AnyScope']['benchmarks']: | ||
result.append(self._convert_benchmark_to_series(IBenchmark.parse_obj(bm), scope)) | ||
df_bm = pd.DataFrame(result) | ||
df_bm.index.names = [self.column_config.REGION, self.column_config.SECTOR, self.column_config.SCOPE] | ||
|
||
return df_bm | ||
|
||
def get_company_projected_production(self, company_sector_region_info: pd.DataFrame) -> pd.DataFrame: | ||
def get_company_projected_production(self, company_sector_region_info: pd.DataFrame, scope: EScope) -> pd.DataFrame: | ||
""" | ||
get the projected productions for list of companies in ghg_scope12 | ||
:param company_sector_region_info: DataFrame with at least the following columns : | ||
ColumnsConfig.COMPANY_ID, ColumnsConfig.GHG_SCOPE12, ColumnsConfig.SECTOR and ColumnsConfig.REGION | ||
:param scope: benchmark scope for projections | ||
:return: DataFrame of projected productions for [base_year - base_year + 50] | ||
""" | ||
benchmark_production_projections = self.get_benchmark_projections(company_sector_region_info, scope=EScope.S1S2) | ||
benchmark_production_projections = self.get_benchmark_projections(company_sector_region_info, scope) | ||
company_production = company_sector_region_info[self.column_config.BASE_YEAR_PRODUCTION] | ||
return benchmark_production_projections.add(1).cumprod(axis=1).mul( | ||
company_production, axis=0) | ||
|
@@ -88,7 +88,7 @@ def get_benchmark_projections(self, company_sector_region_info: pd.DataFrame, | |
benchmark_projection = self._get_projected_production(scope) # TODO optimize performance | ||
sectors = company_sector_region_info[self.column_config.SECTOR] | ||
regions = company_sector_region_info[self.column_config.REGION] | ||
scopes = [EScope.S1S2] * len(sectors) | ||
scopes = [scope] * len(sectors) | ||
benchmark_regions = regions.copy() | ||
mask = benchmark_regions.isin(benchmark_projection.reset_index()[self.column_config.REGION]) | ||
benchmark_regions.loc[~mask] = "Global" | ||
|
@@ -107,6 +107,12 @@ def __init__(self, EI_benchmarks: IEIBenchmarkScopes, | |
self._EI_benchmarks = EI_benchmarks | ||
self.temp_config = tempscore_config | ||
self.column_config = column_config | ||
# Benchmark's scope, for which we calculate | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The changes I made in template-v2-scopes change the paradigm from "what scope are you trying to calculate? I'll give it to you" to "here are all the scope calculations relevant to the benchmark". In this way, the ITR code doesn't have to guess how the benchmark should be interpreted; it just gives all the answers and the caller decides how to aggregate the scopes. In this bit of code there's internal logic prioritizing scope S1+S2 over S3 if the former is available. But that logic may not be correct for all cases (or cases where S1+S2+S3 should be computed). Best to let the caller aggregate (as the benchmark intends). |
||
self.scope_to_calc = None | ||
if self._EI_benchmarks.S1S2 != None: | ||
self.scope_to_calc = EScope.S1S2 | ||
elif self._EI_benchmarks.S3 != None: | ||
self.scope_to_calc = EScope.S3 | ||
|
||
def get_SDA_intensity_benchmarks(self, company_info_at_base_year: pd.DataFrame) -> pd.DataFrame: | ||
""" | ||
|
@@ -116,7 +122,8 @@ def get_SDA_intensity_benchmarks(self, company_info_at_base_year: pd.DataFrame) | |
ColumnsConfig.COMPANY_ID, ColumnsConfig.BASE_EI, ColumnsConfig.SECTOR and ColumnsConfig.REGION | ||
:return: A DataFrame with company and SDA intensity benchmarks per calendar year per row | ||
""" | ||
intensity_benchmarks = self._get_intensity_benchmarks(company_info_at_base_year) | ||
intensity_benchmarks = self._get_intensity_benchmarks(company_info_at_base_year, | ||
self.scope_to_calc) | ||
decarbonization_paths = self._get_decarbonizations_paths(intensity_benchmarks) | ||
last_ei = intensity_benchmarks[self.temp_config.CONTROLS_CONFIG.target_end_year] | ||
ei_base = intensity_benchmarks[self.temp_config.CONTROLS_CONFIG.base_year] | ||
|
@@ -162,7 +169,10 @@ def _get_projected_intensities(self, scope: EScope = EScope.S1S2) -> pd.DataFram | |
:return: pd.DataFrame | ||
""" | ||
results = [] | ||
for bm in self._EI_benchmarks.__getattribute__(str(scope)).benchmarks: | ||
scope_attr = self._EI_benchmarks.__getattribute__(str(scope)) | ||
if not scope_attr: | ||
raise ValueError(f"Scope {str(scope)} not found in loaded benchmark") | ||
for bm in scope_attr.benchmarks: | ||
results.append(self._convert_benchmark_to_series(bm, scope)) | ||
with warnings.catch_warnings(): | ||
# pd.DataFrame.__init__ (in pandas/core/frame.py) ignores the beautiful dtype information adorning the pd.Series list elements we are providing. Sad! | ||
|
@@ -187,6 +197,14 @@ def _get_intensity_benchmarks(self, company_sector_region_info: pd.DataFrame, | |
mask = regions.isin(benchmark_projection.reset_index()[self.column_config.REGION]) | ||
regions.loc[~mask] = "Global" | ||
|
||
# TODO: Remove this w/a, after associated benchmarks are clarified | ||
# Currently, EI benchmark for S3 not available for certain sectors | ||
# Temporal solution: remove such sectors from calculation | ||
if scope == EScope.S3: | ||
filtered_sector = 'Construction Buildings' | ||
filtered_df = company_sector_region_info.loc[company_sector_region_info[self.column_config.SECTOR] != filtered_sector] | ||
sectors = filtered_df[self.column_config.SECTOR] | ||
|
||
# benchmark_projection has a scope by construction | ||
benchmark_projection = benchmark_projection.loc[list(zip(regions, sectors, [scope] * len(sectors)))] | ||
benchmark_projection.index = sectors.index | ||
|
@@ -269,12 +287,13 @@ def _convert_projections_to_series(self, company: ICompanyData, feature: str, | |
{p['year']: p['value'] for p in projections}, | ||
name=company.company_id, dtype=f'pint[{emissions_units}/({production_units})]') | ||
|
||
def _calculate_target_projections(self, production_bm: BaseProviderProductionBenchmark): | ||
def _calculate_target_projections(self, production_bm: BaseProviderProductionBenchmark, scope: EScope): | ||
""" | ||
We cannot calculate target projections until after we have loaded benchmark data. | ||
We do so when companies are associated with benchmarks, in the DataWarehouse construction | ||
|
||
:param production_bm: A Production Benchmark (multi-sector, single-scope, 2020-2050) | ||
:param scope: scope to calculate | ||
""" | ||
for c in self._companies: | ||
if c.projected_targets is not None: | ||
|
@@ -296,7 +315,7 @@ def _calculate_target_projections(self, production_bm: BaseProviderProductionBen | |
self.column_config.REGION: [c.region], | ||
}, index=[0]) | ||
bm_production_data = ( | ||
production_bm.get_company_projected_production(company_sector_region_info) | ||
production_bm.get_company_projected_production(company_sector_region_info, scope) | ||
kmarinushkin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# We transpose the data so that we get a pd.Series that will accept the pint units as a whole (not element-by-element) | ||
.iloc[0].T | ||
.astype(f'pint[{str(base_year_production.units)}]') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto here: scope projections for production make no sense.
A long time ago, the code had significant confusion around naming, where things that were emissions were labeled as production, and vice versa. I fixed most of that, but I believe this is a vestigial case. Emissions and emissions intensity have scopes. Production, by itself, has no scope.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, agree. We will track this discussion in a different comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have looked for, but not found, where we finalized the discussion about using a scope parameter for get_company_projected_production. In any case, unless you have a really great example of where production output is somehow scope-dependent, I'm going to revert all these. Note that in your test case (test_base_providers.py:test_get_projected_production where you pass EScope.S1S2 to one calculation and EScope.S3 to a second calculation), both result in the same answer (which you also remark in the comments). Because there will never be any difference, it's just much cleaner and simpler to remove that parameter from that functional chain, which I will do in my subsequent merge.