diff --git a/config/construction_schema.toml b/config/construction_schema.toml index 81c2f39ec..3ec870dad 100644 --- a/config/construction_schema.toml +++ b/config/construction_schema.toml @@ -1554,16 +1554,6 @@ Possible_Categorical_Values = [ "nan",] q_code = "" old_ref = "nan" -[postcodes_harmonised] -Description = "nan" -Deduced_Data_Type = "str" -Nullable = false -Current_Data_Type = "str" -Length = "nan" -Min_values = "nan" -Max_values = "nan" -Possible_categorical_Values = ["nan"] - [force_imputation] Description = "nan" Deduced_Data_Type = "boolean" diff --git a/src/_version.py b/src/_version.py index 22049ab2c..63af88769 100644 --- a/src/_version.py +++ b/src/_version.py @@ -1 +1 @@ -__version__ = "0.6.2" +__version__ = "0.6.3" diff --git a/src/construction/construction.py b/src/construction/construction.py index 126e04a34..603182807 100644 --- a/src/construction/construction.py +++ b/src/construction/construction.py @@ -85,14 +85,24 @@ def run_construction( # Run GB specific actions if not is_northern_ireland: - # Prepare the short to long form constructions (N/A to NI) - updated_snapshot_df = prepare_short_to_long(updated_snapshot_df, construction_df) + # Prepare the short to long form constructions, if any (N/A to NI) + if "short_to_long" in construction_df.columns: + updated_snapshot_df = prepare_short_to_long( + updated_snapshot_df, construction_df + ) # Create period_year column (NI already has it) updated_snapshot_df = create_period_year(updated_snapshot_df) construction_df = create_period_year(construction_df) # Set instance=1 so longforms with status 'Form sent out' match correctly - form_sent_condition = (updated_snapshot_df.formtype == "0001") & (updated_snapshot_df.status == "Form sent out") + form_sent_condition = (updated_snapshot_df.formtype == "0001") & ( + updated_snapshot_df.status == "Form sent out" + ) updated_snapshot_df.loc[form_sent_condition, "instance"] = 1 + # Set instance=0 so shortforms with status 'Form sent out' match correctly + form_sent_condition = (updated_snapshot_df.formtype == "0006") & ( + updated_snapshot_df.status == "Form sent out" + ) + updated_snapshot_df.loc[form_sent_condition, "instance"] = 0 # NI data has no instance but needs an instance of 1 if is_northern_ireland: @@ -125,18 +135,30 @@ def run_construction( # Run GB specific actions if not is_northern_ireland: # Long form records with a postcode in 601 use this as the postcode - long_form_cond = (~updated_snapshot_df["601"].isnull()) - updated_snapshot_df.loc[long_form_cond, "postcodes_harmonised"] = updated_snapshot_df["601"] + long_form_cond = ~updated_snapshot_df["601"].isnull() + updated_snapshot_df.loc[ + long_form_cond, "postcodes_harmonised" + ] = updated_snapshot_df["601"] # Short form records with nothing in 601 use referencepostcode instead - short_form_cond = (updated_snapshot_df["601"].isnull()) & (~updated_snapshot_df["referencepostcode"].isnull()) - updated_snapshot_df.loc[short_form_cond, "postcodes_harmonised"] = updated_snapshot_df["referencepostcode"] + short_form_cond = (updated_snapshot_df["601"].isnull()) & ( + ~updated_snapshot_df["referencepostcode"].isnull() + ) + updated_snapshot_df.loc[ + short_form_cond, "postcodes_harmonised" + ] = updated_snapshot_df["referencepostcode"] # Top up all new postcodes so they're all eight characters exactly postcode_cols = ["601", "referencepostcode", "postcodes_harmonised"] for col in postcode_cols: updated_snapshot_df[col] = updated_snapshot_df[col].apply(postcode_topup) + # Reset shortforms with status 'Form sent out' to instance=None + form_sent_condition = (updated_snapshot_df.formtype == "0006") & ( + updated_snapshot_df.status == "Form sent out" + ) + updated_snapshot_df.loc[form_sent_condition, "instance"] = None + updated_snapshot_df = updated_snapshot_df.sort_values( ["reference", "instance"], ascending=[True, True] ).reset_index(drop=True) @@ -150,8 +172,8 @@ def prepare_short_to_long(updated_snapshot_df, construction_df): """Create addional instances for short to long construction""" # Check which references are going to converted to long forms short_to_long_refs = construction_df.loc[ - construction_df["short_to_long"] == True,"reference" - ].unique() + construction_df["short_to_long"] == True, "reference" + ].unique() # Create conversion df short_to_long_df = updated_snapshot_df[ updated_snapshot_df["reference"].isin(short_to_long_refs) diff --git a/src/developer_config.yaml b/src/developer_config.yaml index 504674d71..e8385797b 100644 --- a/src/developer_config.yaml +++ b/src/developer_config.yaml @@ -17,7 +17,7 @@ global: load_manual_imputation: False load_backdata: True # whether to load previous year data for MoR load_reference_list: True # the reference list corrections should always be loaded in year 1 - load_from_feather: True + load_from_feather: False # Apportionment to sites settings apportion_sites: True # Output settings diff --git a/src/estimation/estimation_main.py b/src/estimation/estimation_main.py index 323dbd026..a58728e79 100644 --- a/src/estimation/estimation_main.py +++ b/src/estimation/estimation_main.py @@ -1,11 +1,11 @@ """Main file for the estimation module.""" import logging -import pandas as pd from datetime import datetime -from typing import Callable, Dict, Any +from typing import Any, Callable, Dict -from src.estimation import calculate_weights as weights +import pandas as pd from src.estimation import apply_weights as appweights +from src.estimation import calculate_weights as weights from src.estimation import cellno_mapper as cmap EstMainLogger = logging.getLogger(__name__)