Skip to content

Commit

Permalink
Merge pull request #186 from ONSdigital/RDRP-667
Browse files Browse the repository at this point in the history
RDRP-667: Construction bug fixes
  • Loading branch information
zorge69 authored Jan 18, 2024
2 parents 5173e6f + 2744f1d commit 2ae8dcc
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 24 deletions.
10 changes: 0 additions & 10 deletions config/construction_schema.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1554,16 +1554,6 @@ Possible_Categorical_Values = [ "nan",]
q_code = ""
old_ref = "nan"

[postcodes_harmonised]
Description = "nan"
Deduced_Data_Type = "str"
Nullable = false
Current_Data_Type = "str"
Length = "nan"
Min_values = "nan"
Max_values = "nan"
Possible_categorical_Values = ["nan"]

[force_imputation]
Description = "nan"
Deduced_Data_Type = "boolean"
Expand Down
2 changes: 1 addition & 1 deletion src/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.2"
__version__ = "0.6.3"
40 changes: 31 additions & 9 deletions src/construction/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,24 @@ def run_construction(

# Run GB specific actions
if not is_northern_ireland:
# Prepare the short to long form constructions (N/A to NI)
updated_snapshot_df = prepare_short_to_long(updated_snapshot_df, construction_df)
# Prepare the short to long form constructions, if any (N/A to NI)
if "short_to_long" in construction_df.columns:
updated_snapshot_df = prepare_short_to_long(
updated_snapshot_df, construction_df
)
# Create period_year column (NI already has it)
updated_snapshot_df = create_period_year(updated_snapshot_df)
construction_df = create_period_year(construction_df)
# Set instance=1 so longforms with status 'Form sent out' match correctly
form_sent_condition = (updated_snapshot_df.formtype == "0001") & (updated_snapshot_df.status == "Form sent out")
form_sent_condition = (updated_snapshot_df.formtype == "0001") & (
updated_snapshot_df.status == "Form sent out"
)
updated_snapshot_df.loc[form_sent_condition, "instance"] = 1
# Set instance=0 so shortforms with status 'Form sent out' match correctly
form_sent_condition = (updated_snapshot_df.formtype == "0006") & (
updated_snapshot_df.status == "Form sent out"
)
updated_snapshot_df.loc[form_sent_condition, "instance"] = 0

# NI data has no instance but needs an instance of 1
if is_northern_ireland:
Expand Down Expand Up @@ -125,18 +135,30 @@ def run_construction(
# Run GB specific actions
if not is_northern_ireland:
# Long form records with a postcode in 601 use this as the postcode
long_form_cond = (~updated_snapshot_df["601"].isnull())
updated_snapshot_df.loc[long_form_cond, "postcodes_harmonised"] = updated_snapshot_df["601"]
long_form_cond = ~updated_snapshot_df["601"].isnull()
updated_snapshot_df.loc[
long_form_cond, "postcodes_harmonised"
] = updated_snapshot_df["601"]

# Short form records with nothing in 601 use referencepostcode instead
short_form_cond = (updated_snapshot_df["601"].isnull()) & (~updated_snapshot_df["referencepostcode"].isnull())
updated_snapshot_df.loc[short_form_cond, "postcodes_harmonised"] = updated_snapshot_df["referencepostcode"]
short_form_cond = (updated_snapshot_df["601"].isnull()) & (
~updated_snapshot_df["referencepostcode"].isnull()
)
updated_snapshot_df.loc[
short_form_cond, "postcodes_harmonised"
] = updated_snapshot_df["referencepostcode"]

# Top up all new postcodes so they're all eight characters exactly
postcode_cols = ["601", "referencepostcode", "postcodes_harmonised"]
for col in postcode_cols:
updated_snapshot_df[col] = updated_snapshot_df[col].apply(postcode_topup)

# Reset shortforms with status 'Form sent out' to instance=None
form_sent_condition = (updated_snapshot_df.formtype == "0006") & (
updated_snapshot_df.status == "Form sent out"
)
updated_snapshot_df.loc[form_sent_condition, "instance"] = None

updated_snapshot_df = updated_snapshot_df.sort_values(
["reference", "instance"], ascending=[True, True]
).reset_index(drop=True)
Expand All @@ -150,8 +172,8 @@ def prepare_short_to_long(updated_snapshot_df, construction_df):
"""Create additional instances for short to long construction"""
# Check which references are going to be converted to long forms
short_to_long_refs = construction_df.loc[
construction_df["short_to_long"] == True,"reference"
].unique()
construction_df["short_to_long"] == True, "reference"
].unique()
# Create conversion df
short_to_long_df = updated_snapshot_df[
updated_snapshot_df["reference"].isin(short_to_long_refs)
Expand Down
2 changes: 1 addition & 1 deletion src/developer_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ global:
load_manual_imputation: False
load_backdata: True # whether to load previous year data for MoR
load_reference_list: True # the reference list corrections should always be loaded in year 1
load_from_feather: True
load_from_feather: False
# Apportionment to sites settings
apportion_sites: True
# Output settings
Expand Down
6 changes: 3 additions & 3 deletions src/estimation/estimation_main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Main file for the estimation module."""
import logging
import pandas as pd
from datetime import datetime
from typing import Callable, Dict, Any
from typing import Any, Callable, Dict

from src.estimation import calculate_weights as weights
import pandas as pd
from src.estimation import apply_weights as appweights
from src.estimation import calculate_weights as weights
from src.estimation import cellno_mapper as cmap

EstMainLogger = logging.getLogger(__name__)
Expand Down

0 comments on commit 2ae8dcc

Please sign in to comment.