Skip to content

Commit ca498d3

Browse files
committed
Added missing docstrings, comments, typings and review notes
1 parent fc78511 commit ca498d3

File tree

2 files changed

+71
-13
lines changed

2 files changed

+71
-13
lines changed

bam_masterdata/cli/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def cli():
3939
""",
4040
)
4141
@click.option(
42-
"--path",
42+
"--excel-file",
4343
type=click.Path(exists=True, dir_okay=False),
4444
required=False,
4545
help="""
@@ -50,7 +50,7 @@ def fill_masterdata(url, path):
5050
start_time = time.time()
5151

5252
# Define output directory
53-
output_directory = "./artifacts/tmp/" if path else DATAMODEL_DIR
53+
output_directory = os.path.join(DATAMODEL_DIR, "tmp") if path else DATAMODEL_DIR
5454

5555
# Ensure the output directory exists
5656
os.makedirs(output_directory, exist_ok=True)

bam_masterdata/cli/excel_to_entities.py

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
import re
2-
from typing import TYPE_CHECKING, Any
2+
from typing import TYPE_CHECKING, Any, Optional, dict
33

44
import openpyxl
5+
from openpyxl.worksheet.worksheet import Worksheet
56

67
from bam_masterdata.logger import logger
78

8-
# logger = logger()
99

10-
11-
def index_to_excel_column(index):
10+
def index_to_excel_column(index: int) -> str:
1211
"""
1312
Converts a 1-based index to an Excel column name.
1413
@@ -25,7 +24,7 @@ def index_to_excel_column(index):
2524
return column
2625

2726

28-
def get_last_non_empty_row(sheet, start_index):
27+
def get_last_non_empty_row(sheet: Worksheet, start_index: int) -> Optional[int]:
2928
"""
3029
Finds the last non-empty row before encountering a completely empty row.
3130
@@ -50,7 +49,7 @@ def get_last_non_empty_row(sheet, start_index):
5049
return last_non_empty_row # If no empty row is encountered, return the last non-empty row
5150

5251

53-
def is_reduced_version(generated_code_value, code):
52+
def is_reduced_version(generated_code_value: str, code: str) -> bool:
5453
"""
5554
Check if generated_code_value is a reduced version of code.
5655
@@ -96,7 +95,9 @@ def is_reduced_version(generated_code_value, code):
9695
return True
9796

9897

99-
def properties_to_dict(sheet, start_index_row, last_non_empty_row):
98+
def properties_to_dict(
99+
sheet: Worksheet, start_index_row: int, last_non_empty_row: int
100+
) -> dict[str, dict[str, Any]]:
100101
"""
101102
Extracts properties from an Entity type block in the Excel sheet and returns them as a dictionary.
102103
@@ -119,6 +120,7 @@ def properties_to_dict(sheet, start_index_row, last_non_empty_row):
119120
"Property label",
120121
"Data type",
121122
"Vocabulary code",
123+
# "Object code",
122124
]
123125

124126
# Determine the header row index
@@ -280,7 +282,21 @@ def properties_to_dict(sheet, start_index_row, last_non_empty_row):
280282
return property_dict
281283

282284

283-
def terms_to_dict(sheet, start_index_row, last_non_empty_row):
285+
def terms_to_dict(
286+
sheet: Worksheet, start_index_row: int, last_non_empty_row: int
287+
) -> dict[str, dict[str, Any]]:
288+
"""
289+
Extracts terms from a Vocabulary block in the Excel sheet and returns them as a dictionary.
290+
291+
Args:
292+
sheet: The worksheet object.
293+
start_index_row: Row where the current entity type begins (1-based index).
294+
last_non_empty_row: Row where the current entity type finishes (1-based index).
295+
296+
Returns:
297+
A dictionary where each key is a vocabulary term code and the value is a dictionary
298+
containing the attributes of the vocabulary term.
299+
"""
284300
terms_dict = {}
285301
expected_terms = ["Code", "Description", "Url template", "Label", "Official"]
286302

@@ -374,12 +390,31 @@ def terms_to_dict(sheet, start_index_row, last_non_empty_row):
374390
return terms_dict
375391

376392

377-
def block_to_entity_dict(sheet, start_index_row, last_non_empty_row, complete_dict):
393+
def block_to_entity_dict(
394+
sheet: Worksheet,
395+
start_index_row: int,
396+
last_non_empty_row: int,
397+
complete_dict: dict[str, Any],
398+
) -> dict[str, Any]:
399+
"""
400+
Extracts entity attributes from an Excel sheet block and returns them as a dictionary.
401+
402+
Args:
403+
sheet: The worksheet object.
404+
start_index_row: The row where the current entity type begins (1-based index).
405+
last_non_empty_row: The row where the current entity type finishes (1-based index).
406+
complete_dict: The dictionary to store the extracted entity attributes.
407+
408+
Returns:
409+
A dictionary containing the entity attributes.
410+
"""
378411
attributes_dict = {}
379412

413+
# Get the entity type from the specified cell
380414
entity_type_position = f"A{start_index_row}"
381415
entity_type = sheet[entity_type_position].value
382416

417+
# Define the valid entity types
383418
entity_types = [
384419
"OBJECT_TYPE",
385420
"SAMPLE_TYPE",
@@ -389,14 +424,16 @@ def block_to_entity_dict(sheet, start_index_row, last_non_empty_row, complete_di
389424
"VOCABULARY_TYPE",
390425
]
391426

427+
# Get the header terms from the row below the entity type row
392428
header_terms = [cell.value for cell in sheet[start_index_row + 1]]
393429

430+
# Check if the entity type is valid
394431
if entity_type not in entity_types:
395432
print(
396433
"The entity type (cell A1) should be one of the following: SAMPLE_TYPE/OBJECT_TYPE, EXPERIMENT_TYPE/COLLECTION_TYPE, DATASET_TYPE, PROPERTY_TYPE, VOCABULARY_TYPE"
397434
)
398-
# return "\n".join(errors)
399435
else:
436+
# Process based on the entity type
400437
if entity_type == "SAMPLE_TYPE" or entity_type == "OBJECT_TYPE":
401438
expected_terms = [
402439
"Code",
@@ -478,12 +515,14 @@ def block_to_entity_dict(sheet, start_index_row, last_non_empty_row, complete_di
478515
auto_generate_value = auto_generate_value == "true"
479516
attributes_dict["autoGeneratedCode"] = auto_generate_value
480517

518+
# Assign the properties dictionary as a field for the entity dictionary
481519
attributes_dict["properties"] = properties_to_dict(
482520
sheet, start_index_row, last_non_empty_row
483521
)
484522

485523
complete_dict[code_value] = attributes_dict
486524

525+
# Return the sorted dictionary (by inheritance, using dots "." as criteria for sorting)
487526
return dict(
488527
sorted(complete_dict.items(), key=lambda item: item[0].count("."))
489528
)
@@ -537,12 +576,14 @@ def block_to_entity_dict(sheet, start_index_row, last_non_empty_row, complete_di
537576
validation_value = ""
538577
attributes_dict["validationPlugin"] = validation_value
539578

579+
# Assign the properties dictionary as a field for the entity dictionary
540580
attributes_dict["properties"] = properties_to_dict(
541581
sheet, start_index_row, last_non_empty_row
542582
)
543583

544584
complete_dict[code_value] = attributes_dict
545585

586+
# Return the sorted dictionary (by inheritance, using dots "." as criteria for sorting)
546587
return dict(
547588
sorted(complete_dict.items(), key=lambda item: item[0].count("."))
548589
)
@@ -668,6 +709,7 @@ def block_to_entity_dict(sheet, start_index_row, last_non_empty_row, complete_di
668709

669710
complete_dict[code_value] = attributes_dict
670711

712+
# Return the sorted dictionary (by inheritance, using dots "." as criteria for sorting)
671713
return dict(
672714
sorted(complete_dict.items(), key=lambda item: item[0].count("."))
673715
)
@@ -722,20 +764,36 @@ def block_to_entity_dict(sheet, start_index_row, last_non_empty_row, complete_di
722764
url_value = ""
723765
attributes_dict["url_template"] = url_value
724766

767+
# Assign the terms dictionary as a field for the vocabulary dictionary
725768
attributes_dict["terms"] = terms_to_dict(
726769
sheet, start_index_row, last_non_empty_row
727770
)
728771

729772
complete_dict[code_value] = attributes_dict
730773

774+
# Return the sorted dictionary (by inheritance, using dots "." as criteria for sorting)
731775
return dict(
732776
sorted(complete_dict.items(), key=lambda item: item[0].count("."))
733777
)
734778

735779

736-
def excel_to_entities(excel_path, output_directory="./artifacts/tmp/"):
780+
def excel_to_entities(
781+
excel_path: str, output_directory: str = "./artifacts/tmp/"
782+
) -> dict[str, dict[str, Any]]:
783+
"""
784+
Extracts entities from an Excel file and returns them as a dictionary.
785+
786+
Args:
787+
excel_path: The path to the Excel file.
788+
output_directory: The directory to store the output files.
789+
790+
Returns:
791+
A dictionary where each key is a normalized sheet name and the value is a dictionary
792+
containing the extracted entities.
793+
"""
737794
sheets_dict = {}
738795

796+
# Load the workbook and get the sheet names
739797
workbook = openpyxl.load_workbook(excel_path)
740798
sheet_names = workbook.sheetnames
741799

0 commit comments

Comments
 (0)