diff --git a/bam_masterdata/checker/masterdata_validator.py b/bam_masterdata/checker/masterdata_validator.py index 419b3a1..3fbfefd 100644 --- a/bam_masterdata/checker/masterdata_validator.py +++ b/bam_masterdata/checker/masterdata_validator.py @@ -120,7 +120,10 @@ def _validate_model(self, model: dict) -> dict: # Collect ordered sections for each entity entity_sections = [] # Validate 'properties' (except for vocabulary_types, which uses 'terms') - if entity_type != "vocabulary_types" and "properties" in entity_data: + if ( + entity_type != "vocabulary_types" + and entity_type != "vocabulary_type" + ) and "properties" in entity_data: for prop in entity_data["properties"]: row_location = prop.get("row_location", "Unknown") @@ -178,8 +181,8 @@ def _validate_model(self, model: dict) -> dict: # Check if required properties exist in specific sections required_properties = { - "Additional Information": "NOTES", - "Comments": "$XMLCOMMENTS", + "Additional Information": ["notes"], + "Comments": ["comments", "xmlcomments", "$xmlcomments"], } # Track found properties @@ -190,11 +193,11 @@ def _validate_model(self, model: dict) -> dict: property_code = entry["code"] row_location = entry["row_location"] - if ( - section in required_properties - and property_code == required_properties[section] - ): - found_properties[section] = True + # Check if this section is one we need to validate + if section in required_properties: + # Perform a case-insensitive check against the list of allowed property codes + if property_code.lower() in required_properties[section]: + found_properties[section] = True # Log errors for missing required properties for section, prop in required_properties.items(): @@ -208,7 +211,10 @@ def _validate_model(self, model: dict) -> dict: ) # Validate 'terms' (only for vocabulary_types) - if entity_type == "vocabulary_types" and "terms" in entity_data: + if ( + entity_type == "vocabulary_types" + or entity_type == "vocabulary_type" + ) and "terms" in 
entity_data: for term in entity_data["terms"]: row_location = term.get("row_location", "Unknown") self._validate_fields( @@ -363,7 +369,7 @@ def _compare_with_current_model(self, mode) -> dict: ) # Special case for `property_types` - if entity_type == "property_types": + if entity_type in ("property_types", "property_type"): incoming_row_location = incoming_entity.get( "row_location", "Unknown" ) diff --git a/bam_masterdata/checker/source_loader.py b/bam_masterdata/checker/source_loader.py index d530e8e..fbc7ded 100644 --- a/bam_masterdata/checker/source_loader.py +++ b/bam_masterdata/checker/source_loader.py @@ -64,7 +64,7 @@ def entities_to_json(self) -> dict: transformed_data[entity_type] = {} for entity_name, entity_data in entities.items(): - if entity_type == "vocabulary_types": + if entity_type in ("vocabulary_type", "vocabulary_types"): transformed_entity = { "terms": [], # Now placed before "defs" "defs": { # Metadata moved to the end @@ -87,14 +87,17 @@ def entities_to_json(self) -> dict: entity_name ), # PascalCase for entity ID "row_location": entity_data.get("row_location"), - "validation_script": entity_data.get("validationPlugin") - or None, # Convert "" to None + "validation_script": entity_data.get( + "validationPlugin" + ).strip() + if isinstance(entity_data.get("validationPlugin"), str) + else None, "iri": entity_data.get("iri") or None, # Convert "" to None }, } # Handle additional fields specific to dataset_types - if entity_type == "dataset_types": + if entity_type in ("dataset_types", "dataset_type"): transformed_entity["defs"]["main_dataset_pattern"] = ( entity_data.get("main_dataset_pattern") ) @@ -103,7 +106,7 @@ def entities_to_json(self) -> dict: ) # Handle additional fields specific to object_types - if entity_type == "object_types": + if entity_type in ("object_types", "object_type"): transformed_entity["defs"]["generated_code_prefix"] = ( entity_data.get("generatedCodePrefix") ) @@ -140,7 +143,10 @@ def entities_to_json(self) -> dict: 
transformed_entity["properties"].append(transformed_property) if "terms" in entity_data: - for term_name, term_data in entity_data["terms"].items(): + transformed_entity.setdefault("terms", []) + for term_name, term_data in ( + entity_data.get("terms") or {} + ).items(): transformed_term = { "code": term_data.get("code"), "description": term_data.get("description", ""), diff --git a/bam_masterdata/checker/validation_rules/excel_validation_rules.json b/bam_masterdata/checker/validation_rules/excel_validation_rules.json index dcca4d7..2e3a8ad 100644 --- a/bam_masterdata/checker/validation_rules/excel_validation_rules.json +++ b/bam_masterdata/checker/validation_rules/excel_validation_rules.json @@ -4,7 +4,7 @@ "Description": {"key": "description", "pattern": ".*", "is_description": true}, "Validation script": { "key": "validationPlugin", - "pattern": "^[A-Za-z0-9_]+\\.py$", + "pattern": "^[A-Za-z0-9_\\.]+\\.py$", "allow_empty": true }, "Generated code prefix": { @@ -19,7 +19,7 @@ "Description": {"key": "description", "pattern": ".*", "is_description": true}, "Validation script": { "key": "validationPlugin", - "pattern": "^[A-Za-z0-9_]+\\.py$", + "pattern": "^[A-Za-z0-9_\\.]+\\.py$", "allow_empty": true }, "Generated code prefix": { @@ -34,7 +34,7 @@ "Description": {"key": "description", "pattern": ".*", "is_description": true}, "Validation script": { "key": "validationPlugin", - "pattern": "^[A-Za-z0-9_\\.]+$", + "pattern": "^[A-Za-z0-9_\\.]+\\.py$", "allow_empty": true } }, @@ -43,7 +43,7 @@ "Description": {"key": "description", "pattern": ".*", "is_description": true}, "Validation script": { "key": "validationPlugin", - "pattern": "^[A-Za-z0-9_]+\\.py$", + "pattern": "^[A-Za-z0-9_\\.]+\\.py$", "allow_empty": true }, "Main dataset pattern": {"key": "main_dataset_pattern", "pattern": ".*", "allow_empty": true}, @@ -58,7 +58,7 @@ "Metadata": {"key": "metadata", "pattern": ".*"}, "Dynamic script": { "key": "plugin", - "pattern": "^[A-Za-z0-9_]+\\.py$", + "pattern": 
"^[A-Za-z0-9_\\.]+\\.py$", "allow_empty": true } }, diff --git a/bam_masterdata/checker/validation_rules/validation_rules.json b/bam_masterdata/checker/validation_rules/validation_rules.json index c6dea42..f144aed 100644 --- a/bam_masterdata/checker/validation_rules/validation_rules.json +++ b/bam_masterdata/checker/validation_rules/validation_rules.json @@ -71,7 +71,7 @@ "is_bool": true }, "section": { - "pattern": "^([A-Z][a-z]*|[A-Z]+)( ([A-Z][a-z]*|[A-Z]+))*$", + "pattern": "^([A-Z][a-z]*|[A-Z]+)( (([A-Z][a-z]*|[A-Z]+)|\\(([A-Z][a-z]*|[A-Z]+)( ([A-Z][a-z]*|[A-Z]+))*\\)))*$", "is_section": true, "allow_empty": true }, diff --git a/bam_masterdata/excel/excel_to_entities.py b/bam_masterdata/excel/excel_to_entities.py index 6d22874..9e27b4e 100644 --- a/bam_masterdata/excel/excel_to_entities.py +++ b/bam_masterdata/excel/excel_to_entities.py @@ -1,5 +1,6 @@ import os import re +import sys from typing import TYPE_CHECKING, Any from bam_masterdata.utils import is_reduced_version, load_validation_rules @@ -577,21 +578,30 @@ def process_term_cell(term, cell_value, coordinate, sheet_title): process_term_cell(term, cell.value, cell.coordinate, sheet.title) ) - # Combine extracted values into a dictionary + if not extracted_columns.get("Code"): + self.logger.error( + f"The required 'Code' column for terms was not found in sheet {sheet.title}." 
+ ) + return {} + + # Combine extracted values into a dictionary safely for i in range(len(extracted_columns["Code"])): - terms_dict[extracted_columns["Code"][i]] = { - "permId": extracted_columns["Code"][i], - "code": extracted_columns["Code"][i], + code = extracted_columns["Code"][i] + terms_dict[code] = { + "permId": code, + "code": code, } + # NOTE(review): "Description" is still stored under the key "descriptions" in the mapping below -- confirm the intended key name. for key, pybis_val in { "Description": "descriptions", "Url template": "url_template", "Label": "label", "Official": "official", }.items(): - if extracted_columns.get(key): + # Only read a value when the column was extracted and has an entry at index i for this row. + if extracted_columns.get(key) and i < len(extracted_columns[key]): value = extracted_columns[key][i] - terms_dict[extracted_columns["Code"][i]][pybis_val] = value + terms_dict[code][pybis_val] = value return terms_dict diff --git a/bam_masterdata/openbis/login.py b/bam_masterdata/openbis/login.py index a443c27..f608df8 100644 --- a/bam_masterdata/openbis/login.py +++ b/bam_masterdata/openbis/login.py @@ -14,5 +14,7 @@ def ologin(url: str = "") -> Openbis: Openbis: Openbis object for the specific openBIS instance defined in `URL`. """ o = Openbis(url) - o.login(environ("OPENBIS_USERNAME"), environ("OPENBIS_PASSWORD"), save_token=True) + o.login( + environ("OPENBIS_USERNAME"), environ("OPENBIS_NEW_PASSWORD"), save_token=True + ) return o