Skip to content

Commit d5d30a4

Browse files
OBJECT data type refs (#212)
* Add support for OBJECT data type with string paths and object instances (#205) * Initial plan * Add OBJECT data type support for string paths and object instances * Add openBIS integration for OBJECT data type references * Add integration tests for OBJECT data type references * Address code review feedback: deduplicate test classes and improve mock validation * Small fixes in the functionalities and testing * Add documentation for OBJECT data type functionality * Clean up of the documentation * Deleted prints and uncomment testing --------- Co-authored-by: Copilot <[email protected]>
1 parent f32e4bc commit d5d30a4

File tree

11 files changed

+619
-45
lines changed

11 files changed

+619
-45
lines changed

bam_masterdata/cli/run_parser.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
from pybis import Openbis
22

33
from bam_masterdata.logger import logger
4-
from bam_masterdata.metadata.entities import CollectionType, PropertyTypeAssignment
4+
from bam_masterdata.metadata.entities import (
5+
CollectionType,
6+
ObjectType,
7+
PropertyTypeAssignment,
8+
)
59
from bam_masterdata.parsing import AbstractParser
610

711

@@ -107,7 +111,58 @@ def run_parser(
107111
value = getattr(object_instance, key, None)
108112
if value is None or isinstance(value, PropertyTypeAssignment):
109113
continue
110-
obj_props[object_instance._property_metadata[key].code.lower()] = value
114+
115+
# Handle OBJECT data type properties
116+
property_metadata = object_instance._property_metadata[key]
117+
if property_metadata.data_type == "OBJECT":
118+
if isinstance(value, str):
119+
# Value is a path string, verify it exists in openBIS
120+
try:
121+
referenced_object = openbis.get_object(value)
122+
# Use the identifier from the fetched object
123+
obj_props[property_metadata.code.lower()] = (
124+
referenced_object.identifier
125+
)
126+
except Exception as e:
127+
logger.error(
128+
f"Failed to resolve OBJECT reference '{value}' for property '{key}': {e}"
129+
)
130+
continue
131+
elif isinstance(value, ObjectType):
132+
# Value is an ObjectType instance, construct the path
133+
if not value.code:
134+
logger.warning(
135+
f"OBJECT reference for property '{key}' has no code, skipping"
136+
)
137+
continue
138+
# Construct the identifier path
139+
# Try to find this object in the openbis_id_map first (if it's being created in the same batch)
140+
referenced_identifier = None
141+
for obj_id, obj_inst in collection.attached_objects.items():
142+
if obj_inst is value and obj_id in openbis_id_map:
143+
referenced_identifier = openbis_id_map[obj_id]
144+
break
145+
146+
if not referenced_identifier:
147+
# Construct identifier from the object's code
148+
# Assume it's in the same space/project as the current object
149+
if not collection_name:
150+
referenced_identifier = (
151+
f"/{space_name}/{project_name}/{value.code}"
152+
)
153+
else:
154+
referenced_identifier = f"/{space_name}/{project_name}/{collection_name}/{value.code}"
155+
156+
obj_props[property_metadata.code.lower()] = referenced_identifier
157+
else:
158+
# Unexpected type, skip
159+
logger.warning(
160+
f"Unexpected type for OBJECT property '{key}': {type(value).__name__}"
161+
)
162+
continue
163+
else:
164+
# Not an OBJECT type, handle normally
165+
obj_props[property_metadata.code.lower()] = value
111166

112167
# Check if object already exists in openBIS, and if so, notify and get for updating properties
113168
if not object_instance.code:

bam_masterdata/metadata/entities.py

Lines changed: 72 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,67 @@ def __init__(self, **kwargs):
616616
for key, prop in self._property_metadata.items():
617617
self._properties[key] = prop.data_type
618618

619+
def _set_object_value(self, key, value):
620+
"""
621+
Sets the value when the data type is OBJECT.
622+
"""
623+
if isinstance(value, str):
624+
# Validate the path format: /{space}/{project}/{collection}/{object} or /{space}/{project}/{object}
625+
# If path is valid, store it as-is
626+
if not value.startswith("/"):
627+
raise ValueError(
628+
f"Invalid OBJECT path format for '{key}': Path must start with '/', got '{value}'"
629+
)
630+
path_parts = value.strip("/").split("/")
631+
if len(path_parts) not in [3, 4]:
632+
raise ValueError(
633+
f"Invalid OBJECT path format for '{key}': Expected '/<space>/<project>/<collection>/<object>' "
634+
f"or '/<space>/<project>/<object>', got '{value}'"
635+
)
636+
# * We don't validate if the object exists here as it requires pybis connection
637+
# * That validation should be done when saving to openBIS
638+
elif isinstance(value, ObjectType):
639+
# Check if the object instance has a code
640+
if not hasattr(value, "code") or value.code is None:
641+
raise ValueError(
642+
f"Object instance for '{key}' must have a 'code' attribute set to be used as a reference"
643+
)
644+
else:
645+
raise TypeError(
646+
f"Invalid type for OBJECT property '{key}': Expected str (path) or ObjectType instance, "
647+
f"got {type(value).__name__}"
648+
)
649+
return value
650+
651+
def _validate_controlled_vocabulary(self, meta, key, value) -> None:
    """
    Checks that `value` is one of the term codes of the vocabulary assigned to property `key`.

    The vocabulary code is read from `meta[key].vocabulary_code`. The vocabulary class is then
    looked up with `self.get_vocabulary_class`, using the first entry returned by
    `listdir_py_modules(DATAMODEL_DIR)` whose name contains 'vocabulary_types.py'.

    Args:
        meta: Mapping from property name to its metadata; `meta[key]` must expose `vocabulary_code`.
        key: Name of the property being validated.
        value: The value to look for among the codes of the vocabulary terms.

    Raises:
        ValueError: If no vocabulary code is defined for the property, if no vocabulary class
            matches that code, or if `value` is not one of the term codes.
        FileNotFoundError: If no 'vocabulary_types.py' entry is found.
    """
    vocabulary_code = meta[key].vocabulary_code
    if not vocabulary_code:
        raise ValueError(
            f"Property '{key}' of type CONTROLLEDVOCABULARY must have a vocabulary_code defined."
        )

    # First module whose path mentions the vocabulary definitions file, if any
    vocab_path = next(
        (
            module
            for module in listdir_py_modules(DATAMODEL_DIR)
            if "vocabulary_types.py" in module
        ),
        None,
    )
    if vocab_path is None:
        raise FileNotFoundError(
            f"The file 'vocabulary_types.py' was not found in the directory specified by {DATAMODEL_DIR}."
        )

    vocabulary_class = self.get_vocabulary_class(vocabulary_code, vocab_path)
    if vocabulary_class is None:
        raise ValueError(
            f"No matching vocabulary class found for vocabulary_code '{vocabulary_code}'."
        )

    allowed_codes = [term.code for term in vocabulary_class.terms]
    if value in allowed_codes:
        return
    raise ValueError(
        f"{value} for {key} is not in the list of allowed terms for vocabulary."
    )
679+
619680
def __setattr__(self, key, value):
620681
if key in ["_property_metadata", "_properties", "code"]:
621682
super().__setattr__(key, value)
@@ -660,36 +721,19 @@ def __setattr__(self, key, value):
660721
raise TypeError(
661722
f"Invalid type for '{key}': Expected {expected_type.__name__}, got {type(value).__name__}"
662723
)
663-
# CONTROLLEDVOCABULARY check
724+
725+
# Get data type for additional checks
664726
data_type = meta[key].data_type
665-
if data_type == "CONTROLLEDVOCABULARY":
666-
vocabulary_code = meta[key].vocabulary_code
667-
if not vocabulary_code:
668-
raise ValueError(
669-
f"Property '{key}' of type CONTROLLEDVOCABULARY must have a vocabulary_code defined."
670-
)
671-
vocab_path = None
672-
for file in listdir_py_modules(DATAMODEL_DIR):
673-
if "vocabulary_types.py" in file:
674-
vocab_path = file
675-
break
676-
if vocab_path is None:
677-
raise FileNotFoundError(
678-
f"The file 'vocabulary_types.py' was not found in the directory specified by {DATAMODEL_DIR}."
679-
)
680-
vocabulary_class = self.get_vocabulary_class(
681-
vocabulary_code, vocab_path
727+
# OBJECT check and attr assignment
728+
if data_type == "OBJECT":
729+
return object.__setattr__(
730+
self, key, self._set_object_value(key, value)
682731
)
683-
if vocabulary_class is None:
684-
raise ValueError(
685-
f"No matching vocabulary class found for vocabulary_code '{vocabulary_code}'."
686-
)
687-
codes = [term.code for term in vocabulary_class.terms]
688-
if value not in codes:
689-
raise ValueError(
690-
f"{value} for {key} is not in the list of allowed terms for vocabulary."
691-
)
692-
# set attribute
732+
# CONTROLLEDVOCABULARY check
733+
if data_type == "CONTROLLEDVOCABULARY":
734+
self._validate_controlled_vocabulary(meta, key, value)
735+
736+
# Setting attribute value after all checks
693737
return object.__setattr__(self, key, value)
694738

695739
raise KeyError(

docs/howtos/object_references.md

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# How-to: Work with Object References
2+
3+
This how-to guide shows you how to work with properties whose data type is OBJECT in order to reference other objects in openBIS.
4+
5+
## What are object references?
6+
7+
Some object types have properties with `data_type="OBJECT"` that create references to other objects. For example:
8+
9+
- An `Instrument` might have a `responsible_person` property that references a `Person` object
10+
- A `Calibration` might have an `instrument` property that references an `Instrument` object
11+
- A `Sample` might have a `parent_sample` property that references another `Sample` object
12+
13+
These properties reference other objects that already exist in openBIS. Their purpose is to link objects, similar to what a parent-child relationship does. However, these links carry a semantic meaning, while parent-child relationships only connect inputs with outputs.
14+
15+
??? note "Semantic meaning of object referencing"
16+
The semantic meaning of object referencing is given by the name of the property (e.g., a person responsible for operating an instrument). Nevertheless, openBIS will soon allow for adding more metainformation to these properties to create a more complete description of objects and their relationships.
17+
18+
## Option 1: Reference by Object Instance
19+
20+
When creating multiple related objects in the same operation, you can reference them directly:
21+
22+
```python
23+
from bam_masterdata.datamodel.object_types import Person, Instrument
24+
from bam_masterdata.metadata.entities import CollectionType
25+
26+
# Create a collection
27+
collection = CollectionType()
28+
29+
# Create a person
30+
person = Person(name="Dr. Jane Smith")
31+
person.code = "PERSON_001" # ⚠️ Must set a code!
32+
person_id = collection.add(person)
33+
34+
# Create an instrument and reference the person
35+
instrument = Instrument(name="High-Resolution Microscope")
36+
instrument.responsible_person = person # Direct reference
37+
instrument_id = collection.add(instrument)
38+
```
39+
40+
!!! warning "Object must have a code"
41+
When referencing an object instance, it **must** have a `code` attribute set. If not, you'll get a `ValueError`:
42+
43+
```
44+
ValueError: Object instance for 'responsible_person' must have a 'code' attribute set to be used as a reference
45+
```
46+
47+
## Option 2: Reference by Path String
48+
49+
If the object already exists in openBIS, you can reference it using its identifier path:
50+
51+
```python
52+
from bam_masterdata.datamodel.object_types import Instrument
53+
54+
# Create an instrument
55+
instrument = Instrument(name="Spectrometer X500")
56+
57+
# Reference an existing person using the path format
58+
# Format: /{space}/{project}/{collection}/{object}
59+
instrument.responsible_person = "/LAB_SPACE/INSTRUMENTS/STAFF/PERSON_001"
60+
61+
# Or without collection: /{space}/{project}/{object}
62+
instrument.responsible_person = "/LAB_SPACE/INSTRUMENTS/PERSON_001"
63+
```
64+
65+
!!! note "Path validation"
66+
The path must:
67+
68+
- Start with `/`
69+
- Have either 3 parts (space/project/object) or 4 parts (space/project/collection/object)
70+
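- Point to an existing object in openBIS (existence is only checked when the data is uploaded to openBIS, not when the property is assigned)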
71+
72+
## Combining Both Approaches
73+
74+
You can mix both approaches in the same parser:
75+
76+
```python
77+
from bam_masterdata.parsing import AbstractParser
78+
from bam_masterdata.datamodel.object_types import Person, Instrument
79+
80+
class InstrumentParser(AbstractParser):
81+
def parse(self, files, collection, logger):
82+
# Create a new person
83+
new_person = Person(name="Dr. Alice Johnson")
84+
new_person.code = "PERSON_NEW_001"
85+
collection.add(new_person)
86+
87+
# Instrument 1: References the newly created person
88+
instrument1 = Instrument(name="Microscope A")
89+
instrument1.responsible_person = new_person # Object instance
90+
collection.add(instrument1)
91+
92+
# Instrument 2: References an existing person in openBIS
93+
instrument2 = Instrument(name="Microscope B")
94+
instrument2.responsible_person = "/LAB_SPACE/PROJECT/PERSON_EXISTING" # Path
95+
collection.add(instrument2)
96+
```
97+
98+
## Troubleshooting
99+
100+
### Error: "Object instance must have a 'code' attribute set"
101+
102+
**Cause**: You're trying to reference an object instance that doesn't have a code.
103+
104+
**Solution**: Set the `code` attribute before using the object as a reference:
105+
106+
```python
107+
person = Person(name="Dr. Smith")
108+
person.code = "PERSON_001" # ✓ Set the code
109+
instrument.responsible_person = person
110+
```
111+
112+
### Error: "Invalid OBJECT path format"
113+
114+
**Cause**: The path string doesn't follow the required format.
115+
116+
**Solution**: Ensure your path:
117+
118+
- Starts with `/`
119+
- Has 3 or 4 parts separated by `/`
120+
121+
```python
122+
# ✗ Wrong
123+
instrument.responsible_person = "PERSON_001"
124+
instrument.responsible_person = "SPACE/PROJECT/PERSON_001"
125+
126+
# ✓ Correct
127+
instrument.responsible_person = "/SPACE/PROJECT/PERSON_001"
128+
instrument.responsible_person = "/SPACE/PROJECT/COLLECTION/PERSON_001"
129+
```
130+
131+
### Error: "Failed to resolve OBJECT reference"
132+
133+
**Cause**: The path references an object that doesn't exist in openBIS.
134+
135+
**Solution**: Verify the object exists in openBIS at the specified path, or create it first:
136+
137+
```python
138+
# Check if the object exists in openBIS before referencing
openbis = ologin(...)
try:
    obj = openbis.get_object("/SPACE/PROJECT/PERSON_001")
except Exception as err:
    # A bare `except:` would also swallow KeyboardInterrupt/SystemExit and hide the cause
    print(f"Object not found - create it first or use a different path ({err})")
else:
    print(f"Object exists: {obj.identifier}")
145+
```

docs/howtos/parsing/create_new_parsers.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# How-to: Create new parsers
1+
# How-to: Create New Parsers
22

33
This how-to guide explains how to create a custom parser that reads raw files (CSV, Excel, JSON, XML, etc) and transforms them into the `bam-masterdata` format. By following these steps, your parser can be integrated into the Data Store workflow and used in the [Parser app](parser_app.md).
44
This allows you to bring custom or third-party data sources into the existing masterdata workflows without manual conversion.

0 commit comments

Comments
 (0)