Skip to content

Commit 08075a9

Browse files
Add possibility of update metadata in an object during parsing (#201)
* Add code to BaseEntity * Added object_openbis check if it already exists when object_instance.code is not none * Adapted run_parser for empty collections (using project directly) Moved run_parser to its own module cli.run_parser.py * Add object_openbis props updating when code exists in space * Fix updating props * Fix testing * Fix potential problems if collection_name does not exist * Fixed run_parser and entities __setattr__ * Add comprehensive testing for run_parser() and document code parameter for referencing existing objects (#202) * Initial plan * Add comprehensive tests for run_parser() function with mock OpenBIS Co-authored-by: JosePizarro3 <[email protected]> * Update documentation to explain code parameter for referencing existing objects Co-authored-by: JosePizarro3 <[email protected]> * Fix linting issues in test files Co-authored-by: JosePizarro3 <[email protected]> * Address code review feedback - move import to top level and specify encoding Co-authored-by: JosePizarro3 <[email protected]> * Fix mocking and testing * Fix imports * Improved documentation --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: JosePizarro3 <[email protected]> Co-authored-by: jpizarro <[email protected]> * Add comprehensive tutorial for automating metadata injection with run_parser() API (#203) * Initial plan * Add parsing tutorial for run_parser() API Co-authored-by: JosePizarro3 <[email protected]> * Fix mermaid diagram syntax and link titles in parsing tutorial Co-authored-by: JosePizarro3 <[email protected]> * Polished parsing docu * Improved index.md --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: JosePizarro3 <[email protected]> Co-authored-by: jpizarro <[email protected]> * Fix typos --------- Co-authored-by: Copilot <[email protected]>
1 parent 609397e commit 08075a9

File tree

17 files changed

+1172
-157
lines changed

17 files changed

+1172
-157
lines changed

bam_masterdata/cli/cli.py

Lines changed: 5 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,16 @@
99
import click
1010
from decouple import config as environ
1111
from openpyxl import Workbook
12-
from pybis import Openbis
1312
from rdflib import Graph
1413

1514
from bam_masterdata.checker import MasterdataChecker
1615
from bam_masterdata.cli.entities_to_excel import entities_to_excel
1716
from bam_masterdata.cli.entities_to_rdf import entities_to_rdf
1817
from bam_masterdata.cli.fill_masterdata import MasterdataCodeGenerator
18+
from bam_masterdata.cli.run_parser import run_parser
1919
from bam_masterdata.logger import logger
20-
from bam_masterdata.metadata.entities import CollectionType, PropertyTypeAssignment
2120
from bam_masterdata.metadata.entities_dict import EntitiesDict
2221
from bam_masterdata.openbis.login import ologin
23-
from bam_masterdata.parsing import AbstractParser
2422
from bam_masterdata.utils import (
2523
DATAMODEL_DIR,
2624
delete_and_create_dir,
@@ -585,143 +583,6 @@ def push_to_openbis(file_path, datamodel_path):
585583
)
586584

587585

588-
def run_parser(
589-
openbis: Openbis = None,
590-
files_parser: dict[AbstractParser, list[str]] = {},
591-
project_name: str = "PROJECT",
592-
collection_name: str = "COLLECTION",
593-
space_name: str = None,
594-
) -> None:
595-
"""
596-
Run the parsers on the specified files and collect the results.
597-
Args:
598-
login with save_token=True dont forget!!
599-
files_parser (dict): A dictionary where keys are parser instances and values are lists of file paths to be parsed.
600-
space (str): The space in openBIS where the entities will be stored.
601-
project (str): The project in openBIS where the entities will be stored.
602-
collection (str): The collection in openBIS where the entities will be stored.
603-
"""
604-
# Ensure the space, project, and collection are set
605-
if not (project_name or collection_name):
606-
logger.error(
607-
"The name of the project and collection must be specified for the parser to run."
608-
)
609-
return
610-
# Ensure the files_parser is not empty
611-
if not files_parser:
612-
logger.error(
613-
"No files or parsers to parse. Please provide valid file paths or contact an Admin to add missing parser."
614-
)
615-
return
616-
617-
# Specify the space and project for the data
618-
try:
619-
space = openbis.get_space(space_name)
620-
except Exception:
621-
space = None
622-
623-
if space is None:
624-
# user name as default space
625-
for s in openbis.get_spaces():
626-
if s.code.endswith(openbis.username.upper()):
627-
space = s
628-
logger.warning(
629-
f"Space {space_name} does not exist in openBIS. "
630-
f"Loading data in {openbis.username}."
631-
)
632-
break
633-
634-
# no space found
635-
if space is None:
636-
logger.error(
637-
f"No usable Space for {openbis.username} in openBIS. Please create it first or notify an Admin."
638-
)
639-
return
640-
641-
if project_name.upper() in [p.code for p in space.get_projects()]:
642-
project = space.get_project(project_name)
643-
else:
644-
logger.info("Replacing project code with uppercase and underscores.")
645-
project = space.new_project(
646-
code=project_name.replace(" ", "_").upper(),
647-
description="New project created via automated parsing with `bam_masterdata`.",
648-
)
649-
project.save()
650-
651-
# Create a new pybis `COLLECTION` to store the generated objects
652-
if collection_name.upper() in [c.code for c in project.get_collections()]:
653-
collection_openbis = space.get_collection(
654-
f"/{openbis.username}/{project_name}/{collection_name}".upper()
655-
)
656-
else:
657-
logger.info("Replacing collection code with uppercase and underscores.")
658-
collection_openbis = openbis.new_collection(
659-
code=collection_name.replace(" ", "_").upper(),
660-
type="COLLECTION",
661-
project=project,
662-
)
663-
collection_openbis.save()
664-
665-
# Create a bam_masterdata CollectionType instance for storing parsed results
666-
collection = CollectionType()
667-
# Iterate over each parser and its associated files and store them in `collection`
668-
for parser, files in files_parser.items():
669-
parser.parse(files, collection, logger=logger)
670-
671-
# Map the objects added to CollectionType to objects in openBIS using pyBIS
672-
openbis_id_map = {}
673-
for object_id, object_instance in collection.attached_objects.items():
674-
# Map PropertyTypeAssignment to pybis props dictionary
675-
obj_props = {}
676-
for key in object_instance._properties.keys():
677-
value = getattr(object_instance, key, None)
678-
if value is None or isinstance(value, PropertyTypeAssignment):
679-
continue
680-
obj_props[object_instance._property_metadata[key].code.lower()] = value
681-
682-
object_openbis = openbis.new_object(
683-
type=object_instance.defs.code,
684-
space=space,
685-
project=project,
686-
collection=collection_openbis,
687-
props=obj_props,
688-
)
689-
object_openbis.save()
690-
691-
# save local and openbis IDs to map parent-child relationships
692-
openbis_id_map[object_id] = object_openbis.identifier
693-
click.echo(
694-
f"Object {obj_props.get('$name')} stored in openBIS collection {collection_name}."
695-
)
696-
for _, files in files_parser.items():
697-
# Upload the file to openBIS
698-
try:
699-
dataset = openbis.new_dataset(
700-
type="RAW_DATA",
701-
files=files,
702-
collection=collection_openbis,
703-
)
704-
dataset.save()
705-
except Exception as e:
706-
logger.warning(f"Error uploading files {files} to openBIS: {e}")
707-
continue
708-
click.echo(f"Files uploaded to openBIS collection {collection_name}.")
709-
710-
# Map parent-child relationships
711-
for parent_id, child_id in collection.relationships.values():
712-
if parent_id in openbis_id_map and child_id in openbis_id_map:
713-
parent_openbis_id = openbis_id_map[parent_id]
714-
child_openbis_id = openbis_id_map[child_id]
715-
716-
child_openbis = openbis.get_object(child_openbis_id)
717-
child_openbis.add_parents(parent_openbis_id)
718-
child_openbis.save()
719-
720-
click.echo(
721-
f"Linked child {child_openbis_id} to parent {parent_openbis_id} in collection {collection_name}."
722-
)
723-
724-
725586
@cli.command(
726587
name="parser",
727588
help="Parses a list of files using the specified parsers and stores the results in openBIS.",
@@ -746,7 +607,7 @@ def run_parser(
746607
"--collection-name",
747608
"collection_name", # alias
748609
type=str,
749-
required=True,
610+
required=False,
750611
help="OpenBIS collection name",
751612
)
752613
@click.option(
@@ -770,10 +631,11 @@ def parser(files_parser, project_name, collection_name, space_name):
770631
parse_file_dict[parser_cls].append(filepath)
771632

772633
run_parser(
773-
files_parser=parse_file_dict,
634+
openbis=ologin(url=environ("OPENBIS_URL")),
635+
space_name=space_name,
774636
project_name=project_name,
775637
collection_name=collection_name,
776-
space_name=space_name,
638+
files_parser=parse_file_dict,
777639
)
778640

779641

bam_masterdata/cli/run_parser.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
from pybis import Openbis
2+
3+
from bam_masterdata.logger import logger
4+
from bam_masterdata.metadata.entities import CollectionType, PropertyTypeAssignment
5+
from bam_masterdata.parsing import AbstractParser
6+
7+
8+
def _normalize_code(name: str) -> str:
    """Return `name` as used for openBIS codes here: spaces become underscores, all uppercase."""
    return name.replace(" ", "_").upper()


def _resolve_space(openbis, space_name: str):
    """Return the space called `space_name`, else the first space whose code ends with the user name, else None."""
    try:
        space = openbis.get_space(space_name)
    except Exception:
        # Best-effort lookup: any failure is treated as "space not found"
        # so that the user-space fallback below can be tried.
        space = None
    if space is not None:
        return space

    username = openbis.username
    for candidate in openbis.get_spaces():
        if candidate.code.endswith(username.upper()):
            logger.warning(
                f"Space {space_name} does not exist in openBIS. "
                f"Loading space for {username}."
            )
            return candidate
    return None


def _collect_props(object_instance) -> dict:
    """Build the pyBIS `props` dictionary from the properties that were actually set on `object_instance`."""
    props = {}
    for key in object_instance._properties:
        value = getattr(object_instance, key, None)
        # Unset properties are either None or still the PropertyTypeAssignment definition itself
        if value is None or isinstance(value, PropertyTypeAssignment):
            continue
        props[object_instance._property_metadata[key].code.lower()] = value
    return props


def run_parser(
    openbis: Openbis | None = None,
    space_name: str = "",
    project_name: str = "PROJECT",
    collection_name: str = "",
    files_parser: dict[AbstractParser, list[str]] | None = None,
) -> None:
    """
    Run the parsers on the specified files and store the results in openBIS.

    The function resolves (or creates) the target project and, optionally, collection,
    runs every parser on its files, creates or updates the resulting objects in openBIS,
    uploads the parsed files as `RAW_DATA` datasets, and finally links parent-child
    relationships between the stored objects.

    Note from the original author: do not forget to log in with `save_token=True`.

    Args:
        openbis (Openbis): An instance of the Openbis class from pyBIS, already logged in.
        space_name (str): The space in openBIS where the entities will be stored. If it cannot
            be loaded, the first space whose code ends with the upper-cased user name is used.
        project_name (str): The project in openBIS where the entities will be stored. It is
            created if it does not exist under the space.
        collection_name (str): The collection in openBIS where the entities will be stored. If empty,
            objects are attached directly to the project.
        files_parser (dict): A dictionary where keys are parser instances and values are lists of file
            paths to be parsed. E.g., {MasterdataParserExample(): ["path/to/file.json", "path/to/another_file.json"]}

    Returns:
        None. Problems with the inputs or the space are reported through `logger.error` and the
        function returns early; dataset upload failures are logged as warnings and skipped.
    """
    # Ensure openbis is provided
    if openbis is None:
        logger.error("An instance of Openbis must be provided for the parser to run.")
        return
    # Ensure the project is set
    if not project_name:
        logger.error("The Project name must be specified for the parser to run.")
        return
    # Ensure the files_parser is not empty (also covers the `None` default)
    if not files_parser:
        logger.error(
            "No files or parsers to parse. Please provide valid file paths or contact an Admin to add missing parser."
        )
        return

    # Specify the space, falling back to the user space
    space = _resolve_space(openbis, space_name)
    if space is None:
        logger.error(
            f"No usable Space for {openbis.username} in openBIS. Please create it first or notify an Admin."
        )
        return

    # Get project if it already exists under the space or create a new one if it does not.
    # The same normalized code is used for lookup and creation so that re-runs find the project.
    project_code = _normalize_code(project_name)
    if project_code in [p.code for p in space.get_projects()]:
        project = space.get_project(project_code)
    else:
        logger.info("Replacing project code with uppercase and underscores.")
        project = space.new_project(
            code=project_code,
            description="New project created via automated parsing with `bam_masterdata`.",
        )
        project.save()

    # Get or create the pybis collection to store the generated objects
    collection_code = _normalize_code(collection_name) if collection_name else ""
    if not collection_name:
        logger.info(
            "No Collection name specified. Attaching objects directly to Project."
        )
        collection_openbis = project
    elif collection_code in [c.code for c in project.get_collections()]:
        # Use the resolved space code: `space_name` may be empty or wrong after the fallback
        collection_openbis = space.get_collection(
            f"/{space.code}/{project_code}/{collection_code}"
        )
    else:
        logger.info("Replacing collection code with uppercase and underscores.")
        collection_openbis = openbis.new_collection(
            code=collection_code,
            type="DEFAULT_EXPERIMENT",
            project=project,
        )
        collection_openbis.save()

    # Create a bam_masterdata CollectionType instance for storing parsed results
    collection = CollectionType()
    # Iterate over each parser and its associated files and store them in `collection`
    for parser, files in files_parser.items():
        parser.parse(files, collection, logger=logger)

    # Map the objects added to CollectionType to objects in openBIS using pyBIS
    openbis_id_map = {}
    for object_id, object_instance in collection.attached_objects.items():
        obj_props = _collect_props(object_instance)

        if not object_instance.code:
            # No code given: register a new object
            new_object_kwargs = {
                "type": object_instance.defs.code,
                "space": space,
                "project": project,
                "props": obj_props,
            }
            if collection_name:
                new_object_kwargs["collection"] = collection_openbis
            object_openbis = openbis.new_object(**new_object_kwargs)
            object_openbis.save()
        else:
            # A code was given: the object is expected to exist already, so only update its properties
            identifier = (
                f"/{space.code}/{project_code}/{object_instance.code}"
                if not collection_name
                else f"/{space.code}/{project_code}/{collection_code}/{object_instance.code}"
            )
            object_openbis = space.get_object(identifier)
            object_openbis.set_props(obj_props)
            object_openbis.save()
            logger.info(
                f"Object {identifier} already exists in openBIS, updating properties."
            )

        # save local and openbis IDs to map parent-child relationships
        openbis_id_map[object_id] = object_openbis.identifier

    # Storing files as datasets in openBIS
    for files in files_parser.values():
        try:
            if not collection_name:
                # ! This won't work on a project -> datasets only attached to collections in pyBIS
                dataset = openbis.new_dataset(
                    type="RAW_DATA",
                    files=files,
                    project=project,
                )
            else:
                dataset = openbis.new_dataset(
                    type="RAW_DATA",
                    files=files,
                    collection=collection_openbis,
                )
            dataset.save()
        except Exception as e:
            # Upload is best-effort: report and move on to the next batch of files
            logger.warning(f"Error uploading files {files} to openBIS: {e}")
            continue
        logger.info(f"Files uploaded to openBIS collection {collection_name}.")

    # Map parent-child relationships
    for parent_id, child_id in collection.relationships.values():
        # Only link pairs for which both ends were stored in openBIS above
        if parent_id in openbis_id_map and child_id in openbis_id_map:
            parent_openbis_id = openbis_id_map[parent_id]
            child_openbis_id = openbis_id_map[child_id]

            child_openbis = openbis.get_object(child_openbis_id)
            child_openbis.add_parents(parent_openbis_id)
            child_openbis.save()

            logger.info(
                f"Linked child {child_openbis_id} to parent {parent_openbis_id} in collection {collection_name}."
            )

0 commit comments

Comments
 (0)