Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interface with webprotege #53

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
61 changes: 60 additions & 1 deletion bam_masterdata/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import click
from decouple import config as environ
from openpyxl import Workbook
from rdflib import Graph

from bam_masterdata.cli.entities_to_excel import entities_to_excel
from bam_masterdata.cli.entities_to_json import entities_to_json
from bam_masterdata.cli.entities_to_rdf import entities_to_rdf
from bam_masterdata.cli.fill_masterdata import MasterdataCodeGenerator
from bam_masterdata.logger import logger
from bam_masterdata.utils import (
Expand Down Expand Up @@ -167,7 +169,7 @@ def export_to_excel(force_delete, python_path):
)

# Process the modules and save the entities to the openBIS masterdata Excel file
masterdata_file = os.path.join(".", "artifacts", "masterdata.xlsx")
masterdata_file = os.path.join(export_dir, "masterdata.xlsx")
wb = Workbook()
for i, module_path in enumerate(py_modules):
if i == 0:
Expand All @@ -190,5 +192,62 @@ def export_to_excel(force_delete, python_path):
click.echo(f"All masterdata have been generated and saved to {masterdata_file}")


@cli.command(
    name="export_to_rdf",
    help="Export entities to a RDF/XML file in the path `./artifacts/masterdata.owl`.",
)
@click.option(
    "--force-delete",
    type=bool,
    required=False,
    default=False,
    help="""
    (Optional) If set to `True`, it will delete the current `./artifacts/` folder and create a new one. Default is `False`.
    """,
)
@click.option(
    "--python-path",
    type=str,
    required=False,
    default=DATAMODEL_DIR,
    help="""
    (Optional) The path to the individual Python module or the directory containing the Python modules to process the datamodel.
    Default is `./bam_masterdata/datamodel/`.
    """,
)
def export_to_rdf(force_delete, python_path):
    """
    Export the datamodel entities to a single RDF/XML file, `./artifacts/masterdata.owl`.

    The Python modules found under `python_path` are processed one by one with
    `entities_to_rdf`, all of them writing into the same `rdflib.Graph`. The graph is then
    serialized with the `pretty-xml` format and written to disk as UTF-8 text.

    Args:
        force_delete (bool): Forwarded to `delete_and_create_dir` for the `./artifacts`
            export directory.
        python_path (str): Path to a Python module or to a directory of Python modules
            defining the datamodel.
    """
    # Export directory for the static artifacts
    export_dir = os.path.join(".", "artifacts")

    # Delete and create the export directory
    delete_and_create_dir(
        directory_path=export_dir,
        logger=logger,
        force_delete=force_delete,
    )

    # Get the Python modules to process the datamodel
    py_modules = listdir_py_modules(directory_path=python_path, logger=logger)
    # ! Vocabulary types are not exported: drop every module whose path contains 'vocabulary_types.py'
    py_modules = [
        module for module in py_modules if "vocabulary_types.py" not in module
    ]

    # Process each module with `entities_to_rdf`, accumulating all triples in a single graph
    graph = Graph()
    for module_path in py_modules:
        entities_to_rdf(graph=graph, module_path=module_path, logger=logger)

    # Saving RDF/XML to file
    rdf_output = graph.serialize(format="pretty-xml")
    masterdata_file = os.path.join(export_dir, "masterdata.owl")
    with open(masterdata_file, "w", encoding="utf-8") as f:
        f.write(rdf_output)

    click.echo(
        f"All masterdata has been generated in RDF/XML format and saved to {masterdata_file}"
    )


# Script entry point: when this module is executed directly, invoke `cli`,
# the object the commands above are registered on via `@cli.command`.
if __name__ == "__main__":
    cli()
213 changes: 213 additions & 0 deletions bam_masterdata/cli/entities_to_rdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
import inspect
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from rdflib import Graph
from structlog._config import BoundLoggerLazyProxy

import click
from rdflib import BNode, Literal, Namespace
from rdflib.namespace import DC, OWL, RDF, RDFS

from bam_masterdata.utils import code_to_class_name, import_module

BAM = Namespace("https://bamresearch.github.io/bam-masterdata/")
PROV = Namespace("http://www.w3.org/ns/prov#")


def rdf_graph_init(g: "Graph") -> None:
    """
    Seed the graph `g` with the vocabulary shared by every exported entity.

    In order, this binds the namespace prefixes, declares the annotation properties
    (three from `rdfs`/`dc` and the two custom `bam` ones), declares the three `bam`
    object properties, and adds the four placeholder entity types.

    Args:
        g (Graph): The graph to populate. It is modified in place.
    """
    # Namespace prefixes used when serializing the graph
    for prefix, namespace in (
        ("dc", DC),
        ("owl", OWL),
        ("rdf", RDF),
        ("rdfs", RDFS),
        ("bam", BAM),
        ("prov", PROV),
    ):
        g.bind(prefix, namespace)

    # Annotation properties taken from the base namespaces
    for base_annotation in (RDFS.label, RDFS.comment, DC.identifier):
        g.add((base_annotation, RDF.type, OWL.AnnotationProperty))

    # Custom annotation properties coming from openBIS: `dataType` and `propertyLabel`
    data_type_comment = """Represents the data type of a property as defined in the openBIS platform.
This annotation is used to ensure alignment with the native data types in openBIS,
facilitating seamless integration and data exchange.

The allowed values for this annotation correspond directly to the openBIS type system,
including BOOLEAN, CONTROLLEDVOCABULARY, DATE, HYPERLINK, INTEGER, MULTILINE_VARCHAR, OBJECT,
REAL, TIMESTAMP, VARCHAR, and XML.

While `bam:dataType` is primarily intended for internal usage with openBIS, mappings to
standard vocabularies such as `xsd` (e.g., `xsd:boolean`, `xsd:string`) are possible to use and documented to
enhance external interoperability. The full mapping is:
- BOOLEAN: xsd:boolean
- CONTROLLEDVOCABULARY: xsd:string
- DATE: xsd:date
- HYPERLINK: xsd:anyURI
- INTEGER: xsd:integer
- MULTILINE_VARCHAR: xsd:string
- OBJECT: bam:ObjectType
- REAL: xsd:decimal
- TIMESTAMP: xsd:dateTime
- VARCHAR: xsd:string
- XML: xsd:string"""
    property_label_comment = """A UI-specific annotation used in openBIS to provide an alternative label for a property
displayed in the frontend. Not intended for semantic reasoning or interoperability beyond openBIS."""
    for annotation_uri, annotation_comment in (
        (BAM["dataType"], data_type_comment),
        (BAM["propertyLabel"], property_label_comment),
    ):
        # The label is the last path segment of the URI, prefixed with `bam:`
        short_name = annotation_uri.split("/")[-1]
        g.add((annotation_uri, RDF.type, OWL.AnnotationProperty))
        g.add((annotation_uri, RDFS.label, Literal(f"bam:{short_name}", lang="en")))
        g.add((annotation_uri, RDFS.comment, Literal(annotation_comment, lang="en")))

    # Internal BAM object properties.
    # No rdfs:domain is declared for the two `has...Property` relations and no rdfs:range
    # for `referenceTo`.
    # ? `section`, `ordinal`, `show_in_edit_views` are not represented yet
    mandatory_uri = BAM["hasMandatoryProperty"]
    optional_uri = BAM["hasOptionalProperty"]
    reference_uri = BAM["referenceTo"]
    object_property_triples = (
        (mandatory_uri, RDF.type, OWL.ObjectProperty),
        (mandatory_uri, RDFS.range, BAM.PropertyType),
        (mandatory_uri, RDFS.label, Literal("hasMandatoryProperty", lang="en")),
        (
            mandatory_uri,
            RDFS.comment,
            Literal(
                "The property must be mandatorily filled when creating the object in openBIS.",
                lang="en",
            ),
        ),
        (optional_uri, RDF.type, OWL.ObjectProperty),
        (optional_uri, RDFS.range, BAM.PropertyType),
        (optional_uri, RDFS.label, Literal("hasOptionalProperty", lang="en")),
        (
            optional_uri,
            RDFS.comment,
            Literal(
                "The property is optionally filled when creating the object in openBIS.",
                lang="en",
            ),
        ),
        (reference_uri, RDF.type, OWL.ObjectProperty),
        (reference_uri, RDFS.domain, BAM.PropertyType),
        (reference_uri, RDFS.label, Literal("referenceTo", lang="en")),
        (
            reference_uri,
            RDFS.comment,
            Literal(
                "The property is referencing an object existing in openBIS.",
                lang="en",
            ),
        ),
    )
    for triple in object_property_triples:
        g.add(triple)

    # Base PropertyType plus the other entity types as placeholders; only PropertyType
    # carries a description.
    # NOTE(review): these nodes are typed `owl:Thing` although they are used as
    # rdfs:range / rdfs:subClassOf targets - confirm whether `owl:Class` is intended.
    prop_type_description = """A conceptual placeholder used to define and organize properties as first-class entities.
PropertyType is used to place properties and define their metadata, separating properties from the
entities they describe.

In integration scenarios:
- PropertyType can align with `BFO:Quality` for inherent attributes.
- PropertyType can represent `BFO:Role` if properties serve functional purposes.
- PropertyType can be treated as a `prov:Entity` when properties participate in provenance relationships."""
    for placeholder in ("PropertyType", "ObjectType", "CollectionType", "DatasetType"):
        placeholder_uri = BAM[placeholder]
        g.add((placeholder_uri, RDF.type, OWL.Thing))
        g.add((placeholder_uri, RDFS.label, Literal(placeholder, lang="en")))
        if placeholder == "PropertyType":
            g.add(
                (placeholder_uri, RDFS.comment, Literal(prop_type_description, lang="en"))
            )


def entities_to_rdf(
graph: "Graph", module_path: str, logger: "BoundLoggerLazyProxy"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does the "Graph" class work? Does it insert everything into the file already nested?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graph is the class used to describe the triples in ontologies, so when it is serialized to RDF/XML the output already has the format we are looking for.

"Triples" are normally two nodes connected via a relationship, something like (node_1, relationship, node_2); you can see some examples in the calls to Graph.add(). Basically, it is a way of defining directed, labeled graphs.

Here the complication is not printing to RDF/XML, or to create the Graph, but actually mapping the openBIS info into the triples.

) -> None:
rdf_graph_init(graph)

module = import_module(module_path=module_path)

# Special case of `PropertyTypeDef` in `property_types.py`
# PROPERTY TYPES
# skos:prefLabel used for class names
# skos:definition used for `description` (en, de)
# skos:altLabel used for `property_label`
# dc:identifier used for `code` # ! only defined for internal codes with $ symbol
# dc:type used for `data_type`
if "property_types.py" in module_path:
for name, obj in inspect.getmembers(module):
if name.startswith("_") or name == "PropertyTypeDef":
continue
prop_uri = BAM[obj.id]

# Define the property as an OWL class inheriting from PropertyType
graph.add((prop_uri, RDF.type, OWL.Thing))
graph.add((prop_uri, RDFS.subClassOf, BAM.PropertyType))

# Add attributes like id, code, description in English and Deutsch, property_label, data_type
graph.add((prop_uri, RDFS.label, Literal(obj.id, lang="en")))
graph.add((prop_uri, DC.identifier, Literal(obj.code)))
descriptions = obj.description.split("//")
if len(descriptions) > 1:
graph.add((prop_uri, RDFS.comment, Literal(descriptions[0], lang="en")))
graph.add((prop_uri, RDFS.comment, Literal(descriptions[1], lang="de")))
else:
graph.add((prop_uri, RDFS.comment, Literal(obj.description, lang="en")))
graph.add(
(prop_uri, BAM.propertyLabel, Literal(obj.property_label, lang="en"))
)
graph.add((prop_uri, BAM.dataType, Literal(obj.data_type.value)))
if obj.data_type.value == "OBJECT":
# entity_ref_uri = BAM[code_to_class_name(obj.object_code)]
# graph.add((prop_uri, BAM.referenceTo, entity_ref_uri))
entity_ref_uri = BAM[code_to_class_name(obj.object_code)]

# Create a restriction with referenceTo
restriction = BNode()
graph.add((restriction, RDF.type, OWL.Restriction))
graph.add((restriction, OWL.onProperty, BAM["referenceTo"]))
graph.add((restriction, OWL.someValuesFrom, entity_ref_uri))

# Add the restriction as a subclass of the property
graph.add((prop_uri, RDFS.subClassOf, restriction))
return None

# All other datamodel modules
# OBJECT/DATASET/COLLECTION TYPES
# skos:prefLabel used for class names
# skos:definition used for `description` (en, de)
# dc:identifier used for `code` # ! only defined for internal codes with $ symbol
# parents defined from `code`
# assigned properties can be Mandatory or Optional, can be PropertyType or ObjectType
# ? For OBJECT TYPES
# ? `generated_code_prefix`, `auto_generated_codes`?
for name, obj in inspect.getmembers(module, inspect.isclass):
# Ensure the class has the `to_json` method
if not hasattr(obj, "defs") or not callable(getattr(obj, "to_rdf")):
continue
try:
# Instantiate the class and call the method
entity = obj()
entity.to_rdf(namespace=BAM, graph=graph)
except Exception as err:
click.echo(f"Failed to process class {name} in {module_path}: {err}")
31 changes: 30 additions & 1 deletion bam_masterdata/metadata/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from pydantic import BaseModel, Field, field_validator, model_validator

from bam_masterdata.utils import code_to_class_name


class DataType(str, Enum):
"""Enumeration of the data types available in openBIS."""
Expand Down Expand Up @@ -74,6 +76,14 @@ class EntityDef(BaseModel):
""",
)

id: Optional[str] = Field(
default=None,
description="""
Identifier of the entity defined as the class name and used to serialize the entity definitions
in other formats.
""",
)

# TODO check ontology_id, ontology_version, ontology_annotation_id, internal (found in the openBIS docu)

@field_validator("code")
Expand Down Expand Up @@ -115,7 +125,26 @@ def excel_headers(self) -> list[str]:
"""
Returns the headers for the entity in a format suitable for the openBIS Excel file.
"""
return [k.capitalize().replace("_", " ") for k in self.model_fields.keys()]
return [
k.capitalize().replace("_", " ")
for k in self.model_fields.keys()
if k != "id"
]

@model_validator(mode="after")
def model_id(self) -> "EntityDef":
    """
    Stores the model `id` as the class name derived from the `code` field.

    This is an "after" model validator: it is defined as an instance method, receives the
    model instance once its fields have been validated, and must return that same instance.

    Returns:
        EntityDef: The validated instance with `id` set from `code`.
    """
    # `id` is always recomputed from `code`; any value passed explicitly is overwritten
    self.id = code_to_class_name(self.code)
    return self


class BaseObjectTypeDef(EntityDef):
Expand Down
Loading
Loading