Skip to content
Merged
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.9
2.6.10
1 change: 0 additions & 1 deletion src/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
Flask==3.0.3
neo4j==5.20.0
prov==2.0.1
Werkzeug==3.0.3
deepdiff==7.0.1

# For interacting with memcached
Expand Down
10 changes: 10 additions & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,13 @@ class TriggerTypeEnum(Enum):
BEFORE_UPDATE = 'before_update_trigger'
AFTER_CREATE = 'after_create_trigger'
AFTER_UPDATE = 'after_update_trigger'

# Define an enumeration of accepted Neo4j relationship types.
class Neo4jRelationshipEnum(Enum):
    """
    Closed set of Neo4j relationship types accepted when building Cypher queries.

    Each member's value is the literal relationship type, and is kept identical
    to the member name. Enum treats members sharing a value as aliases of the
    first one defined, so every value here must be distinct.
    """
    ACTIVITY_INPUT = 'ACTIVITY_INPUT'
    # Must not repeat the ACTIVITY_INPUT value, otherwise this member becomes an
    # alias of ACTIVITY_INPUT and the wrong relationship type is used.
    ACTIVITY_OUTPUT = 'ACTIVITY_OUTPUT'
    IN_COLLECTION = 'IN_COLLECTION'
    IN_UPLOAD = 'IN_UPLOAD'
    REVISION_OF = 'REVISION_OF'
    USES_DATA = 'USES_DATA'

85 changes: 76 additions & 9 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import neo4j
from neo4j.exceptions import TransactionError
from schema.schema_constants import SchemaConstants
from neo4j import Session as Neo4jSession
from schema.schema_constants import SchemaConstants, Neo4jRelationshipEnum
import logging

logger = logging.getLogger(__name__)

# The field name of the single result record
record_field_name = 'result'


####################################################################################################
## Functions can be called by app.py, schema_manager.py, and schema_triggers.py
####################################################################################################
Expand Down Expand Up @@ -109,7 +110,38 @@ def get_entity(neo4j_driver, uuid):

return result

"""
Given a list of UUIDs, return the UUIDs which belong to Dataset entities in Neo4j.
Only UUIDs present in Neo4j with entity_type 'Dataset' will be returned.
Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
dataset_uuid_list : list of str
The uuids of the entities to look up in Neo4j
Returns
-------
list of str
The uuids from dataset_uuid_list which were found in Neo4j as Dataset entities.
An empty collection is returned when dataset_uuid_list is empty.
"""
def identify_existing_dataset_entities(neo4j_driver, dataset_uuid_list:list):
    """
    Return the uuids from dataset_uuid_list which match Dataset entities in Neo4j.

    Parameters
    ----------
    neo4j_driver : neo4j.Driver object
        The neo4j database connection pool
    dataset_uuid_list : list of str
        The uuids to look up in Neo4j

    Returns
    -------
    list of str
        The uuid of every Entity node whose uuid is in dataset_uuid_list and whose
        entity_type is 'Dataset'. The query specifies no ordering. An empty list is
        returned, without querying Neo4j, when dataset_uuid_list is empty or None.
    """
    # Return the same type as the query path (a list) so callers never have to
    # distinguish between an empty dict and an empty list.
    if not dataset_uuid_list:
        return []

    query = """
            MATCH (e:Entity)
            WHERE e.uuid IN $param_uuids
            AND e.entity_type='Dataset'
            RETURN e.uuid AS uuid
            """

    with neo4j_driver.session() as session:
        results = session.run(query, param_uuids=dataset_uuid_list)
        # Read the records while the session is still open
        return [record["uuid"] for record in results]

"""
Get the uuids for each entity in a list that doesn't belong to a certain entity type. Uuids are ordered by type
Expand Down Expand Up @@ -889,13 +921,11 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
_delete_collection_linkages_tx(tx=tx
, uuid=collection_uuid)

# Create relationship from each member Dataset node to this Collection node
for dataset_uuid in dataset_uuid_list:
create_relationship_tx(tx=tx
, source_node_uuid=dataset_uuid
, direction='->'
, target_node_uuid=collection_uuid
, relationship='IN_COLLECTION')
_create_relationships_unwind_tx(tx=tx
, source_uuid_list=dataset_uuid_list
, target_uuid=collection_uuid
, relationship=Neo4jRelationshipEnum.IN_COLLECTION
, direction='->')

tx.commit()
except TransactionError as te:
Expand Down Expand Up @@ -1980,6 +2010,43 @@ def create_relationship_tx(tx, source_node_uuid, target_node_uuid, relationship,

result = tx.run(query)

"""
Create multiple relationships between a target node and each node in
a list of source nodes in neo4j
Parameters
----------
tx : neo4j.Session object
The neo4j.Session object instance
source_uuid_list : list[str]
A list of UUIDs for nodes which will have a relationship to the node with target_uuid
target_uuid : str
The UUID of target node
relationship : Neo4jRelationshipEnum
The enumeration member whose value is used as the Neo4j relationship type between each source node and the target node.
direction: str
The relationship direction of each source node to the target node: outgoing `->` or incoming `<-`
Neo4j CQL CREATE command supports only directional relationships
"""
def _create_relationships_unwind_tx(tx:Neo4jSession, source_uuid_list:list, target_uuid:str
                                    , relationship:Neo4jRelationshipEnum, direction:str)->None:
    """
    Run one Cypher query which creates a relationship between the target node and
    every node whose uuid is in source_uuid_list.

    Parameters
    ----------
    tx : object exposing run(), annotated as neo4j.Session
        Used only to execute the query. Nothing is committed here, so any commit
        is left to the caller.
    source_uuid_list : list[str]
        The uuids of the nodes which will have a relationship to the target node
    target_uuid : str
        The uuid of the target node
    relationship : Neo4jRelationshipEnum
        The member whose value is used as the relationship type
    direction : str
        Direction from each source node to the target node: outgoing `->` or incoming `<-`

    Raises
    ------
    ValueError
        If direction is neither `->` nor `<-`
    """
    logger.info("====== enter _create_relationships_unwind_tx() ======")

    # The Cypher CREATE command supports only directional relationships. Reject any
    # other value here rather than sending an undirected pattern to Neo4j.
    if direction not in ('->', '<-'):
        raise ValueError(f"Unsupported relationship direction '{direction}',"
                         f" expected '->' or '<-'.")

    # Exactly one side of the pattern carries the arrow head, the other a plain dash
    incoming = direction if direction == "<-" else "-"
    outgoing = direction if direction == "->" else "-"

    # The relationship type cannot be a query parameter, so it is interpolated from
    # the enum value. The uuids are always passed as query parameters.
    query = (
        f"MATCH (t {{uuid: $target_uuid}}) "
        f"UNWIND $source_uuid_list AS src_uuid "
        f"MATCH (s {{uuid: src_uuid}}) "
        f"CREATE (s){incoming}[r:{relationship.value}]{outgoing}(t) "
        f"RETURN src_uuid AS linked_uuid"
    )

    tx.run( query=query
            , target_uuid=target_uuid
            , source_uuid_list=source_uuid_list)
    logger.info("====== returning from _create_relationships_unwind_tx() ======")

"""
Execute one query to create all outgoing relationships from each node whose
identifier is in the source node list to the target Activity node in neo4j
Expand Down
52 changes: 16 additions & 36 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ def verify_DOI_pair(property_key, normalized_entity_type, request, existing_data
f" the prefix {SchemaConstants.DOI_BASE_URL}.")

"""
Validate every entity in a list is of entity_type accepted
Validate every entity in a list is of entity_type that can be in a
Collection and already exists in Neo4j.
Parameters
----------
Expand All @@ -318,41 +319,20 @@ def collection_entities_are_existing_datasets(property_key, normalized_entity_ty
# Verify each UUID specified exists in the uuid-api, exists in Neo4j, and is for a Dataset before
# proceeding with creation of Collection.
bad_dataset_uuids = []
for dataset_uuid in new_data_dict['dataset_uuids']:
try:
## The following code duplicates some functionality existing in app.py, in
## query_target_entity(), which also deals with caching. In the future, the
## validation logic shared by this file and app.py should become a utility
## module, shared by validators as well as app.py. But for now, the code
## is repeated for the following.

# Get cached ids if exist otherwise retrieve from UUID-API. Expect an
# Exception to be raised if not found.
dataset_uuid_entity = schema_manager.get_hubmap_ids(id=dataset_uuid)

# If the uuid exists per the uuid-api, make sure it also exists as a Neo4j entity.
uuid = dataset_uuid_entity['uuid']
entity_dict = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance(), dataset_uuid)

# If dataset_uuid is not found in Neo4j or is not for a Dataset, fail the validation.
if not entity_dict:
logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
f" but not found in Neo4j.")
bad_dataset_uuids.append(dataset_uuid)
elif entity_dict['entity_type'] != 'Dataset':
logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
f" but entity_type={entity_dict['entity_type']}, not Dataset.")
bad_dataset_uuids.append(dataset_uuid)
except Exception as nfe:
# If the dataset_uuid is not found, fail the validation.
logger.info(f"Request for {dataset_uuid} inclusion in Collection"
f" failed uuid-api retrieval.")
bad_dataset_uuids.append(dataset_uuid)
# If any uuids in the request dataset_uuids are not for an existing Dataset entity which
# exists in uuid-api and Neo4j, raise an Exception so the validation fails and the
# operation can be rejected.
if bad_dataset_uuids:
raise ValueError(f"Unable to find Datasets for {bad_dataset_uuids}.")
dataset_uuid_list = new_data_dict['dataset_uuids']
if not dataset_uuid_list:
return

existing_datasets_list = schema_neo4j_queries.identify_existing_dataset_entities( neo4j_driver=schema_manager.get_neo4j_driver_instance()
, dataset_uuid_list=dataset_uuid_list)

# If any UUIDs which were passed in do not exist in Neo4j or are not Datasets, identify them
missing_uuid_set = set(dataset_uuid_list) - set(existing_datasets_list)
if missing_uuid_set:
logger.info(f"Only existing Datasets may be included in a Collection:"
f" {sorted(missing_uuid_set)}")
raise ValueError( f"Only existing Datasets may be included in a Collection, not these: "
f" {sorted(missing_uuid_set)}")

"""
Validate the provided value of Dataset.status on update via PUT
Expand Down