Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,13 @@ class TriggerTypeEnum(Enum):
BEFORE_UPDATE = 'before_update_trigger'
AFTER_CREATE = 'after_create_trigger'
AFTER_UPDATE = 'after_update_trigger'

# Define an enumeration of accepted Neo4j relationship types.
class Neo4jRelationshipEnum(Enum):
    """Closed set of Neo4j relationship types accepted by the schema code.

    Each member's value is the relationship type name as it is written into
    a Cypher query, so every value must match its member name exactly.
    """
    ACTIVITY_INPUT = 'ACTIVITY_INPUT'
    # Needs its own distinct value: a value duplicated from ACTIVITY_INPUT would
    # make this member a silent Enum alias of ACTIVITY_INPUT.
    ACTIVITY_OUTPUT = 'ACTIVITY_OUTPUT'
    IN_COLLECTION = 'IN_COLLECTION'
    IN_UPLOAD = 'IN_UPLOAD'
    REVISION_OF = 'REVISION_OF'
    USES_DATA = 'USES_DATA'

86 changes: 76 additions & 10 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import neo4j
from neo4j.exceptions import TransactionError
from schema.schema_constants import SchemaConstants
from neo4j import Session as Neo4jSession
from schema.schema_constants import SchemaConstants, Neo4jRelationshipEnum
import logging

logger = logging.getLogger(__name__)

# The field name of the single result record
record_field_name = 'result'


####################################################################################################
## Functions can be called by app.py, schema_manager.py, and schema_triggers.py
####################################################################################################
Expand Down Expand Up @@ -109,7 +110,38 @@ def get_entity(neo4j_driver, uuid):

return result

"""
Given a list of UUIDs, return the subset which identify Dataset entities in Neo4j.
UUIDs which are not found in Neo4j, or which belong to an entity whose
entity_type is not 'Dataset', are left out of the result.

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
dataset_uuid_list : list of str
    The uuids of the candidate Dataset entities to look for in Neo4j

Returns
-------
list of str
    The uuid of each Dataset entity matched by the Cypher query.
    An empty list when dataset_uuid_list is empty or None.
"""
def identify_existing_dataset_entities(neo4j_driver, dataset_uuid_list:list):

    # Nothing to look up, so skip the round trip to Neo4j. Return the same type
    # as the populated case so callers always receive a list.
    if not dataset_uuid_list:
        return []

    query = """
    MATCH (e:Entity)
    WHERE e.uuid IN $param_uuids
    AND e.entity_type='Dataset'
    RETURN e.uuid AS uuid
    """

    with neo4j_driver.session() as session:
        results = session.run(query, param_uuids=dataset_uuid_list)
        # Read every record before the session is closed by the with-block
        return [record["uuid"] for record in results]

"""
Get the uuids for each entity in a list that doesn't belong to a certain entity type. Uuids are ordered by type
Expand Down Expand Up @@ -884,18 +916,15 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
try:
with neo4j_driver.session() as session:
tx = session.begin_transaction()

# First delete all the old linkages between this Collection and its member Datasets
_delete_collection_linkages_tx(tx=tx
, uuid=collection_uuid)

# Create relationship from each member Dataset node to this Collection node
for dataset_uuid in dataset_uuid_list:
create_relationship_tx(tx=tx
, source_node_uuid=dataset_uuid
, direction='->'
, target_node_uuid=collection_uuid
, relationship='IN_COLLECTION')
_create_relationships_unwind_tx(tx=tx
, source_uuid_list=dataset_uuid_list
, target_uuid=collection_uuid
, relationship=Neo4jRelationshipEnum.IN_COLLECTION
, direction='->')

tx.commit()
except TransactionError as te:
Expand Down Expand Up @@ -1980,6 +2009,43 @@ def create_relationship_tx(tx, source_node_uuid, target_node_uuid, relationship,

result = tx.run(query)

"""
Create a relationship between a target node and each node in a list of
source nodes in neo4j, using one UNWIND query rather than one query per source node

NOTE: the query matches the target node by the `Collection` label and each source
node by the `Dataset` label, so only linkages between those node types are created.
Source UUIDs which do not match a Dataset node are skipped by the query.

Parameters
----------
tx : neo4j transaction object
    The open transaction the query is run in, e.g. the object returned by
    session.begin_transaction(). This function does not commit or roll back;
    that is left to the caller.
source_uuid_list : list[str]
    A list of UUIDs for nodes which will have a relationship to the node with target_uuid
target_uuid : str
    The UUID of target node
relationship : Neo4jRelationshipEnum
    The enumeration member whose value is the Neo4j relationship type to create
    between each source node and the target node.
direction: str
    The relationship direction of each source node to the target node: outgoing `->` or incoming `<-`
    Neo4j CQL CREATE command supports only directional relationships

Raises
------
ValueError
    If direction is neither `->` nor `<-`
"""
def _create_relationships_unwind_tx(tx:Neo4jSession, source_uuid_list:list, target_uuid:str
                                    , relationship:Neo4jRelationshipEnum, direction:str)->None:
    logger.info("====== enter _create_relationships_unwind_tx() ======")

    # CREATE only accepts directed relationships. Any other value would build
    # an undirected pattern which Neo4j rejects, so fail before running the query.
    if direction not in ("->", "<-"):
        raise ValueError(f"Unsupported relationship direction '{direction}',"
                         f" expected '->' or '<-'.")

    # Exactly one side of the pattern carries the arrow head, the other a plain dash
    incoming = direction if direction == "<-" else "-"
    outgoing = direction if direction == "->" else "-"

    # The relationship type cannot be passed as a Cypher parameter, so it is
    # interpolated from the enumeration value. The UUIDs are passed as parameters.
    query = (
        f"MATCH (t:Collection {{uuid: $target_uuid}}) "
        f"UNWIND $source_uuid_list AS src_uuid "
        f"MATCH (s:Dataset {{uuid: src_uuid}}) "
        f"CREATE (s){incoming}[r:{relationship.value}]{outgoing}(t) "
        f"RETURN src_uuid AS linked_uuid"
    )

    tx.run( query=query
            , target_uuid=target_uuid
            , source_uuid_list=source_uuid_list)
    logger.info("====== returning from _create_relationships_unwind_tx() ======")

"""
Execute one query to create all outgoing relationships from each node whose
identifier is in the source node list to the target Activity node in neo4j
Expand Down
52 changes: 16 additions & 36 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ def verify_DOI_pair(property_key, normalized_entity_type, request, existing_data
f" the prefix {SchemaConstants.DOI_BASE_URL}.")

"""
Validate every entity in a list is of entity_type accepted
Validate every entity in a list is of entity_type that can be in a
Collection and already exists in Neo4j.

Parameters
----------
Expand All @@ -318,41 +319,20 @@ def collection_entities_are_existing_datasets(property_key, normalized_entity_ty
# Verify each UUID specified exists in the uuid-api, exists in Neo4j, and is for a Dataset before
# proceeding with creation of Collection.
bad_dataset_uuids = []
for dataset_uuid in new_data_dict['dataset_uuids']:
try:
## The following code duplicates some functionality existing in app.py, in
## query_target_entity(), which also deals with caching. In the future, the
## validation logic shared by this file and app.py should become a utility
## module, shared by validators as well as app.py. But for now, the code
## is repeated for the following.

# Get cached ids if exist otherwise retrieve from UUID-API. Expect an
# Exception to be raised if not found.
dataset_uuid_entity = schema_manager.get_hubmap_ids(id=dataset_uuid)

# If the uuid exists per the uuid-api, make sure it also exists as a Neo4j entity.
uuid = dataset_uuid_entity['uuid']
entity_dict = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance(), dataset_uuid)

# If dataset_uuid is not found in Neo4j or is not for a Dataset, fail the validation.
if not entity_dict:
logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
f" but not found in Neo4j.")
bad_dataset_uuids.append(dataset_uuid)
elif entity_dict['entity_type'] != 'Dataset':
logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
f" but entity_type={entity_dict['entity_type']}, not Dataset.")
bad_dataset_uuids.append(dataset_uuid)
except Exception as nfe:
# If the dataset_uuid is not found, fail the validation.
logger.info(f"Request for {dataset_uuid} inclusion in Collection"
f" failed uuid-api retrieval.")
bad_dataset_uuids.append(dataset_uuid)
# If any uuids in the request dataset_uuids are not for an existing Dataset entity which
# exists in uuid-api and Neo4j, raise an Exception so the validation fails and the
# operation can be rejected.
if bad_dataset_uuids:
raise ValueError(f"Unable to find Datasets for {bad_dataset_uuids}.")
dataset_uuid_list = new_data_dict['dataset_uuids']
if not dataset_uuid_list:
return

existing_datasets_list = schema_neo4j_queries.identify_existing_dataset_entities( neo4j_driver=schema_manager.get_neo4j_driver_instance()
, dataset_uuid_list=dataset_uuid_list)

# If any UUIDs which were passed in do not exist in Neo4j or are not Datasets, identify them
missing_uuid_set = set(dataset_uuid_list) - set(existing_datasets_list)
if missing_uuid_set:
logger.info(f"Only existing Datasets may be included in a Collection:"
f" {sorted(missing_uuid_set)}")
raise ValueError( f"Only existing Datasets may be included in a Collection, not these: "
f" {sorted(missing_uuid_set)}")

"""
Validate the provided value of Dataset.status on update via PUT
Expand Down