@@ -28,6 +28,8 @@ def __init__(self, config_file_path, uri, pwd, user):
2828
2929 #self.processedDataFolder = processedDataFolder
3030
31+ self .datatypes = ['string' , 'int' , 'float' , 'boolean' , 'datetime' ]
32+
3133 # Create python to neo4j driver
3234 self .py2neo = Py2Neo (uri = uri , user = user , pwd = pwd )
3335
@@ -49,13 +51,22 @@ def __init__(self, config_file_path, uri, pwd, user):
4951 }
5052 }
5153 },
52- "required" :["node_description" ]
54+ "required" :["node_description" , "node_properties" ]
5355 },
5456 "relation" : {"description" : "Data element encapsulated in the edge" ,
5557 "type" : "object" ,
5658 "properties" : {"relation_description" : {"type" : "string" , "description" : "Type of relationship encapsulated in the relation between two nodes" },
5759 "from_entity" : {"type" : "string" , "description" : "Label of the departure node" },
58- "to_entity" : {"type" : "string" , "description" : "Label of the arrival node" }},
60+ "to_entity" : {"type" : "string" , "description" : "Label of the arrival node" },
61+ "relation_properties" : {"type" : "array" ,
62+ "description" : "Allowed properties associate with the relation" ,
63+ "items" : {"type" : "object" ,
64+ "properties" : {"name" : {"type" : "string" , "description" : "Name of the relation property" },
65+ "type" : {"type" : "string" , "description" : "Type of the node property" },
66+ "optional" : {"type" : "boolean" , "description" : "Specifies if this property is required or not" }},
67+ "required" :["name" ,"type" ,"optional" ],
68+ }
69+ }},
5970 "required" :["relation_description" ,"from_entity" ,"to_entity" ],
6071 }
6172 },
@@ -70,7 +81,7 @@ def __init__(self, config_file_path, uri, pwd, user):
def resetGraph(self):
    """Reset the graph by delegating to the py2neo driver's ``reset()``."""
    driver = self.py2neo
    driver.reset()
7283
73- def checkSchemaStructure (self , importedSchema ):
84+ def _checkSchemaStructure (self , importedSchema ):
7485 try :
7586 validate (instance = importedSchema , schema = self .schemaSchema )
7687 print ("TOML content is valid against the schema." )
@@ -86,13 +97,16 @@ def importGraphSchema(self, graphSchemaName, tomlFilename):
8697
8798 with open (config_path , 'rb' ) as f :
8899 config_data = tomllib .load (f )
100+
101+ # Check structure of imported graphSchema
102+ self ._checkSchemaStructure (config_data )
89103
90- self .checkSchemaStructure ( config_data )
104+ #TODO: check datatypes against self.datatypes
91105
92- # Check structure of imported graphSchema
93- for node in config_data ['node' ].keys ():
94- pass
95106 # Check imported graphSchema against self.graphSchemas
107+ if graphSchemaName in list (self .graphSchemas .keys ()):
108+ print ('Schema ' + str (graphSchemaName ) + ' is already defined in the exisiting schemas' )
109+
96110 for node in config_data ['node' ].keys ():
97111 for schema in self .graphSchemas :
98112 if node in schema ['node' ].keys ():
@@ -106,11 +120,33 @@ def importGraphSchema(self, graphSchemaName, tomlFilename):
106120 self .graphSchemas [graphSchemaName ] = config_data
107121 return config_data
108122
109- def schemaValidation (self , constructionSchema ):
def _schemaReturnNodeProperties(self, nodeLabel):
    """Return the declared properties of a node label as a DataFrame.

    Walks every schema stored in ``self.graphSchemas`` and builds a
    DataFrame from the ``node_properties`` list of the first schema whose
    ``node`` table contains ``nodeLabel``.

    Args:
        nodeLabel: Label of the node to look up.

    Returns:
        A ``pandas.DataFrame`` built from the node's ``node_properties``
        entries, or ``None`` (after printing 'Node not found') when no
        stored schema defines the label.
    """
    # graphSchemas maps schema name -> schema content. Iterating the mapping
    # itself yields the names (strings), so the schema bodies have to be
    # reached through values().
    for schema in self.graphSchemas.values():
        nodes = schema['node']
        if nodeLabel in nodes:
            return pd.DataFrame(nodes[nodeLabel]['node_properties'])
    print('Node not found')
    return None
131+
def _schemaValidation(self, constructionSchema):
    """Check the node entries of a construction schema against the graph schemas.

    For every node label under ``constructionSchema['nodes']`` the property
    names given there are compared with the property table returned by
    ``self._schemaReturnNodeProperties``. Problems are reported with
    ``print``; nothing is raised or returned.

    Args:
        constructionSchema: Mapping with a ``'nodes'`` entry that maps each
            node label to a mapping keyed by property name.
    """
    # For each node check that required properties are listed
    for node, mapping in constructionSchema['nodes'].items():
        specified_prop = set(mapping)

        prop_df = self._schemaReturnNodeProperties(node)
        if prop_df is None:
            # Unknown label: there is no property table to compare against.
            print(f'Node {node} is not defined in any graph schema')
            continue

        if prop_df.empty:
            # A node declared without properties yields a frame with no
            # 'name'/'optional' columns; treat it as "nothing allowed".
            allowed_properties = set()
            req_properties = set()
        else:
            allowed_properties = set(prop_df['name'])
            is_required = prop_df['optional'].eq(False)
            req_properties = set(prop_df.loc[is_required, 'name'])

        # Sorted lists keep the printed messages stable between runs.
        if not req_properties.issubset(specified_prop):
            print(f'Node {node} requires all these properties: '
                  f'{sorted(req_properties)}')

        if not specified_prop.issubset(allowed_properties):
            print(f'Node {node} allows only these properties: '
                  f'{sorted(allowed_properties)}')

    # TODO: for each relation check that required properties are listed
114150
115151 def genericWorkflow (self , data , constructionSchema ):
116152 # Check constructionSchema against self.graphSchemas
@@ -260,7 +296,7 @@ def eventReportWorkflow(self, filename, constructionSchema, pipelines):
260296
261297 def kgConstructionWorkflow (self , dataframe , graphSchema , constructionSchema ):
262298
263- self .schemaValidation (self , constructionSchema , graphSchema )
299+ self ._schemaValidation (self , constructionSchema , graphSchema )
264300
265301 for node in constructionSchema ['nodes' ].keys ():
266302 map = {value : key for key , value in constructionSchema ['nodes' ][node ].items ()}
0 commit comments