1616import os , sys
1717import tomllib
1818import jsonschema
19+ import copy
1920
2021class KG :
21- def __init__ (self , config_file_path , import_folder_path , uri , pwd , processedDataFolder ):
22+ #def __init__(self, config_file_path, import_folder_path, uri, pwd, user, processedDataFolder):
23+ def __init__ (self , config_file_path , uri , pwd , user ):
2224 # Change import folder to user specific location
23- set_neo4j_import_folder (config_file_path , import_folder_path )
25+
26+ #set_neo4j_import_folder(config_file_path, import_folder_path)
2427
25- self .processedDataFolder = processedDataFolder
28+ # self.processedDataFolder = processedDataFolder
2629
2730 # Create python to neo4j driver
28- self .py2neo = Py2Neo (uri = uri , user = 'neo4j' , pwd = pwd )
31+ self .py2neo = Py2Neo (uri = uri , user = user , pwd = pwd )
2932
3033 self .graphSchemas = {}
3134
@@ -51,23 +54,42 @@ def schemaValidation(self, constructionSchema):
5154 def genericWorkflow (self , data , constructionSchema ):
5255 # Check constructionSchema against self.graphSchemas
5356
54- # Parse data (pd.dataframe?) and update KG
55-
56- '''
57- ---- Example of construction schema ----
58-
59- constructionSchema = {'nodes': nodeConstructionSchema,
60- 'edges': edgeConstructionSchema}
61-
62- nodeConstructionSchema = {'nodeLabel1': {'property1': 'node.colA', 'property2': 'node.colB'},
63- 'nodeLabel2': {'property1': 'node.colC'}}
64-
65- edgeConstructionSchema = [{'source': ('nodeLabel1.property1','col1'),
66- 'target': ('nodeLabel2.property1','col2'),
67- 'type': 'edgeType',
68- 'properties': {'property1': 'colAlpha', 'property2': 'colBeta'}}]
69- '''
70-
57+ # Parse data (pd.dataframe) and update KG
58+ # Nodes
59+ if 'nodes' in constructionSchema :
60+ data_temp = copy .deepcopy (data )
61+ for node in constructionSchema ['nodes' ].keys ():
62+ mapping = {value : key for key , value in constructionSchema ['nodes' ][node ].items ()}
63+ data_renamed = data_temp .rename (columns = mapping )
64+ self .py2neo .load_dataframe_for_nodes (df = data_renamed , labels = node , properties = list (mapping .values ()))
65+
66+ # Relations
67+ # --> TODO: check nodes exist
68+ if 'relations' in constructionSchema :
69+ data_temp = copy .deepcopy (data )
70+ for rel in constructionSchema ['relations' ]:
71+ source_node_label = next (iter (rel ['source' ])).split ('.' )[0 ]
72+ source_node_prop = next (iter (rel ['source' ])).split ('.' )[1 ]
73+
74+ target_node_label = next (iter (rel ['target' ])).split ('.' )[0 ]
75+ target_node_prop = next (iter (rel ['target' ])).split ('.' )[1 ]
76+
77+ mapping = {}
78+ data_renamed = data_temp .rename (columns = {next (iter (rel ['source' ].values ())):source_node_prop ,
79+ next (iter (rel ['target' ].values ())):target_node_prop })
80+
81+ for prop in rel ['properties' ].keys ():
82+ data_renamed = data_renamed .rename (columns = {rel ['properties' ][prop ]: prop })
83+
84+ data_renamed [source_node_label ] = source_node_label
85+ data_renamed [target_node_label ] = target_node_label
86+ data_renamed [rel ['type' ]] = rel ['type' ]
87+
88+ self .py2neo .load_dataframe_for_relations (df = data_renamed ,
89+ l1 = source_node_label , p1 = source_node_prop ,
90+ l2 = target_node_label , p2 = target_node_prop ,
91+ lr = rel ['type' ],
92+ pr = list (rel ['properties' ].keys ()))
7193
7294 '''
7395 ---- Example of graph schema (toml file) ----
@@ -78,18 +100,32 @@ def genericWorkflow(self, data, constructionSchema):
78100 # Nodes
79101 [node.label1]
80102 description = "This node represents ..."
81- properties = [{name = "prop1", type = "date", required = bool},
82- {name = "prop2", type = "string", required = bool},
83- ]
103+ properties = {"prop1": {type="date", required=bool},
104+ "prop2": {type="string", required=bool}}
84105
85106 # Relationships
86107 [relationships.relation1]
87108 description = "relation1 indicates ... "
88109 from_entity = entity1
89110 to_entity = entity2
90- properties = [{name = "prop1", type = "int" , required = bool},
91- {name = "prop2", type = "float", required = bool},
92- ]
111+ properties = {"prop1": {type="int", required=bool},
112+ "prop2": {type="float", required=bool}}
113+
114+
115+
116+ ---- Example of construction schema ----
117+
118+ constructionSchema = {'nodes': nodeConstructionSchema,
119+ 'relations': edgeConstructionSchema}
120+
121+ nodeConstructionSchema = {'nodeLabel1': {'property1': 'node.colA', 'property2': 'node.colB'},
122+ 'nodeLabel2': {'property1': 'node.colC'}}
123+
124+ edgeConstructionSchema = [{'source': {'nodeLabel1.property1':'col1'},
125+ 'target': {'nodeLabel2.property1':'col2'},
126+ 'type' : 'edgeType',
127+ 'properties': {'property1': 'colAlpha', 'property2': 'colBeta'}}]
128+
93129 '''
94130
95131 # These are workflows specific to the RIAM project
@@ -155,7 +191,7 @@ def monitoringWorkflow(self, filename, constructionSchema):
155191
156192
157193 def eventReportWorkflow (self , filename , constructionSchema , pipelines ):
158- graphSchemas = TBD
194+ graphSchema = TBD
159195
160196 #TODO: Check constructionSchema against graphSchemas
161197
0 commit comments