
Commit 54cdf11

first KG method and edits on py2neo
1 parent 97a24ba

3 files changed: 78 additions (+), 41 deletions (−)

src/dackar/knowledge_graph/KGconstruction.py

Lines changed: 64 additions & 28 deletions
@@ -16,16 +16,19 @@
 import os, sys
 import tomllib
 import jsonschema
+import copy

 class KG:
-    def __init__(self, config_file_path, import_folder_path, uri, pwd, processedDataFolder):
+    #def __init__(self, config_file_path, import_folder_path, uri, pwd, user, processedDataFolder):
+    def __init__(self, config_file_path, uri, pwd, user):
         # Change import folder to user specific location
-        set_neo4j_import_folder(config_file_path, import_folder_path)
+
+        #set_neo4j_import_folder(config_file_path, import_folder_path)

-        self.processedDataFolder = processedDataFolder
+        #self.processedDataFolder = processedDataFolder

         # Create python to neo4j driver
-        self.py2neo = Py2Neo(uri=uri, user='neo4j', pwd=pwd)
+        self.py2neo = Py2Neo(uri=uri, user=user, pwd=pwd)

         self.graphSchemas = {}

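With this change the Neo4j user name is passed in explicitly instead of being hard-coded to 'neo4j', and the import-folder and processed-data arguments are commented out. A minimal usage sketch of the new signature; the import path assumes the package is installed from the src/ layout, and the URI, credentials, and config path are placeholders:

from dackar.knowledge_graph.KGconstruction import KG

# Placeholder connection details; adjust to the local Neo4j deployment.
kg = KG(config_file_path='path/to/neo4j.conf',  # still accepted, no longer used to reset the import folder
        uri='bolt://localhost:7687',
        pwd='neo4j_password',
        user='neo4j')                           # user is now configurable
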
@@ -51,23 +54,42 @@ def schemaValidation(self, constructionSchema):
     def genericWorkflow(self, data, constructionSchema):
         # Check constructionSchema against self.graphSchemas

-        # Parse data (pd.dataframe?) and update KG
-
-        '''
-        ---- Example of construction schema ----
-
-        constructionSchema = {'nodes': nodeConstructionSchema,
-                              'edges': edgeConstructionSchema}
-
-        nodeConstructionSchema = {'nodeLabel1': {'property1': 'node.colA', 'property2': 'node.colB'},
-                                  'nodeLabel2': {'property1': 'node.colC'}}
-
-        edgeConstructionSchema = [{'source': ('nodeLabel1.property1','col1'),
-                                   'target': ('nodeLabel2.property1','col2'),
-                                   'type': 'edgeType',
-                                   'properties': {'property1': 'colAlpha', 'property2': 'colBeta'}}]
-        '''
-
+        # Parse data (pd.dataframe) and update KG
+        # Nodes
+        if 'nodes' in constructionSchema:
+            data_temp = copy.deepcopy(data)
+            for node in constructionSchema['nodes'].keys():
+                mapping = {value: key for key, value in constructionSchema['nodes'][node].items()}
+                data_renamed = data_temp.rename(columns=mapping)
+                self.py2neo.load_dataframe_for_nodes(df=data_renamed, labels=node, properties=list(mapping.values()))
+
+        # Relations
+        # --> TODO: check nodes exist
+        if 'relations' in constructionSchema:
+            data_temp = copy.deepcopy(data)
+            for rel in constructionSchema['relations']:
+                source_node_label = next(iter(rel['source'])).split('.')[0]
+                source_node_prop = next(iter(rel['source'])).split('.')[1]
+
+                target_node_label = next(iter(rel['target'])).split('.')[0]
+                target_node_prop = next(iter(rel['target'])).split('.')[1]
+
+                mapping = {}
+                data_renamed = data_temp.rename(columns={next(iter(rel['source'].values())):source_node_prop,
+                                                         next(iter(rel['target'].values())):target_node_prop})
+
+                for prop in rel['properties'].keys():
+                    data_renamed = data_renamed.rename(columns={rel['properties'][prop]: prop})
+
+                data_renamed[source_node_label] = source_node_label
+                data_renamed[target_node_label] = target_node_label
+                data_renamed[rel['type']] = rel['type']
+
+                self.py2neo.load_dataframe_for_relations(df=data_renamed,
+                                                         l1=source_node_label, p1=source_node_prop,
+                                                         l2=target_node_label, p2=target_node_prop,
+                                                         lr=rel['type'],
+                                                         pr=list(rel['properties'].keys()))

         '''
         ---- Example of graph schema (toml file) ----
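The node branch above inverts each {property: column} mapping so that pandas' rename() can turn DataFrame columns into graph property names before py2neo loads them. A standalone sketch of that renaming step on a toy DataFrame (the labels, properties, and columns are invented for illustration):

import copy
import pandas as pd

data = pd.DataFrame({'colA': ['P-101', 'V-205'], 'colB': ['pump', 'valve']})
nodeConstructionSchema = {'component': {'ID': 'colA', 'kind': 'colB'}}

for node, propMap in nodeConstructionSchema.items():
    # Invert {property: column} into {column: property} for DataFrame.rename()
    mapping = {column: prop for prop, column in propMap.items()}
    data_renamed = copy.deepcopy(data).rename(columns=mapping)
    print(node, list(mapping.values()))      # component ['ID', 'kind']
    print(data_renamed.columns.tolist())     # ['ID', 'kind']
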
@@ -78,18 +100,32 @@ def genericWorkflow(self, data, constructionSchema):
         # Nodes
         [node.label1]
         description = "This node represents ..."
-        properties = [{name = "prop1", type = "date", required = bool},
-                      {name = "prop2", type = "string", required = bool},
-                     ]
+        properties = {"prop1": {type="date", required=bool},
+                      "prop2": {type="string", required=bool}}

         # Relationships
         [relationships.relation1]
         description = "relation1 indicates ... "
         from_entity = entity1
         to_entity = entity2
-        properties = [{name = "prop1", type = "int" , required = bool},
-                      {name = "prop2", type = "float", required = bool},
-                     ]
+        properties = {"prop1": {type="int", required=bool},
+                      "prop2": {type="float", required=bool}}
+
+
+
+        ---- Example of construction schema ----
+
+        constructionSchema = {'nodes': nodeConstructionSchema,
+                              'relations': edgeConstructionSchema}
+
+        nodeConstructionSchema = {'nodeLabel1': {'property1': 'node.colA', 'property2': 'node.colB'},
+                                  'nodeLabel2': {'property1': 'node.colC'}}
+
+        edgeConstructionSchema = [{'source': {'nodeLabel1.property1':'col1'},
+                                   'target': {'nodeLabel2.property1':'col2'},
+                                   'type' : 'edgeType',
+                                   'properties': {'property1': 'colAlpha', 'property2': 'colBeta'}}]
+
         '''

     # These are workflows specific to the RIAM project
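Taken together with the construction-schema example in the docstring above, a call into the new generic workflow could look like the sketch below; the DataFrame columns, node labels, and property names are made up, and a running Neo4j instance behind the kg object is assumed:

import pandas as pd

# One row per source/target pair; relation properties sit in their own columns.
df = pd.DataFrame({'col1': ['P-101'], 'col2': ['SYS-A'],
                   'colAlpha': ['2024-01-01'], 'colBeta': [0.95]})

constructionSchema = {
    'nodes': {'component': {'ID': 'col1'},
              'system': {'name': 'col2'}},
    'relations': [{'source': {'component.ID': 'col1'},
                   'target': {'system.name': 'col2'},
                   'type': 'PART_OF',
                   'properties': {'since': 'colAlpha', 'confidence': 'colBeta'}}]
}

kg.genericWorkflow(df, constructionSchema)   # kg from the constructor sketch above
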
@@ -155,7 +191,7 @@ def monitoringWorkflow(self, filename, constructionSchema):


     def eventReportWorkflow(self, filename, constructionSchema, pipelines):
-        graphSchemas = TBD
+        graphSchema = TBD

         #TODO: Check constructionSchema against graphSchemas

src/dackar/knowledge_graph/py2neo.py

Lines changed: 9 additions & 8 deletions
@@ -109,18 +109,17 @@ def _create_relation(tx, l1, p1, l2, p2, lr, pr):
         """
         if pr is not None:
             query = f"""
-            MERGE (l1:{l1} {{ {', '.join([f'{k}:"{v}"' for k, v in p1.items()])} }})
-            MERGE (l2:{l2} {{ {', '.join([f'{k}:"{v}"' for k, v in p2.items()])} }})
+            MATCH (l1:{l1} {{ {', '.join([f'{k}:"{v}"' for k, v in p1.items()])} }})
+            MATCH (l2:{l2} {{ {', '.join([f'{k}:"{v}"' for k, v in p2.items()])} }})
             MERGE (l1)-[r:{lr} {{ {', '.join([f'{k}: ${k}' for k in pr.keys()])} }} ]->(l2)
             """
             tx.run(query, **pr)
         else:
             query = f"""
-            MERGE (l1:{l1} {{ {', '.join([f'{k}:"{v}"' for k, v in p1.items()])} }})
-            MERGE (l2:{l2} {{ {', '.join([f'{k}:"{v}"' for k, v in p2.items()])} }})
+            MATCH (l1:{l1} {{ {', '.join([f'{k}:"{v}"' for k, v in p1.items()])} }})
+            MATCH (l2:{l2} {{ {', '.join([f'{k}:"{v}"' for k, v in p2.items()])} }})
             MERGE (l1)-[r:{lr}]->(l2)
             """
-            # print(query)
             tx.run(query)

     def find_nodes(self, label, properties=None):
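Switching the two endpoint clauses from MERGE to MATCH means a relation is only written when both nodes already exist, instead of quietly creating new nodes on a label or property mismatch. A small sketch that rebuilds the same f-string query for inspection, with illustrative labels and properties (print only, no driver needed):

l1, p1 = 'component', {'ID': 'P-101'}
l2, p2 = 'system', {'name': 'SYS-A'}
lr, pr = 'PART_OF', {'since': '2024-01-01'}

# Assemble the Cypher text the same way _create_relation does.
query = f"""
MATCH (l1:{l1} {{ {', '.join([f'{k}:"{v}"' for k, v in p1.items()])} }})
MATCH (l2:{l2} {{ {', '.join([f'{k}:"{v}"' for k, v in p2.items()])} }})
MERGE (l1)-[r:{lr} {{ {', '.join([f'{k}: ${k}' for k in pr.keys()])} }} ]->(l2)
"""
print(query)
# MATCH (l1:component { ID:"P-101" })
# MATCH (l2:system { name:"SYS-A" })
# MERGE (l1)-[r:PART_OF { since: $since } ]->(l2)
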
@@ -278,7 +277,7 @@ def load_dataframe_for_nodes(self, df, labels, properties):
         """
         assert set(properties).issubset(set(df.columns))
         for _, row in df.iterrows():
-            self.create_node(labels, row.to_dict())
+            self.create_node(labels, row[properties].to_dict())

     # Load csv function to create relations
     def load_dataframe_for_relations(self, df, l1='sourceLabel', p1='sourceNodeId', l2='targetLabel', p2='targetNodeId', lr='relationshipType', pr=None):
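Indexing the row with the properties list before calling to_dict() keeps only the declared node properties, so extra bookkeeping columns in the DataFrame no longer end up on the node. A quick pandas illustration with made-up columns:

import pandas as pd

df = pd.DataFrame({'ID': ['P-101'], 'kind': ['pump'], 'scratch': [42]})
properties = ['ID', 'kind']

_, row = next(df.iterrows())
print(row.to_dict())              # {'ID': 'P-101', 'kind': 'pump', 'scratch': 42}  (old behaviour)
print(row[properties].to_dict())  # {'ID': 'P-101', 'kind': 'pump'}                 (new behaviour)
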
@@ -297,17 +296,19 @@ def load_dataframe_for_relations(self, df, l1='sourceLabel', p1='sourceNodeId',
         # for nodes. Future development need to be performed.
         # label (l1/l2), properties (p1/p2), and relation label (lr), relation properties (pr)
         valid = []
+
         valid.extend([l1, l2, lr, p1, p2])
         if pr is not None:
             valid.extend(pr)
+
         assert set(valid).issubset(set(df.columns))

         with self.__driver.session() as session:
             for _, row in df.iterrows():
                 l1_ = row[l1]
-                p1_ = {'nodeId': row[p1]}
+                p1_ = {p1: row[p1]}
                 l2_ = row[l2]
-                p2_ = {'nodeId': row[p2]}
+                p2_ = {p2: row[p2]}
                 lr_ = row[lr]
                 if pr is not None:
                     pr_ = row[pr].to_dict()

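Building p1_ and p2_ from the caller-supplied p1/p2 names, rather than the hard-coded 'nodeId' key, lets the endpoint MATCH in _create_relation use whatever identifying property the construction schema declares. A per-row sketch of the dictionaries, using the column layout load_dataframe_for_relations expects (all names illustrative):

import pandas as pd

df = pd.DataFrame({'component': ['component'], 'ID': ['P-101'],
                   'system': ['system'], 'name': ['SYS-A'],
                   'PART_OF': ['PART_OF'], 'since': ['2024-01-01']})
l1, p1, l2, p2, lr, pr = 'component', 'ID', 'system', 'name', 'PART_OF', ['since']

for _, row in df.iterrows():
    p1_ = {p1: row[p1]}   # {'ID': 'P-101'}   -- previously always {'nodeId': ...}
    p2_ = {p2: row[p2]}   # {'name': 'SYS-A'}
    print(row[l1], p1_, row[lr], row[pr].to_dict(), row[l2], p2_)
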
src/dackar/knowledge_graph/schemas/monitoringSystemSchema.toml

Lines changed: 5 additions & 5 deletions
@@ -6,11 +6,11 @@ version = "1.0"
 # ====================

 [entities.monitoring_variable]
-description = "Represents a measurable physical variable that is continuously or periodically recorded by monitoring sensors from
-               a specific component, equipment,
-               or system. This entity captures dynamic operational data that reflects the real-time or historical
-               state of the asset, enabling monitoring, diagnostics, performance analysis, and integration with
-               predictive models or digital twins within engineering and plant operations contexts."
+description = "Represents a measurable physical variable that is continuously or periodically recorded by monitoring
+               sensors from a specific component, equipment, or system. This entity captures dynamic operational
+               data that reflects the real-time or historical state of the asset, enabling monitoring, diagnostics,
+               performance analysis, and integration with predictive models or digital twins within engineering and
+               plant operations contexts."
 properties = [
   { name = "label", type = "string", optional = true },
   { name = "ID", type = "string"},

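Since KGconstruction.py imports tomllib, a schema file in this shape can be parsed straight into nested dictionaries. A self-contained sketch with a trimmed-down entity (the property names are copied from the file above; the description is shortened because tomllib only accepts multi-line strings in TOML's triple-quoted form):

import tomllib

toml_text = '''
[entities.monitoring_variable]
description = "Represents a measurable physical variable recorded by monitoring sensors."
properties = [
  { name = "label", type = "string", optional = true },
  { name = "ID", type = "string" },
]
'''

schema = tomllib.loads(toml_text)
entity = schema['entities']['monitoring_variable']
print(entity['description'])
print([p['name'] for p in entity['properties']])   # ['label', 'ID']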