Skip to content

Commit e524d90

Browse files
committed
updates on schema checks
1 parent cb9421d commit e524d90

File tree

2 files changed

+53
-20
lines changed

2 files changed

+53
-20
lines changed

src/dackar/knowledge_graph/KGconstruction.py

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ def __init__(self, config_file_path, uri, pwd, user):
2828

2929
#self.processedDataFolder = processedDataFolder
3030

31+
self.datatypes = ['string', 'int', 'float', 'boolean', 'datetime']
32+
3133
# Create python to neo4j driver
3234
self.py2neo = Py2Neo(uri=uri, user=user, pwd=pwd)
3335

@@ -49,13 +51,22 @@ def __init__(self, config_file_path, uri, pwd, user):
4951
}
5052
}
5153
},
52-
"required":["node_description"]
54+
"required":["node_description","node_properties"]
5355
},
5456
"relation": {"description": "Data element encapsulated in the edge",
5557
"type": "object",
5658
"properties" : {"relation_description": {"type": "string", "description": "Type of relationship encapsulated in the relation between two nodes"},
5759
"from_entity": {"type": "string", "description": "Label of the departure node"},
58-
"to_entity" : {"type": "string", "description": "Label of the arrival node"}},
60+
"to_entity" : {"type": "string", "description": "Label of the arrival node"},
61+
"relation_properties": {"type": "array",
62+
"description": "Allowed properties associate with the relation",
63+
"items": {"type": "object",
64+
"properties": {"name" : {"type": "string", "description": "Name of the relation property"},
65+
"type" : {"type": "string", "description": "Type of the node property"},
66+
"optional": {"type": "boolean", "description": "Specifies if this property is required or not"}},
67+
"required":["name","type","optional"],
68+
}
69+
}},
5970
"required":["relation_description","from_entity","to_entity"],
6071
}
6172
},
@@ -70,7 +81,7 @@ def __init__(self, config_file_path, uri, pwd, user):
7081
def resetGraph(self):
7182
self.py2neo.reset()
7283

73-
def checkSchemaStructure(self, importedSchema):
84+
def _checkSchemaStructure(self, importedSchema):
7485
try:
7586
validate(instance=importedSchema, schema=self.schemaSchema)
7687
print("TOML content is valid against the schema.")
@@ -86,13 +97,16 @@ def importGraphSchema(self, graphSchemaName, tomlFilename):
8697

8798
with open(config_path, 'rb') as f:
8899
config_data = tomllib.load(f)
100+
101+
# Check structure of imported graphSchema
102+
self._checkSchemaStructure(config_data)
89103

90-
self.checkSchemaStructure(config_data)
104+
#TODO: check datatypes against self.datatypes
91105

92-
# Check structure of imported graphSchema
93-
for node in config_data['node'].keys():
94-
pass
95106
# Check imported graphSchema against self.graphSchemas
107+
if graphSchemaName in list(self.graphSchemas.keys()):
108+
print('Schema ' + str(graphSchemaName) + ' is already defined in the exisiting schemas')
109+
96110
for node in config_data['node'].keys():
97111
for schema in self.graphSchemas:
98112
if node in schema['node'].keys():
@@ -106,11 +120,33 @@ def importGraphSchema(self, graphSchemaName, tomlFilename):
106120
self.graphSchemas[graphSchemaName] = config_data
107121
return config_data
108122

109-
def schemaValidation(self, constructionSchema):
123+
def _schemaReturnNodeProperties(self, nodeLabel):
    """Return the declared properties of a node label as a DataFrame.

    Searches every schema stored in ``self.graphSchemas`` for ``nodeLabel``
    and returns the first match.

    Args:
        nodeLabel: Label of the node to look up.

    Returns:
        A ``pandas.DataFrame`` built from the node's ``node_properties``
        list (one row per property entry), or ``None`` if no stored schema
        defines ``nodeLabel``.
    """
    schemas = self.graphSchemas
    # graphSchemas is filled as a name -> schema mapping; iterating it
    # directly would yield the schema names (strings) rather than the schema
    # contents, so walk the values. A plain sequence of schemas still works.
    if hasattr(schemas, 'values'):
        schemas = schemas.values()

    for schema in schemas:
        nodes = schema.get('node', {})
        if nodeLabel in nodes:
            return pd.DataFrame(nodes[nodeLabel]['node_properties'])

    print('Node not found')
    return None
131+
132+
def _schemaValidation(self, constructionSchema):
    """Check the nodes of a construction schema against the stored graph schemas.

    For every node label under ``constructionSchema['nodes']`` the properties
    it specifies are compared with the properties returned by
    ``self._schemaReturnNodeProperties``. A message is printed when a
    non-optional property is missing, when a property is not declared for
    that node, or when the node label is not defined in any stored schema.
    Nothing is raised and nothing is returned.

    NOTE(review): the visible call in kgConstructionWorkflow passes extra
    positional arguments (``self`` and ``graphSchema``) that this signature
    does not accept -- confirm and fix at the call site.

    Args:
        constructionSchema: Mapping with a ``'nodes'`` entry that maps each
            node label to a mapping keyed by property name.
    """
    for node, mapping in constructionSchema['nodes'].items():
        specified_prop = set(mapping)

        prop_df = self._schemaReturnNodeProperties(node)
        if prop_df is None:
            # Unknown node label: report it instead of crashing on None.
            print('Node ' + str(node) + ' is not defined in any imported schema')
            continue

        if prop_df.empty:
            # A node declared with no properties yields a frame without
            # 'name'/'optional' columns.
            allowed_properties = set()
            req_properties = set()
        else:
            allowed_properties = set(prop_df['name'])
            is_required = ~prop_df['optional'].astype(bool)
            req_properties = set(prop_df.loc[is_required, 'name'])

        if not req_properties.issubset(specified_prop):
            print('Node ' + str(node) + ' requires all these properties: '
                  + str(sorted(req_properties)))

        if not specified_prop.issubset(allowed_properties):
            print('Node ' + str(node) + ' allows only these properties: '
                  + str(sorted(allowed_properties)))

    # TODO: for each relation check that required properties are listed
114150

115151
def genericWorkflow(self, data, constructionSchema):
116152
# Check constructionSchema against self.graphSchemas
@@ -260,7 +296,7 @@ def eventReportWorkflow(self, filename, constructionSchema, pipelines):
260296

261297
def kgConstructionWorkflow(self, dataframe, graphSchema, constructionSchema):
262298

263-
self.schemaValidation(self, constructionSchema, graphSchema)
299+
self._schemaValidation(self, constructionSchema, graphSchema)
264300

265301
for node in constructionSchema['nodes'].keys():
266302
map = {value: key for key, value in constructionSchema['nodes'][node].items()}

src/dackar/knowledge_graph/schemas/conditionReportSchema.toml

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,13 @@ version = 1.0
99
node_description = """Represents a structured report documenting an observed abnormal event or condition within
1010
a plant or operational environment. This entity captures descriptive information provided
1111
by plant staff, including the nature, context, and potential implications of the anomaly."""
12-
node_properties = [
13-
{name = "date", type = "string", optional = false},
14-
{name = "ID", type = "string", optional = false},
15-
]
12+
node_properties = [{name = "date", type = "string", optional = false},
13+
{name = "ID", type = "string", optional = false}]
1614

1715
[node.work_order]
1816
description = """[]"""
19-
edge_properties = [
20-
{name = "date", type = "string", optional = false},
21-
{name = "ID", type = "string", optional = false},
22-
]
17+
node_properties = [{name = "date", type = "string", optional = false},
18+
{name = "ID", type = "string", optional = false}]
2319

2420
# ====================
2521
# Relations
@@ -32,6 +28,7 @@ relation_description = """Indicates that a generalized nuclear-relevant entity s
3228
a condition report"""
3329
from_entity = "condition_report"
3430
to_entity = "nuclear_entity"
31+
relation_properties = [{name = "date", type = "string", optional = false}]
3532

3633

3734
[relation.mentions]

0 commit comments

Comments
 (0)