Skip to content

Commit 151e0df

Browse files
Merge branch 'develop'
2 parents 53a4fcc + 355e7af commit 151e0df

File tree

56 files changed

+1967
-142
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+1967
-142
lines changed

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ def read(file_path):
88
setup(
99
name = 'shexer',
1010
packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
11-
version = '2.6.5.1',
11+
version = '2.7.0',
1212
description = 'Automatic schema extraction for RDF graphs',
1313
author = 'Daniel Fernandez-Alvarez',
1414
author_email = 'danifdezalvarez@gmail.com',
1515
url = 'https://github.com/DaniFdezAlvarez/shexer',
16-
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.6.5.1.tar.gz',
16+
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.7.0.tar.gz',
1717
keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
1818
long_description = read('README.md'),
1919
long_description_content_type='text/markdown',

shexer/consts.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,8 @@
5050
"http://xmlns.com/foaf/0.1/": "foaf"
5151
}
5252

53-
# WESO-SHAPES-ONTO
54-
FREQ_PROP = "http://weso.es/shexer/ontology/ratio_property_usage"
53+
# WESO-SHAPES-ONTO # todo !
54+
FREQ_PROP = "http://weso.es/shexer/ontology/ratio_property_usage"
55+
EXTRA_INFO_PROP = "http://www.w3.org/2000/01/rdf-schema#comment"
56+
EXAMPLE_CONFORMANCE_PROP = "http://weso.es/shexer/ontology/conformant_example"
57+
ABSOLUTE_COUNT_PROP = "http://weso.es/shexer/ontology/total_conforming_instances"

shexer/core/shexing/class_shexer.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22

3-
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE, FREQ_PROP
3+
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE, FREQ_PROP, ABSOLUTE_COUNT_PROP, EXTRA_INFO_PROP
44
from shexer.core.shexing.strategy.direct_shexing_strategy import DirectShexingStrategy
55
from shexer.core.shexing.strategy.direct_and_inverse_shexing_strategy import DirectAndInverseShexingStrategy
66
from shexer.utils.target_elements import determine_original_target_nodes_if_needed
@@ -18,8 +18,9 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
1818
allow_opt_cardinality=True, disable_exact_cardinality=False,
1919
shapes_namespace=SHAPES_DEFAULT_NAMESPACE, inverse_paths=False,
2020
decimals=-1, instances_report_mode=RATIO_INSTANCES, detect_minimal_iri=False,
21-
class_min_iris_dict=None, allow_redundant_or=False, shape_names_dict=None, frequency_property=FREQ_PROP,
22-
comments_to_annotations=False):
21+
class_min_iris_dict=None, allow_redundant_or=False, shape_names_dict=None,
22+
frequency_property=FREQ_PROP, comments_to_annotations=False, extra_info_property=EXTRA_INFO_PROP,
23+
absolute_count_property=ABSOLUTE_COUNT_PROP):
2324
self._class_counts_dict = class_counts_dict
2425
self._class_profile_dict = class_profile_dict if class_profile_dict is not None else self._load_class_profile_dict_from_file(
2526
class_profile_json_file)
@@ -45,6 +46,8 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
4546
self._shape_names_dict = shape_names_dict if shape_names_dict is not None else {}
4647
self._frequency_property = frequency_property
4748
self._comments_to_annotations = comments_to_annotations
49+
self._extra_info_property = extra_info_property
50+
self._absolute_count_property = absolute_count_property
4851

4952
self._original_target_nodes = determine_original_target_nodes_if_needed(remove_empty_shapes=remove_empty_shapes,
5053
original_target_classes=original_target_classes,

shexer/core/shexing/class_shexer_fed_sources.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from shexer.core.shexing.class_shexer import ClassShexer
2-
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE, FREQ_PROP, RATIO_INSTANCES
2+
from shexer.consts import RDF_TYPE, SHAPES_DEFAULT_NAMESPACE, FREQ_PROP, RATIO_INSTANCES, ABSOLUTE_COUNT_PROP, EXTRA_INFO_PROP
33
from shexer.core.instances.pconsts import FEDERATION_TAG_MARK
44

55
_COMMENT_FED_PROPERTY = "# Constraint only observed in {}"
@@ -15,14 +15,15 @@ def __init__(self, class_counts_dict, class_profile_dict=None, class_profile_jso
1515
allow_opt_cardinality=True, disable_exact_cardinality=False, shapes_namespace=SHAPES_DEFAULT_NAMESPACE,
1616
inverse_paths=False, decimals=-1, instances_report_mode=RATIO_INSTANCES, detect_minimal_iri=False,
1717
class_min_iris_dict=None, allow_redundant_or=False, fed_sources=None, shape_names_dict=None,
18-
frequency_property=FREQ_PROP, comments_to_annotations=False):
18+
frequency_property=FREQ_PROP, comments_to_annotations=False, extra_info_property=EXTRA_INFO_PROP,
19+
absolute_counts_prop=ABSOLUTE_COUNT_PROP):
1920
super().__init__(class_counts_dict, class_profile_dict, class_profile_json_file, remove_empty_shapes,
2021
original_target_classes, original_shape_map, discard_useless_constraints_with_positive_closure,
2122
keep_less_specific, all_compliant_mode, instantiation_property, disable_or_statements,
2223
disable_comments, namespaces_dict, tolerance_to_keep_similar_rules, allow_opt_cardinality,
2324
disable_exact_cardinality, shapes_namespace, inverse_paths, decimals, instances_report_mode,
2425
detect_minimal_iri, class_min_iris_dict, allow_redundant_or, shape_names_dict,
25-
frequency_property, comments_to_annotations)
26+
frequency_property, comments_to_annotations, extra_info_property, absolute_counts_prop)
2627
self._fed_sources = fed_sources
2728

2829
def shex_classes(self, acceptance_threshold=0,

shexer/core/shexing/strategy/abstract_shexing_strategy.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ def __init__(self, class_shexer):
3939
disable_comments=self._disable_comments,
4040
frequency_property=self._class_shexer._frequency_property,
4141
namespaces_dict=self._namespaces_dict,
42-
comments_to_annotations=class_shexer._comments_to_annotations)
42+
comments_to_annotations=class_shexer._comments_to_annotations,
43+
extra_info_property=self._class_shexer._extra_info_property,
44+
absolute_count_property=self._class_shexer._absolute_count_property)
4345

4446

4547
def yield_base_shapes(self, acceptance_threshold):

shexer/io/shacl/formater/shacl_serializer.py

Lines changed: 94 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
from shexer.model.shape import STARTING_CHAR_FOR_SHAPE_NAME
33
from rdflib import Graph, Namespace, URIRef, RDF, BNode, XSD, Literal
44
from shexer.model.statement import POSITIVE_CLOSURE, KLEENE_CLOSURE, OPT_CARDINALITY
5-
from shexer.utils.literal import XSD_NAMESPACE, LANG_STRING_TYPE
5+
from shexer.utils.literal import XSD_NAMESPACE, LANG_STRING_TYPE, decide_literal_type
66
from shexer.model.const_elem_types import IRI_ELEM_TYPE, LITERAL_ELEM_TYPE, DOT_ELEM_TYPE, BNODE_ELEM_TYPE
77
from shexer.io.wikidata import wikidata_annotation
88
from wlighter import TURTLE_FORMAT
9-
from shexer.model.node_selector import NodeSelectorSparql, NodeSelectorNoSparql
9+
from shexer.model.node_selector import NodeSelectorSparql
1010
from shexer.utils.log import log_msg
11+
from shexer.consts import RATIO_INSTANCES, EXAMPLE_CONFORMANCE_PROP, ABSOLUTE_COUNT_PROP, EXTRA_INFO_PROP, FREQ_PROP, \
12+
SHAPE_EXAMPLES, ALL_EXAMPLES, MIXED_INSTANCES, ABSOLUTE_INSTANCES, CONSTRAINT_EXAMPLES
1113

1214
_EXPECTED_SHAPE_BEGINING = STARTING_CHAR_FOR_SHAPE_NAME + "<"
1315
_EXPECTED_SHAPE_ENDING = ">"
@@ -56,7 +58,11 @@ class ShaclSerializer(object):
5658

5759
def __init__(self, target_file, shapes_list, namespaces_dict=None, string_return=False,
5860
instantiation_property_str=RDF_TYPE_STR, wikidata_annotation=False,
59-
detect_minimal_iri=False, shape_example_features=None, shape_map=None, verbose=False):
61+
detect_minimal_iri=False, shape_example_features=None, shape_map=None, verbose=False,
62+
instances_report_mode=RATIO_INSTANCES, examples_mode=None, inverse_paths=False,
63+
example_constraint_prop=EXAMPLE_CONFORMANCE_PROP, comments_to_annotations=False,
64+
absolute_counts_prop=ABSOLUTE_COUNT_PROP, extra_info_prop=EXTRA_INFO_PROP,
65+
frequency_prop=FREQ_PROP):
6066
self._target_file = target_file
6167
self._namespaces_dict = namespaces_dict if namespaces_dict is not None else {}
6268
self._shapes_list = shapes_list
@@ -68,6 +74,15 @@ def __init__(self, target_file, shapes_list, namespaces_dict=None, string_return
6874
self._shape_map = shape_map
6975
self._verbose = verbose
7076

77+
self._instances_report_mode = instances_report_mode
78+
self._examples_mode = examples_mode
79+
self._inverse_paths = inverse_paths
80+
self._example_constraint_prop = URIRef(example_constraint_prop)
81+
self._generate_annotations = comments_to_annotations
82+
self._absolute_counts_prop = URIRef(absolute_counts_prop)
83+
self._extra_info_prop = URIRef(extra_info_prop)
84+
self._frequency_prop = URIRef(frequency_prop)
85+
7186
self._g_shapes = Graph()
7287

7388
# self._uri_dict = {}
@@ -128,8 +143,26 @@ def _add_shape(self, shape):
128143
shape=shape)
129144
self._add_shape_constraints(shape=shape,
130145
r_shape_uri=r_shape_uri)
146+
self._add_shape_annotations(shape=shape,
147+
r_shape_uri=r_shape_uri)
131148

132-
149+
def _add_shape_annotations(self, shape, r_shape_uri):
    """Add the shape-level annotation triples, if annotations are enabled.

    Nothing is added unless ``self._generate_annotations`` is truthy.
    When it is:

    * a shape example is added if the examples mode is ``SHAPE_EXAMPLES``
      or ``ALL_EXAMPLES``;
    * an instance count is added if the instances report mode is
      ``MIXED_INSTANCES`` or ``ABSOLUTE_INSTANCES``.

    :param shape: shape being serialized
    :param r_shape_uri: node handed to the helpers together with the shape
    """
    if self._generate_annotations:
        if self._examples_mode in (SHAPE_EXAMPLES, ALL_EXAMPLES):
            self._add_shape_example(shape, r_shape_uri)
        if self._instances_report_mode in (MIXED_INSTANCES, ABSOLUTE_INSTANCES):
            self._add_shape_counts(shape, r_shape_uri)
156+
157+
def _add_shape_example(self, shape, r_shape_uri):
    """Add a triple that attaches an example to the shape node.

    The example is obtained from ``self._shape_example_features`` using
    the shape's ``class_uri`` as ``shape_id`` and is wrapped in a
    ``URIRef`` before being added under ``self._example_constraint_prop``.

    :param shape: shape whose example is looked up
    :param r_shape_uri: first element of the emitted triple
    """
    example = self._shape_example_features.shape_example(shape_id=shape.class_uri)
    self._add_triple(r_shape_uri, self._example_constraint_prop, URIRef(example))
161+
162+
def _add_shape_counts(self, shape, r_shape_uri):
    """Add a triple carrying ``shape.n_instances`` as a literal.

    The triple uses ``self._absolute_counts_prop`` as its second element.

    :param shape: shape whose ``n_instances`` value is reported
    :param r_shape_uri: first element of the emitted triple
    """
    instances_literal = Literal(shape.n_instances)
    self._add_triple(r_shape_uri, self._absolute_counts_prop, instances_literal)
133166
def _add_target_class(self, shape, r_shape_uri):
134167
if self._shape_map is None:
135168
if shape.class_uri is not None:
@@ -152,18 +185,58 @@ def _literal_iri_pattern(self, shape):
152185
def _add_shape_constraints(self, shape, r_shape_uri):
153186
for a_statement in shape.yield_statements():
154187
self._add_constraint(statement=a_statement,
155-
r_shape_uri=r_shape_uri)
188+
r_shape_uri=r_shape_uri,
189+
shape=shape)
156190

157191
def _is_instantiation_property(self, str_property):
158192
return str_property == self._instantiation_property_str
159193

160-
def _add_constraint(self, statement, r_shape_uri):
194+
def _add_constraint(self, statement, r_shape_uri, shape):
195+
r_constraint_node = self._generate_bnode()
161196
if self._is_instantiation_property(statement.st_property):
162197
self._add_instantiation_constraint(statement=statement,
163-
r_shape_uri=r_shape_uri)
198+
r_shape_uri=r_shape_uri,
199+
r_constraint_node=r_constraint_node)
164200
else:
165201
self._add_regular_constraint(statement=statement,
166-
r_shape_uri=r_shape_uri)
202+
r_shape_uri=r_shape_uri,
203+
r_constraint_node=r_constraint_node,
204+
shape=shape)
205+
206+
if not self._generate_annotations:
207+
return
208+
if self._instances_report_mode in [MIXED_INSTANCES, RATIO_INSTANCES]:
209+
self._add_constraint_ratio_annotation(statement, r_constraint_node)
210+
if self._instances_report_mode in [MIXED_INSTANCES, ABSOLUTE_INSTANCES]:
211+
self._add_constraint_absolutes_annotation(statement, r_constraint_node)
212+
213+
def _add_constraint_ratio_annotation(self, statement, r_constraint_node):
    """Annotate a constraint node with the statement's probability.

    ``statement.probability`` is emitted as an ``xsd:decimal`` literal
    under ``self._frequency_prop``.

    :param statement: statement whose ``probability`` is reported
    :param r_constraint_node: first element of the emitted triple
    """
    ratio_literal = Literal(statement.probability, datatype=XSD.decimal)
    self._add_triple(r_constraint_node, self._frequency_prop, ratio_literal)
217+
218+
def _add_constraint_absolutes_annotation(self, statement, r_constraint_node):
    """Annotate a constraint node with the statement's occurrence count.

    ``statement.n_occurences`` is emitted as a literal under
    ``self._absolute_counts_prop``.

    :param statement: statement whose ``n_occurences`` is reported
    :param r_constraint_node: first element of the emitted triple
    """
    occurrences_literal = Literal(statement.n_occurences)
    self._add_triple(r_constraint_node, self._absolute_counts_prop, occurrences_literal)
222+
223+
def _add_constraint_example(self, statement, r_constraint_node, shape):
    """Attach an example value to a constraint node.

    The value comes from ``self._get_example_of_triple_obj`` and is added
    under ``self._example_constraint_prop``.

    :param statement: statement the example belongs to
    :param r_constraint_node: first element of the emitted triple
    :param shape: shape that owns the statement
    """
    self._add_triple(r_constraint_node,
                     self._example_constraint_prop,
                     self._get_example_of_triple_obj(statement, shape))
228+
229+
def _get_example_of_triple_obj(self, statement, shape):
    """Build the RDF term used as the example object of a constraint.

    The raw example is requested from ``self._shape_example_features``
    with the shape's ``class_uri`` and the statement's ``st_property``.
    The ``inverse`` argument is passed only when inverse paths are
    enabled, so the lookup's own default applies otherwise.

    An example starting with ``http://`` or ``https://`` is returned as a
    ``URIRef``. Anything else becomes a ``Literal`` whose datatype is
    chosen by ``decide_literal_type``.

    :param statement: statement whose example is requested
    :param shape: shape that owns the statement
    :return: a ``URIRef`` or a typed ``Literal``
    """
    lookup_args = {"shape_id": shape.class_uri, "prop": statement.st_property}
    if self._inverse_paths:
        lookup_args["inverse"] = statement.is_inverse
    candidate = self._shape_example_features.get_constraint_example(**lookup_args)
    # Accept both schemes: checking only "http://" would turn every
    # https IRI into a typed literal instead of an IRI node.
    if candidate.startswith(("http://", "https://")):
        return URIRef(candidate)
    return Literal(candidate, datatype=decide_literal_type(candidate))
167240

168241
def _add_exactly_one_cardinality(self, r_constraint_node):
169242
self._add_min_occurs(r_constraint_node=r_constraint_node,
@@ -178,8 +251,7 @@ def _add_in_instance(self, r_constraint_node, statement):
178251
self._add_triple(list_seed_node, RDF.first, target_node)
179252
self._add_triple(list_seed_node, RDF.rest, RDF.nil)
180253

181-
def _add_instantiation_constraint(self, statement, r_shape_uri):
182-
r_constraint_node = self._generate_bnode()
254+
def _add_instantiation_constraint(self, statement, r_shape_uri, r_constraint_node):
183255
self._add_bnode_property(r_shape_uri=r_shape_uri,
184256
r_constraint_node=r_constraint_node)
185257
self._add_direct_path(statement=statement,
@@ -188,8 +260,7 @@ def _add_instantiation_constraint(self, statement, r_shape_uri):
188260
self._add_in_instance(statement=statement,
189261
r_constraint_node=r_constraint_node)
190262

191-
def _add_regular_constraint(self, statement, r_shape_uri):
192-
r_constraint_node = self._generate_bnode()
263+
def _add_regular_constraint(self, statement, r_shape_uri, r_constraint_node, shape):
193264
self._add_bnode_property(r_shape_uri=r_shape_uri,
194265
r_constraint_node=r_constraint_node)
195266
self._add_node_type(statement=statement,
@@ -198,6 +269,17 @@ def _add_regular_constraint(self, statement, r_shape_uri):
198269
r_constraint_node=r_constraint_node)
199270
self._add_path(statement=statement,
200271
r_constraint_node=r_constraint_node)
272+
if self._generate_annotations:
273+
self._add_statement_comments(statement=statement,
274+
r_constraint_node=r_constraint_node)
275+
if self._examples_mode in [ALL_EXAMPLES, CONSTRAINT_EXAMPLES]:
276+
self._add_constraint_example(statement, r_constraint_node, shape)
277+
278+
def _add_statement_comments(self, statement, r_constraint_node):
    """Emit one triple per comment of the statement.

    Each element of ``statement.comments`` is wrapped in a ``Literal``
    and added under ``self._extra_info_prop``.

    :param statement: statement whose ``comments`` are serialized
    :param r_constraint_node: first element of every emitted triple
    """
    for comment_text in statement.comments:
        comment_literal = Literal(comment_text)
        self._add_triple(r_constraint_node, self._extra_info_prop, comment_literal)
201283

202284
def _add_path(self, statement, r_constraint_node):
203285
if not statement.is_inverse:

shexer/io/shex/formater/consts.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@
66
POSITIVE_CLOSURE = "+"
77
KLEENE_CLOSURE = "*"
88
OPT_CARDINALITY = "?"
9-
SHAPE_LINK_CHAR = "@"
9+
SHAPE_LINK_CHAR = "@"
10+
ANNOTATION_BEGIN = "//"

0 commit comments

Comments
 (0)