Skip to content

Commit 0d779c3

Browse files
Merge branch 'develop'
2 parents d85dc7a + 01eed8d commit 0d779c3

24 files changed

+235
-80
lines changed

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ def read(file_path):
88
setup(
99
name = 'shexer',
1010
packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
11-
version = '2.7.1',
11+
version = '2.7.2',
1212
description = 'Automatic schema extraction for RDF graphs',
1313
author = 'Daniel Fernandez-Alvarez',
1414
author_email = 'danifdezalvarez@gmail.com',
1515
url = 'https://github.com/DaniFdezAlvarez/shexer',
16-
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.7.1.tar.gz',
16+
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.7.2.tar.gz',
1717
keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
1818
long_description = read('README.md'),
1919
long_description_content_type='text/markdown',
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from shexer.io.graph.yielder.base_triples_yielder import BaseTriplesYielder
2+
from shexer.io.graph.yielder.multifile_base_triples_yielder import MultifileBaseTripleYielder
3+
from shexer.utils.triple_yielders import tune_subj, tune_prop, tune_token
4+
from shexer.utils.uri import remove_corners
5+
import tempfile
6+
from pathlib import Path
7+
import re
8+
from shexer.utils.exception import ParseError
9+
10+
WHITESPACES= re.compile(r"\s+")
11+
12+
import lightrdf
13+
14+
15+
class LightTurtleTriplesYielder(BaseTriplesYielder):
    """Yield the triples of a Turtle document parsed with ``lightrdf``.

    The input is either the path of a Turtle file (``source_file``) or the
    document itself as a string (``raw_graph``). A raw graph is first written
    to a temporary file, because the parser is invoked with a file path.

    Prefix declarations found in the header of the document are added to
    ``namespaces_dict`` (namespace URI -> prefix); entries already present in
    that dict are never overwritten.
    """

    def __init__(self, source_file, raw_graph, namespaces_dict):
        """
        :param source_file: path of the Turtle file to parse. Only used when
            raw_graph is None.
        :param raw_graph: str with the Turtle content, or None.
        :param namespaces_dict: dict {namespace URI: prefix} to be completed
            with the prefixes declared in the input. If None, a new empty
            dict is used.
        """
        super().__init__()
        self._prefixes = {}
        self._source_file = source_file
        self._raw_graph = raw_graph
        self._namespaces_dict = namespaces_dict if namespaces_dict is not None else {}
        self._yielded_triples = 0

    def _yield_triples(self):
        """Parse ``self._source_file`` and yield tuned (subject, property, object) tuples.

        :raises ParseError: if the parser fails while reading the file.
        """
        self._extract_prefixes()
        parser = lightrdf.turtle.Parser()
        try:
            for s, p, o in parser.parse(self._source_file, base_iri=None):
                yield (
                    tune_subj(s),
                    tune_prop(p),
                    tune_token(o)
                )
                # Incremented when the consumer asks for the next triple.
                self._yielded_triples += 1
        except (GeneratorExit, KeyboardInterrupt, SystemExit):
            # These are not parsing problems. GeneratorExit is thrown at the
            # ``yield`` above whenever the consumer stops iterating early or
            # calls close(); wrapping it would make close() raise ParseError.
            raise
        except BaseException as e:
            # NOTE(review): BaseException (instead of Exception) is presumably
            # caught on purpose, since errors coming from a native extension
            # may not derive from Exception -- confirm before narrowing.
            raise ParseError(f"Error while parsing: {e}") from e

    def yield_triples(self):
        """Yield every triple of the configured input.

        The counter exposed by ``yielded_triples`` is reset at the beginning
        of each run, so it always refers to the last iteration only.

        :raises ParseError: if the input cannot be parsed.
        """
        self._yielded_triples = 0
        if self._raw_graph is None:
            yield from self._yield_triples()
            return

        original_source = self._source_file
        with tempfile.TemporaryDirectory() as tmpdir:
            tmp_file = Path(tmpdir) / "data.nt"
            tmp_file.write_text(self._raw_graph, encoding="utf-8")
            self._source_file = str(tmp_file)
            try:
                yield from self._yield_triples()
            finally:
                # The temporary file disappears with tmpdir: do not keep a
                # reference to a path that no longer exists.
                self._source_file = original_source

    @property
    def yielded_triples(self):
        """Number of triples yielded so far in the current/last run."""
        return self._yielded_triples

    @property
    def error_triples(self):  # No error triples in this parser, it crashes when finding an error
        """Always 0: a malformed input raises ParseError instead of being counted."""
        return 0

    def _extract_prefixes(self):
        """Collect the prefix declarations placed in the header of the file.

        Blank lines, comment lines and base directives are skipped. The scan
        stops at the first line of any other kind, so declarations located
        after the first statement of the document are not detected. Both the
        Turtle form (``@prefix``) and the SPARQL-style form (``PREFIX``) are
        recognized.
        """
        with open(self._source_file, "r", encoding="utf-8") as f:
            for line in f:
                line = WHITESPACES.sub(" ", line.strip())
                if not line or line.startswith("#"):
                    continue  # blank line or comment: the header may go on
                lowered = line.lower()
                if line.startswith("@prefix") or lowered.startswith("prefix "):
                    self._process_prefix_line(line)
                elif line.startswith("@base") or lowered.startswith("base "):
                    continue  # header directive which declares no prefix
                else:
                    break  # first non-header line: stop reading the file

    def _process_prefix_line(self, line):
        """Register the namespace declared in a whitespace-normalized prefix line.

        Lines with fewer than three space-separated pieces are ignored here;
        the actual parser is the one in charge of reporting a malformed
        declaration.

        :param line: str such as ``@prefix ex: <http://example.org/> .``
        """
        pieces = line.split(" ")
        if len(pieces) < 3:
            return
        prefix = pieces[1][:-1] if pieces[1].endswith(":") else pieces[1]
        uri_token = pieces[2]
        if uri_token.endswith(">."):
            # Final dot written right after the IRI, with no space in between.
            uri_token = uri_token[:-1]
        # remove_corners presumably strips the surrounding '<' and '>'.
        base_url = remove_corners(uri_token)
        if base_url not in self._namespaces_dict:
            self._namespaces_dict[base_url] = prefix
75+
76+
class MultiLightTurtleTriplesYielder(MultifileBaseTripleYielder):
    """Multi-file yielder which builds a LightTurtleTriplesYielder for each source file.

    Every per-file yielder built here receives the same ``namespaces_dict``.
    """

    def __init__(self, list_of_files, namespaces_dict):
        """
        :param list_of_files: paths of the Turtle files to parse.
        :param namespaces_dict: dict handed over to every per-file yielder.
        """
        # Options of the base class fixed by this yielder.
        base_options = {
            "namespaces_to_ignore": None,
            "allow_untyped_numbers": False,
            "compression_mode": None,
            "zip_base_archive": None,
        }
        super().__init__(list_of_files=list_of_files, **base_options)
        self._namespaces_dict = namespaces_dict

    def _constructor_file_yielder(self, a_source_file, parse_namespaces=False):
        """Return the yielder in charge of ``a_source_file``.

        ``parse_namespaces`` is accepted but not used by this implementation.
        """
        file_yielder = LightTurtleTriplesYielder(
            raw_graph=None,
            source_file=a_source_file,
            namespaces_dict=self._namespaces_dict,
        )
        return file_yielder
90+

shexer/io/graph/yielder/nt_triples_yielder.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,51 @@
22
from shexer.utils.literal import there_is_arroba_after_last_quotes
33
from shexer.utils.triple_yielders import tune_prop, tune_token # , check_if_property_belongs_to_namespace_list
44
from shexer.io.graph.yielder.base_triples_yielder import BaseTriplesYielder
5+
from shexer.utils.exception import ParseError
6+
57

68

79
class NtTriplesYielder(BaseTriplesYielder):
810

911
def __init__(self, source_file=None, allow_untyped_numbers=False, raw_graph=None,
10-
compression_mode=None, zip_base_archive=None):
12+
compression_mode=None, zip_base_archive=None, silent_error=False):
1113

1214
super(NtTriplesYielder, self).__init__()
1315
self._source_file = source_file
1416
self._raw_graph = raw_graph
1517
self._triples_count = 0
1618
self._error_triples = 0
1719
self._allow_untyped_numbers = allow_untyped_numbers
20+
self._silent_error = silent_error
1821
self._line_reader = self._decide_line_reader(source_file=source_file,
1922
raw_graph=raw_graph,
2023
compression_mode=compression_mode,
2124
zip_base_archive=zip_base_archive)
22-
# The following ones are refs to functions. Im avoiding some comparison here.
25+
# The following ones are refs to functions. I'm avoiding some comparison here.
2326
# self.yield_triples = self._yield_triples_not_excluding_namespaces if namespaces_to_ignore is None \
2427
# else self._yield_triples_excluding_namespaces
2528

2629
def yield_triples(self):
2730
self._reset_count()
28-
for a_line in self._line_reader.read_lines():
29-
tokens = self._look_for_tokens(a_line.strip())
30-
if len(tokens) != 3:
31-
self._error_triples += 1
32-
log_msg(verbose=False, msg="This line was discarded: " + a_line)
33-
else:
34-
yield (tune_token(a_token=tokens[0]),
35-
tune_prop(a_token=tokens[1]),
36-
tune_token(a_token=tokens[2],
37-
allow_untyped_numbers=self._allow_untyped_numbers))
38-
self._triples_count += 1
31+
try:
32+
for a_line in self._line_reader.read_lines():
33+
a_line = a_line.strip()
34+
if a_line != "":
35+
tokens = self._look_for_tokens(a_line)
36+
if len(tokens) != 3:
37+
if self._silent_error:
38+
self._error_triples += 1
39+
log_msg(verbose=False, msg="This line was discarded: " + a_line)
40+
else:
41+
raise ParseError(f"Line: '{a_line}'")
42+
else:
43+
yield (tune_token(a_token=tokens[0]),
44+
tune_prop(a_token=tokens[1]),
45+
tune_token(a_token=tokens[2],
46+
allow_untyped_numbers=self._allow_untyped_numbers))
47+
self._triples_count += 1
48+
except BaseException as e:
49+
raise ParseError(f"Error while parsing. {e}") from e
3950

4051
def _look_for_tokens(self, str_line):
4152
result = []

shexer/utils/exception.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
class ParseError(Exception):
    """Error raised when an input graph cannot be parsed."""

shexer/utils/factories/triple_yielders_factory.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from shexer.io.graph.yielder.big_ttl_triples_yielder import BigTtlTriplesYielder
1010
from shexer.io.graph.yielder.multi_big_ttl_files_triple_yielder import MultiBigTtlTriplesYielder
1111
from shexer.io.graph.yielder.multi_zip_triples_yielder import MultiZipTriplesYielder
12+
from shexer.io.graph.yielder.light_turtle_triples_yielder import LightTurtleTriplesYielder, MultiLightTurtleTriplesYielder
1213
from shexer.utils.factories.shape_map_parser_factory import get_shape_map_parser
1314
from shexer.model.graph.endpoint_sgraph import EndpointSGraph
1415
from shexer.utils.translators.list_of_classes_to_shape_map import ListOfClassesToShapeMap
@@ -110,6 +111,12 @@ def get_triple_yielder(source_file=None, list_of_source_files=None, input_format
110111
list_of_files=list_of_source_files,
111112
compression_mode=compression_mode,
112113
zip_base_archives=zip_base_archives)
114+
115+
elif input_format in [TURTLE_ITER, TURTLE] and compression_mode is None:
116+
result = _yielder_for_turtle_light(source_file=source_file,
117+
list_of_files=list_of_source_files,
118+
namespaces_dict=namespaces_dict,
119+
raw_graph=raw_graph)
113120
elif input_format == TURTLE_ITER:
114121
result = _yielder_for_turtle_iter(source_file=source_file,
115122
allow_untyped_numbers=allow_untyped_numbers,
@@ -194,6 +201,14 @@ def _yielder_for_turtle_iter(source_file, raw_graph, allow_untyped_numbers, list
194201
allow_untyped_numbers=allow_untyped_numbers,
195202
compression_mode=compression_mode)
196203

204+
def _yielder_for_turtle_light(source_file, list_of_files, namespaces_dict, raw_graph):
    """Build the lightrdf-based Turtle yielder for one input or for a list of files.

    When ``list_of_files`` is provided, a multi-file yielder is returned and
    ``source_file`` / ``raw_graph`` are not used.
    """
    if list_of_files is not None:
        return MultiLightTurtleTriplesYielder(namespaces_dict=namespaces_dict,
                                              list_of_files=list_of_files)
    single_input_yielder = LightTurtleTriplesYielder(source_file=source_file,
                                                     raw_graph=raw_graph,
                                                     namespaces_dict=namespaces_dict)
    return single_input_yielder
211+
197212

198213
def _yielder_for_tsv_spo(source_file, raw_graph, allow_untyped_numbers, list_of_files,
199214
compression_mode, zip_base_archives):

test/t_files/annotations/one_class_all_examples_stats.ttl

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,30 @@
1010
:Person a sh:NodeShape ;
1111
shexer:conformant_example ex:Jimmy ;
1212
shexer:total_conforming_instances 5 ;
13+
sh:pattern "^http://example.org/" ;
1314
sh:property [ a sh:PropertyShape ;
15+
shexer:conformant_example "22"^^xsd:string ;
16+
shexer:ratio_property_usage 0.8 ;
17+
shexer:total_conforming_instances 4 ;
18+
rdfs:comment "# 80.0 % (4 instances). obj: xsd:integer. Cardinality: {1}" ;
19+
sh:dataType xsd:integer ;
20+
sh:maxCount 1 ;
21+
sh:path foaf:age ],
22+
[ a sh:PropertyShape ;
1423
shexer:conformant_example "Jimmy"^^xsd:string ;
1524
shexer:ratio_property_usage 0.6 ;
1625
shexer:total_conforming_instances 3 ;
1726
rdfs:comment "# 60.0 % (3 instances). obj: xsd:string. Cardinality: {1}" ;
1827
sh:dataType xsd:string ;
1928
sh:maxCount 1 ;
2029
sh:path foaf:name ],
30+
[ a sh:PropertyShape ;
31+
shexer:ratio_property_usage 1.0 ;
32+
shexer:total_conforming_instances 5 ;
33+
sh:in ( foaf:Person ) ;
34+
sh:maxCount 1 ;
35+
sh:minCount 1 ;
36+
sh:path rdf:type ],
2137
[ a sh:PropertyShape ;
2238
shexer:conformant_example "Jones"^^xsd:string ;
2339
shexer:ratio_property_usage 0.4 ;
@@ -33,20 +49,5 @@
3349
rdfs:comment "# 20.0 % (1 instance). obj: @:Person. Cardinality: {1}" ;
3450
sh:maxCount 1 ;
3551
sh:node :Person ;
36-
sh:path foaf:knows ],
37-
[ a sh:PropertyShape ;
38-
shexer:conformant_example "22"^^xsd:string ;
39-
shexer:ratio_property_usage 0.8 ;
40-
shexer:total_conforming_instances 4 ;
41-
rdfs:comment "# 80.0 % (4 instances). obj: xsd:integer. Cardinality: {1}" ;
42-
sh:dataType xsd:integer ;
43-
sh:maxCount 1 ;
44-
sh:path foaf:age ],
45-
[ a sh:PropertyShape ;
46-
shexer:ratio_property_usage 1.0 ;
47-
shexer:total_conforming_instances 5 ;
48-
sh:in ( foaf:Person ) ;
49-
sh:maxCount 1 ;
50-
sh:minCount 1 ;
51-
sh:path rdf:type ] ;
52+
sh:path foaf:knows ] ;
5253
sh:targetClass foaf:Person .
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
21
_:Alice <http://example.org/knows> _:Bob .
3-
_:Alice a <http://example.org/person> .
2+
_:Alice <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/person> .
43

54
_:Bob <http://example.org/knows> _:Eve .
6-
_:Bob a <http://example.org/person> .
5+
_:Bob <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/person> .
76
_:Bob <http://example.org/name> "Bob" .
87

98
_:Eve <http://example.org/name> "Eve" .
10-
_:Eve a <http://example.org/person> .
9+
_:Eve <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/person> .

test/t_files/bnodes/bnode_people.ttl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ _:Alice ex:knows _:Bob ;
44
a ex:person .
55

66
_:Bob ex:knows _:Eve ;
7-
a ex:person .
7+
a ex:person ;
88
ex:name "Bob" .
99

1010
_:Eve ex:name "Eve" ;

test/t_files/bnodes/or_with_redundant_bnodes_and_shapes.shex

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ PREFIX : <http://weso.es/shapes/>
88

99
:person
1010
{
11+
rdf:type [ex:person] ;
1112
ex:knows BNode OR @:person ;
12-
rdf:type [ex:person]
13+
ex:name xsd:string ?
1314
}

test/t_files/bnodes/people_some_bnodes_dont_have_shape.ttl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ _:Alice ex:knows _:Bob ;
44
a ex:person .
55

66
_:Bob ex:knows _:Eve ;
7-
a ex:person .
7+
a ex:person ;
88
ex:name "Bob" .
99

1010
_:Eve ex:name "Eve" .

0 commit comments

Comments
 (0)