weso
diff --git a/setup.py b/setup.py
Lines changed: 2 additions & 2 deletions
diff --git a/shexer/io/graph/yielder/light_turtle_triples_yielder.py b/shexer/io/graph/yielder/light_turtle_triples_yielder.py
Lines changed: 90 additions & 0 deletions
diff --git a/shexer/io/graph/yielder/nt_triples_yielder.py b/shexer/io/graph/yielder/nt_triples_yielder.py
Lines changed: 24 additions & 13 deletions
diff --git a/shexer/utils/exception.py b/shexer/utils/exception.py
Lines changed: 2 additions & 0 deletions
diff --git a/shexer/utils/factories/triple_yielders_factory.py b/shexer/utils/factories/triple_yielders_factory.py
Lines changed: 15 additions & 0 deletions
diff --git a/test/t_files/annotations/one_class_all_examples_stats.ttl b/test/t_files/annotations/one_class_all_examples_stats.ttl
Lines changed: 17 additions & 16 deletions
diff --git a/test/t_files/bnodes/bnode_people.nt b/test/t_files/bnodes/bnode_people.nt
Lines changed: 3 additions & 4 deletions
diff --git a/test/t_files/bnodes/bnode_people.ttl b/test/t_files/bnodes/bnode_people.ttl
Lines changed: 1 addition & 1 deletion
diff --git a/test/t_files/bnodes/or_with_redundant_bnodes_and_shapes.shex b/test/t_files/bnodes/or_with_redundant_bnodes_and_shapes.shex
Lines changed: 2 additions & 1 deletion
diff --git a/test/t_files/bnodes/people_some_bnodes_dont_have_shape.ttl b/test/t_files/bnodes/people_some_bnodes_dont_have_shape.ttl
Lines changed: 1 addition & 1 deletion
@@ -8,12 +8,12 @@ def read(file_path):
 setup(
   name = 'shexer',
   packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
-  version = '2.7.1',
+  version = '2.7.2',
   description = 'Automatic schema extraction for RDF graphs',
   author = 'Daniel Fernandez-Alvarez',
   author_email = 'danifdezalvarez@gmail.com',
   url = 'https://github.com/DaniFdezAlvarez/shexer',
-  download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.7.1.tar.gz',
+  download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.7.2.tar.gz',
   keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
   long_description = read('README.md'),
   long_description_content_type='text/markdown',
 
@@ -0,0 +1,90 @@
+from shexer.io.graph.yielder.base_triples_yielder import BaseTriplesYielder
+from shexer.io.graph.yielder.multifile_base_triples_yielder import MultifileBaseTripleYielder
+from shexer.utils.triple_yielders import tune_subj, tune_prop, tune_token
+from shexer.utils.uri import remove_corners
+import tempfile
+from pathlib import Path
+import re
+from shexer.utils.exception import ParseError
+
+WHITESPACES= re.compile(r"\s+")
+
+import lightrdf
+
+
+class LightTurtleTriplesYielder(BaseTriplesYielder):
+    """Yield the triples of a Turtle source parsed with ``lightrdf``.
+
+    The source is either a file path (``source_file``) or a string holding the
+    graph (``raw_graph``). The ``@prefix`` declarations found at the top of the
+    source are stored in ``namespaces_dict``, keyed by namespace.
+    """
+
+    def __init__(self, source_file, raw_graph, namespaces_dict):
+        """
+        :param source_file: path of the file to parse; replaced by a temporary
+            file path while traversing when raw_graph is given.
+        :param raw_graph: content of the graph as a string, or None to read
+            source_file.
+        :param namespaces_dict: dict updated in place with the declared
+            namespaces; a new dict is used when None.
+        """
+        super().__init__()
+        self._prefixes = {}
+        self._source_file = source_file
+        self._raw_graph = raw_graph
+        self._namespaces_dict = namespaces_dict if namespaces_dict is not None else {}
+        self._yielded_triples = 0
+
+    def _yield_triples(self):
+        """Read the prefixes of ``self._source_file``, then yield its tuned triples.
+
+        :raises ParseError: when the parser fails or a term cannot be tuned.
+        """
+        self._extract_prefixes()
+        parser = lightrdf.turtle.Parser()
+        try:
+            for s, p, o in parser.parse(self._source_file, base_iri=None):
+                yield (
+                    tune_subj(s),
+                    tune_prop(p),
+                    tune_token(o)
+                )
+                self._yielded_triples += 1
+        # Exception rather than BaseException: the GeneratorExit thrown at the
+        # yield when the consumer stops early, and KeyboardInterrupt, are not
+        # parsing errors and must not be converted into ParseError.
+        except Exception as e:
+            raise ParseError(f"Error while parsing: {e}") from e
+
+    def yield_triples(self):
+        """Yield every triple of the source as a tuned (subject, property, object) tuple.
+
+        A raw graph is first written to a temporary file, because both the
+        prefix extraction and the parser work on a file path.
+
+        :raises ParseError: when the content cannot be parsed.
+        """
+        # Restart the count on every traversal, as NtTriplesYielder does.
+        self._yielded_triples = 0
+        if self._raw_graph is not None:
+            with tempfile.TemporaryDirectory() as tmpdir:
+                tmp_file = Path(tmpdir) / "data.nt"
+                tmp_file.write_text(self._raw_graph, encoding="utf-8")
+                self._source_file = str(tmp_file)
+                yield from self._yield_triples()
+        else:
+            yield from self._yield_triples()
+
+    @property
+    def yielded_triples(self):
+        """Number of triples yielded so far in the current or last traversal."""
+        return self._yielded_triples
+
+    @property
+    def error_triples(self):  # No error triples in this parser, it crashes when finding an error
+        return 0
+
+    def _extract_prefixes(self):
+        """Register the ``@prefix`` declarations placed at the top of the source file.
+
+        Blank lines and comment lines are skipped. Scanning stops at the first
+        line of any other kind, so declarations located after that point are
+        not registered.
+        """
+        with open(self._source_file, "r", encoding="utf-8") as f:
+            for line in f:
+                line = WHITESPACES.sub(" ", line.strip())
+                if not line or line.startswith("#"):
+                    continue
+                if not line.startswith("@prefix"):
+                    break
+                self._process_prefix_line(line)
+
+    def _process_prefix_line(self, line):
+        """Store the namespace declared in a whitespace-normalized ``@prefix`` line.
+
+        A namespace that is already known keeps its current prefix.
+        """
+        pieces = line.split(" ")
+        if len(pieces) < 3:
+            # Prefix and namespace are not separated by a blank: skip the
+            # declaration instead of failing with an IndexError.
+            return
+        prefix = pieces[1][:-1] if pieces[1].endswith(":") else pieces[1]
+        base_url = remove_corners(pieces[2])
+        if base_url not in self._namespaces_dict:
+            self._namespaces_dict[base_url] = prefix
+
+class MultiLightTurtleTriplesYielder(MultifileBaseTripleYielder):
+    """Multi-file yielder whose per-file yielders are LightTurtleTriplesYielder objects."""
+
+    def __init__(self, list_of_files, namespaces_dict):
+        """
+        :param list_of_files: files handed to the multi-file base yielder.
+        :param namespaces_dict: dict shared with every per-file yielder.
+        """
+        super().__init__(list_of_files=list_of_files,
+                         namespaces_to_ignore=None,
+                         allow_untyped_numbers=False,
+                         compression_mode=None,
+                         zip_base_archive=None)
+        self._namespaces_dict = namespaces_dict
+
+    def _constructor_file_yielder(self, a_source_file, parse_namespaces=False):
+        """Return the yielder of a single file; parse_namespaces is not used here."""
+        single_file_yielder = LightTurtleTriplesYielder(
+            source_file=a_source_file,
+            namespaces_dict=self._namespaces_dict,
+            raw_graph=None)
+        return single_file_yielder
+
@@ -2,40 +2,51 @@
 from shexer.utils.literal import there_is_arroba_after_last_quotes
 from shexer.utils.triple_yielders import tune_prop, tune_token  # , check_if_property_belongs_to_namespace_list
 from shexer.io.graph.yielder.base_triples_yielder import BaseTriplesYielder
+from shexer.utils.exception import ParseError
+
 
 
 class NtTriplesYielder(BaseTriplesYielder):
 
     def __init__(self, source_file=None, allow_untyped_numbers=False, raw_graph=None,
-                 compression_mode=None, zip_base_archive=None):
+                 compression_mode=None, zip_base_archive=None, silent_error=False):
+        """
+        :param source_file: used, together with raw_graph, compression_mode and
+            zip_base_archive, to decide the line reader.
+        :param allow_untyped_numbers: forwarded to tune_token when tuning the
+            object of each triple.
+        :param raw_graph: see source_file.
+        :param compression_mode: see source_file.
+        :param zip_base_archive: see source_file.
+        :param silent_error: if True, a line that does not produce exactly three
+            tokens is counted as an error triple and logged; if False (default),
+            such a line raises ParseError.
+        """
 
         super(NtTriplesYielder, self).__init__()
         self._source_file = source_file
         self._raw_graph = raw_graph
         self._triples_count = 0
         self._error_triples = 0
         self._allow_untyped_numbers = allow_untyped_numbers
+        self._silent_error = silent_error
         self._line_reader = self._decide_line_reader(source_file=source_file,
                                                      raw_graph=raw_graph,
                                                      compression_mode=compression_mode,
                                                      zip_base_archive=zip_base_archive)
-        # The following ones are refs to functions. Im avoiding some comparison here.
+        # The following ones are refs to functions. I'm avoiding some comparison here.
         # self.yield_triples = self._yield_triples_not_excluding_namespaces if namespaces_to_ignore is None \
         #     else self._yield_triples_excluding_namespaces
 
     def yield_triples(self):
+        """Yield a tuned (subject, property, object) tuple per non-blank line.
+
+        A line that does not produce exactly three tokens is counted and logged
+        when silent_error is set; otherwise it aborts the traversal.
+
+        :raises ParseError: on a malformed line (unless silent_error is set) or
+            on any other error met while reading or tuning the lines.
+        """
         self._reset_count()
-        for a_line in self._line_reader.read_lines():
-            tokens = self._look_for_tokens(a_line.strip())
-            if len(tokens) != 3:
-                self._error_triples += 1
-                log_msg(verbose=False, msg="This line was discarded: " + a_line)
-            else:
-                yield (tune_token(a_token=tokens[0]),
-                       tune_prop(a_token=tokens[1]),
-                       tune_token(a_token=tokens[2],
-                                  allow_untyped_numbers=self._allow_untyped_numbers))
-                self._triples_count += 1
+        try:
+            for a_line in self._line_reader.read_lines():
+                a_line = a_line.strip()
+                if a_line == "":
+                    continue
+                tokens = self._look_for_tokens(a_line)
+                if len(tokens) == 3:
+                    yield (tune_token(a_token=tokens[0]),
+                           tune_prop(a_token=tokens[1]),
+                           tune_token(a_token=tokens[2],
+                                      allow_untyped_numbers=self._allow_untyped_numbers))
+                    self._triples_count += 1
+                elif self._silent_error:
+                    self._error_triples += 1
+                    log_msg(verbose=False, msg="This line was discarded: " + a_line)
+                else:
+                    # Caught by the handler below and re-raised with the
+                    # "Error while parsing." prefix.
+                    raise ParseError(f"Line: '{a_line}'")
+        # Exception rather than BaseException: the GeneratorExit thrown at the
+        # yield when the consumer stops early, and KeyboardInterrupt, are not
+        # parsing errors and must not be converted into ParseError.
+        except Exception as e:
+            raise ParseError(f"Error while parsing. {e}") from e
 
     def _look_for_tokens(self, str_line):
         result = []
 
@@ -0,0 +1,2 @@
+class ParseError(Exception):
+    """Error raised when the content of an input graph cannot be parsed."""
@@ -9,6 +9,7 @@
 from shexer.io.graph.yielder.big_ttl_triples_yielder import BigTtlTriplesYielder
 from shexer.io.graph.yielder.multi_big_ttl_files_triple_yielder import MultiBigTtlTriplesYielder
 from shexer.io.graph.yielder.multi_zip_triples_yielder import MultiZipTriplesYielder
+from shexer.io.graph.yielder.light_turtle_triples_yielder import LightTurtleTriplesYielder, MultiLightTurtleTriplesYielder
 from shexer.utils.factories.shape_map_parser_factory import get_shape_map_parser
 from shexer.model.graph.endpoint_sgraph import EndpointSGraph
 from shexer.utils.translators.list_of_classes_to_shape_map import ListOfClassesToShapeMap
@@ -110,6 +111,12 @@ def get_triple_yielder(source_file=None, list_of_source_files=None, input_format
                                       list_of_files=list_of_source_files,
                                       compression_mode=compression_mode,
                                       zip_base_archives=zip_base_archives)
+
+    elif input_format in [TURTLE_ITER, TURTLE] and compression_mode is None:
+        result = _yielder_for_turtle_light(source_file=source_file,
+                                           list_of_files=list_of_source_files,
+                                           namespaces_dict=namespaces_dict,
+                                           raw_graph=raw_graph)
     elif input_format == TURTLE_ITER:
         result = _yielder_for_turtle_iter(source_file=source_file,
                                           allow_untyped_numbers=allow_untyped_numbers,
@@ -194,6 +201,14 @@ def _yielder_for_turtle_iter(source_file, raw_graph, allow_untyped_numbers, list
                                          allow_untyped_numbers=allow_untyped_numbers,
                                          compression_mode=compression_mode)
 
+def _yielder_for_turtle_light(source_file, list_of_files, namespaces_dict, raw_graph):
+    """Build the light Turtle yielder: the multi-file one when a list of files is given.
+
+    source_file and raw_graph are only used when list_of_files is None.
+    """
+    if list_of_files is not None:
+        return MultiLightTurtleTriplesYielder(list_of_files=list_of_files,
+                                              namespaces_dict=namespaces_dict)
+    single_source_yielder = LightTurtleTriplesYielder(source_file=source_file,
+                                                      raw_graph=raw_graph,
+                                                      namespaces_dict=namespaces_dict)
+    return single_source_yielder
+
 
 def _yielder_for_tsv_spo(source_file, raw_graph, allow_untyped_numbers, list_of_files,
                          compression_mode, zip_base_archives):
 
@@ -10,14 +10,30 @@
 :Person a sh:NodeShape ;
     shexer:conformant_example ex:Jimmy ;
     shexer:total_conforming_instances 5 ;
+    sh:pattern "^http://example.org/" ;
     sh:property [ a sh:PropertyShape ;
+            shexer:conformant_example "22"^^xsd:string ;
+            shexer:ratio_property_usage 0.8 ;
+            shexer:total_conforming_instances 4 ;
+            rdfs:comment "# 80.0 % (4 instances). obj: xsd:integer. Cardinality: {1}" ;
+            sh:dataType xsd:integer ;
+            sh:maxCount 1 ;
+            sh:path foaf:age ],
+        [ a sh:PropertyShape ;
             shexer:conformant_example "Jimmy"^^xsd:string ;
             shexer:ratio_property_usage 0.6 ;
             shexer:total_conforming_instances 3 ;
             rdfs:comment "# 60.0 % (3 instances). obj: xsd:string. Cardinality: {1}" ;
             sh:dataType xsd:string ;
             sh:maxCount 1 ;
             sh:path foaf:name ],
+        [ a sh:PropertyShape ;
+            shexer:ratio_property_usage 1.0 ;
+            shexer:total_conforming_instances 5 ;
+            sh:in ( foaf:Person ) ;
+            sh:maxCount 1 ;
+            sh:minCount 1 ;
+            sh:path rdf:type ],
         [ a sh:PropertyShape ;
             shexer:conformant_example "Jones"^^xsd:string ;
             shexer:ratio_property_usage 0.4 ;
@@ -33,20 +49,5 @@
             rdfs:comment "# 20.0 % (1 instance). obj: @:Person. Cardinality: {1}" ;
             sh:maxCount 1 ;
             sh:node :Person ;
-            sh:path foaf:knows ],
-        [ a sh:PropertyShape ;
-            shexer:conformant_example "22"^^xsd:string ;
-            shexer:ratio_property_usage 0.8 ;
-            shexer:total_conforming_instances 4 ;
-            rdfs:comment "# 80.0 % (4 instances). obj: xsd:integer. Cardinality: {1}" ;
-            sh:dataType xsd:integer ;
-            sh:maxCount 1 ;
-            sh:path foaf:age ],
-        [ a sh:PropertyShape ;
-            shexer:ratio_property_usage 1.0 ;
-            shexer:total_conforming_instances 5 ;
-            sh:in ( foaf:Person ) ;
-            sh:maxCount 1 ;
-            sh:minCount 1 ;
-            sh:path rdf:type ] ;
+            sh:path foaf:knows ] ;
     sh:targetClass foaf:Person .
@@ -1,10 +1,9 @@
-
 _:Alice <http://example.org/knows> _:Bob .
-_:Alice a <http://example.org/person> .
+_:Alice <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/person> .
 
 _:Bob <http://example.org/knows> _:Eve .
-_:Bob a <http://example.org/person> .
+_:Bob <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/person> .
 _:Bob <http://example.org/name> "Bob" .
 
 _:Eve <http://example.org/name> "Eve" .
-_:Eve a <http://example.org/person> .
+_:Eve <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/person> .
@@ -4,7 +4,7 @@ _:Alice ex:knows _:Bob ;
         a ex:person .
 
 _:Bob ex:knows _:Eve ;
-    a ex:person .
+    a ex:person ;
     ex:name "Bob" .
 
 _:Eve ex:name "Eve" ;
 
@@ -8,6 +8,7 @@ PREFIX : <http://weso.es/shapes/>
 
 :person
 {
+   rdf:type  [ex:person]  ;
    ex:knows  BNode  OR  @:person  ;
-   rdf:type  [ex:person]
+   ex:name  xsd:string  ?
 }
@@ -4,7 +4,7 @@ _:Alice ex:knows _:Bob ;
         a ex:person .
 
 _:Bob ex:knows _:Eve ;
-    a ex:person .
+    a ex:person ;
     ex:name "Bob" .
 
 _:Eve ex:name "Eve" .
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+class ParseError(Exception):`
	`2`	`+ pass`
Original file line number	Diff line number	Diff line change
`@@ -8,6 +8,7 @@ PREFIX : <http://weso.es/shapes/>`
`8`	`8`
`9`	`9`	`:person`
`10`	`10`	`{`
	`11`	`+ rdf:type [ex:person] ;`
`11`	`12`	`ex:knows BNode OR @:person ;`
`12`		`- rdf:type [ex:person]`
	`13`	`+ ex:name xsd:string ?`
`13`	`14`	`}`