Fix tests with new chunking

jgoguen · jgoguen · commit b60e7b768080 · 2024-10-13T19:55:31.000-04:00
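Text is now split differently, so validating the generated kobo spans requires a different approach: each span is compared against the corresponding non-empty, stripped line of the input text, and span IDs are expected to increment per span (kobo.1.1, kobo.1.2, ...).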
diff --git a/tests/test_container.py b/tests/test_container.py
@@ -277,7 +277,12 @@ def __run_single_node_test(self, text, text_only=False, number_of_sentences=None
         if number_of_sentences is not None:
             self.assertEqual(len(node.getchildren()), number_of_sentences)
 
-        for span in node.getchildren():
+        para_count = 1
+        text_chunks = [
+            chunk.strip() for chunk in text.split("\n") if chunk.strip() != ""
+        ]
+        for span, text_chunk in zip(node.getchildren(), text_chunks):
+            self.assertEqual(span.text, text_chunk)
             # spans should not end in whitespace (PR#191), and be nonempty
             self.assertFalse(re.match(r'\s', span.text[-1]))
             # tail of span should *only* be whitespace
@@ -286,8 +291,9 @@ def __run_single_node_test(self, text, text_only=False, number_of_sentences=None
             # attrib is technically of type lxml.etree._Attrib, but functionally
             # it's a dict. Cast it here to make assertDictEqual() happy.
             self.assertDictEqual(
-                dict(span.attrib), {"id": "kobo.1.1", "class": "koboSpan"}
+                dict(span.attrib), {"id": f"kobo.1.{para_count}", "class": "koboSpan"}
             )
+            para_count += 1
 
         # remaining text should only contain whitespace
         self.assertTrue(re.match(r'\s*', node.text or ''))