Skip to content

Commit b60e7b7

Browse files
committed
Fix tests with new chunking
The new chunking splits text differently, so the tests need a different approach to validating the generated kobo spans.
1 parent d63e6b4 commit b60e7b7

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

tests/test_container.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,12 @@ def __run_single_node_test(self, text, text_only=False, number_of_sentences=None
277277
if number_of_sentences is not None:
278278
self.assertEqual(len(node.getchildren()), number_of_sentences)
279279

280-
for span in node.getchildren():
280+
para_count = 1
281+
text_chunks = [
282+
chunk.strip() for chunk in text.split("\n") if chunk.strip() != ""
283+
]
284+
for span, text_chunk in zip(node.getchildren(), text_chunks):
285+
self.assertEqual(span.text, text_chunk)
281286
# spans should be nonempty and should not end in whitespace (PR#191)
282287
self.assertFalse(re.match(r'\s', span.text[-1]))
283288
# tail of span should *only* be whitespace
@@ -286,8 +291,9 @@ def __run_single_node_test(self, text, text_only=False, number_of_sentences=None
286291
# attrib is technically of type lxml.etree._Attrib, but functionally
287292
# it's a dict. Cast it here to make assertDictEqual() happy.
288293
self.assertDictEqual(
289-
dict(span.attrib), {"id": "kobo.1.1", "class": "koboSpan"}
294+
dict(span.attrib), {"id": f"kobo.1.{para_count}", "class": "koboSpan"}
290295
)
296+
para_count += 1
291297

292298
# remaining text should only contain whitespace
293299
self.assertTrue(re.match(r'\s*', node.text or ''))

0 commit comments

Comments
 (0)