@@ -277,7 +277,12 @@ def __run_single_node_test(self, text, text_only=False, number_of_sentences=None
277
277
if number_of_sentences is not None :
278
278
self .assertEqual (len (node .getchildren ()), number_of_sentences )
279
279
280
- for span in node .getchildren ():
280
+ para_count = 1
281
+ text_chunks = [
282
+ chunk .strip () for chunk in text .split ("\n " ) if chunk .strip () != ""
283
+ ]
284
+ for span , text_chunk in zip (node .getchildren (), text_chunks ):
285
+ self .assertEqual (span .text , text_chunk )
281
286
# spans should not end in whitespace (PR#191), and be nonempty
282
287
self .assertFalse (re .match (r'\s' , span .text [- 1 ]))
283
288
# tail of span should *only* be whitespace
@@ -286,8 +291,9 @@ def __run_single_node_test(self, text, text_only=False, number_of_sentences=None
286
291
# attrib is technically of type lxml.etree._Attrib, but functionally
287
292
# it's a dict. Cast it here to make assertDictEqual() happy.
288
293
self .assertDictEqual (
289
- dict (span .attrib ), {"id" : "kobo.1.1 " , "class" : "koboSpan" }
294
+ dict (span .attrib ), {"id" : f "kobo.1.{ para_count } " , "class" : "koboSpan" }
290
295
)
296
+ para_count += 1
291
297
292
298
# remaining text should only contain whitespace
293
299
self .assertTrue (re .match (r'\s*' , node .text or '' ))
0 commit comments