@@ -283,14 +283,63 @@ def test_highlighting(fake_environment): # pylint:disable=unused-argument
283283 # In this second example the morphemizer finds the correct morph. However, the regex does
284284 # not match the morph because of the whitespace between 'す' and 'ね', which means that no
285285 # spans are made, potentially causing an 'index out of range' error immediately.
286- input_text : str = "そうです ね"
287- card_morphs : list [ Morpheme ] = [
286+ input_text = "そうです ね"
287+ card_morphs = [
288288 Morpheme (
289289 lemma = "そうですね" , inflection = "そうですね" , highest_learning_interval = 0
290290 ),
291291 ]
292- correct_result : str = "そうです ね"
293- highlighted_text : str = text_highlighting .get_highlighted_text (
292+ correct_result = "そうです ね"
293+ highlighted_text = text_highlighting .get_highlighted_text (
294+ am_config , card_morphs , input_text
295+ )
296+
297+ assert highlighted_text == correct_result
298+
299+ # This third example checks if letter casing is preserved in the highlighted version
300+ input_text = "Das sind doch die Schädel von den Flüchtlingen, die wir gefunden hatten! Keine Sorge, dein Kopf wird auch schon bald in meiner Sammlung sein."
301+ card_morphs = [
302+ Morpheme (
303+ lemma = "Flüchtling" , inflection = "flüchtlingen" , highest_learning_interval = 0
304+ ),
305+ Morpheme (lemma = "Sammlung" , inflection = "sammlung" , highest_learning_interval = 0 ),
306+ Morpheme (lemma = "finden" , inflection = "gefunden" , highest_learning_interval = 0 ),
307+ Morpheme (lemma = "Schädel" , inflection = "schädel" , highest_learning_interval = 0 ),
308+ Morpheme (lemma = "haben" , inflection = "hatten" , highest_learning_interval = 0 ),
309+ Morpheme (lemma = "mein" , inflection = "meiner" , highest_learning_interval = 0 ),
310+ Morpheme (lemma = "Sorge" , inflection = "sorge" , highest_learning_interval = 0 ),
311+ Morpheme (lemma = "kein" , inflection = "keine" , highest_learning_interval = 0 ),
312+ Morpheme (lemma = "schon" , inflection = "schon" , highest_learning_interval = 0 ),
313+ Morpheme (lemma = "Kopf" , inflection = "kopf" , highest_learning_interval = 0 ),
314+ Morpheme (lemma = "auch" , inflection = "auch" , highest_learning_interval = 0 ),
315+ Morpheme (lemma = "bald" , inflection = "bald" , highest_learning_interval = 0 ),
316+ Morpheme (lemma = "dein" , inflection = "dein" , highest_learning_interval = 0 ),
317+ Morpheme (lemma = "doch" , inflection = "doch" , highest_learning_interval = 0 ),
318+ Morpheme (lemma = "sein" , inflection = "sein" , highest_learning_interval = 0 ),
319+ Morpheme (lemma = "sein" , inflection = "sind" , highest_learning_interval = 0 ),
320+ Morpheme (lemma = "werden" , inflection = "wird" , highest_learning_interval = 0 ),
321+ Morpheme (lemma = "der" , inflection = "das" , highest_learning_interval = 0 ),
322+ Morpheme (lemma = "der" , inflection = "den" , highest_learning_interval = 0 ),
323+ Morpheme (lemma = "der" , inflection = "die" , highest_learning_interval = 0 ),
324+ Morpheme (lemma = "von" , inflection = "von" , highest_learning_interval = 0 ),
325+ Morpheme (lemma = "wir" , inflection = "wir" , highest_learning_interval = 0 ),
326+ Morpheme (lemma = "in" , inflection = "in" , highest_learning_interval = 0 ),
327+ ]
328+ correct_result = '<span morph-status="unknown">Das</span> <span morph-status="unknown">sind</span> <span morph-status="unknown">doch</span> <span morph-status="unknown">die</span> <span morph-status="unknown">Schädel</span> <span morph-status="unknown">von</span> <span morph-status="unknown">den</span> <span morph-status="unknown">Flüchtlingen</span>, <span morph-status="unknown">die</span> <span morph-status="unknown">wir</span> <span morph-status="unknown">gefunden</span> <span morph-status="unknown">hatten</span>! <span morph-status="unknown">Keine</span> <span morph-status="unknown">Sorge</span>, <span morph-status="unknown">dein</span> <span morph-status="unknown">Kopf</span> <span morph-status="unknown">wird</span> <span morph-status="unknown">auch</span> <span morph-status="unknown">schon</span> <span morph-status="unknown">bald</span> <span morph-status="unknown">in</span> <span morph-status="unknown">meiner</span> <span morph-status="unknown">Sammlung</span> <span morph-status="unknown">sein</span>.'
329+ highlighted_text = text_highlighting .get_highlighted_text (
330+ am_config , card_morphs , input_text
331+ )
332+
333+ assert highlighted_text == correct_result
334+
335+ # This fourth example checks if morphs with special regex characters are escaped properly
336+ input_text = "몇...?<div><br></div><div>몇...</div>"
337+ card_morphs = [
338+ Morpheme (lemma = "?몇" , inflection = "?몇" , highest_learning_interval = 0 ),
339+ Morpheme (lemma = "몇" , inflection = "몇" , highest_learning_interval = 0 ),
340+ ]
341+ correct_result = '<span morph-status="unknown">몇</span>...?<div><br></div><div><span morph-status="unknown">몇</span>...</div>'
342+ highlighted_text = text_highlighting .get_highlighted_text (
294343 am_config , card_morphs , input_text
295344 )
296345
0 commit comments