Fix tags and add URLs (#603)

Gykes · web-flow · commit fe28ba2308d1 · 2025-08-16T23:54:13.000+03:00
diff --git a/plugins/nfoSceneParser/nfoParser.py b/plugins/nfoSceneParser/nfoParser.py
@@ -193,8 +193,7 @@ def parse(self):
             # Below are NFO extensions or liberal tag interpretations (not part of the nfo spec)
             "movie": self._nfo_root.findtext("set/name") or self._get_default("title", "nfo"),
             "scene_index": self._nfo_root.findtext("set/index") or None,
-            # TODO: read multiple URL tags into array
-            "urls": None if not self._nfo_root.findtext("url") else [self._nfo_root.findtext("url")],
+            "urls": [url.text for url in self._nfo_root.findall("url") if url.text],
 
         }
         return file_data
diff --git a/plugins/nfoSceneParser/nfoSceneParser.py b/plugins/nfoSceneParser/nfoSceneParser.py
@@ -5,6 +5,7 @@
 import config
 import log
 import re
+import unicodedata
 from abstractParser import AbstractParser
 from nfoParser import NfoParser
 from reParser import RegExParser
@@ -135,7 +136,7 @@ def __find_create_scene_data(self):
             map(lambda p: p.get("id"), self._scene["performers"]))
         scene_tag_ids = list(map(lambda t: t.get("id"), self._scene["tags"]))
         # in "reload" mode, removes the reload marker tag as part of the scene update
-        if config.reload_tag and self._reload_tag_id:
+        if config.reload_tag and self._reload_tag_id and self._reload_tag_id in scene_tag_ids:
             scene_tag_ids.remove(self._reload_tag_id)
         # Currently supports only one movie (the first one...)
         scene_movie_id = scene_movie_index = None
@@ -178,14 +179,21 @@ def levenshtein_distance(self, str1, str2, ):
     def __is_matching(self, text1, text2, tolerance=False):
         if not text1 or not text2:
             return text1 == text2
+        
+        # Normalize Unicode to handle emoji and special character variations
+        normalized_text1 = unicodedata.normalize('NFC', text1).strip()
+        normalized_text2 = unicodedata.normalize('NFC', text2).strip()
+        
         if tolerance:
-            distance = self.levenshtein_distance(text1.lower(), text2.lower())
-            match = distance < (config.levenshtein_distance_tolerance * log10(len(text1)))
+            distance = self.levenshtein_distance(normalized_text1.lower(), normalized_text2.lower())
+            # Ensure minimum tolerance for very short strings (like single emoji)
+            tolerance_threshold = max(config.levenshtein_distance_tolerance * log10(max(len(normalized_text1), 2)), 1)
+            match = distance < tolerance_threshold
             if match and distance:
-                log.LogDebug(f"Matched with distance {distance}: '{text1}' with '{text2}'")
+                log.LogDebug(f"Matched with distance {distance}: '{normalized_text1}' with '{normalized_text2}'")
             return match
         else:
-            return text1.lower() == text2.lower()
+            return normalized_text1.lower() == normalized_text2.lower()
 
     def __find_create_performers(self):
         performer_ids = []

Original file line number	Diff line number	Diff line change
`@@ -193,8 +193,7 @@ def parse(self):`
`193`	`193`	`# Below are NFO extensions or liberal tag interpretations (not part of the nfo spec)`
`194`	`194`	`"movie": self._nfo_root.findtext("set/name") or self._get_default("title", "nfo"),`
`195`	`195`	`"scene_index": self._nfo_root.findtext("set/index") or None,`
`196`		`- # TODO: read multiple URL tags into array`
`197`		`- "urls": None if not self._nfo_root.findtext("url") else [self._nfo_root.findtext("url")],`
	`196`	`+ "urls": [url.text for url in self._nfo_root.findall("url") if url.text],`
`198`	`197`
`199`	`198`	`}`
`200`	`199`	`return file_data`