|
5 | 5 | import config |
6 | 6 | import log |
7 | 7 | import re |
| 8 | +import unicodedata |
8 | 9 | from abstractParser import AbstractParser |
9 | 10 | from nfoParser import NfoParser |
10 | 11 | from reParser import RegExParser |
@@ -135,7 +136,7 @@ def __find_create_scene_data(self): |
135 | 136 | map(lambda p: p.get("id"), self._scene["performers"])) |
136 | 137 | scene_tag_ids = list(map(lambda t: t.get("id"), self._scene["tags"])) |
137 | 138 | # in "reload" mode, removes the reload marker tag as part of the scene update |
138 | | - if config.reload_tag and self._reload_tag_id: |
| 139 | + if config.reload_tag and self._reload_tag_id and self._reload_tag_id in scene_tag_ids: |
139 | 140 | scene_tag_ids.remove(self._reload_tag_id) |
140 | 141 | # Currently supports only one movie (the first one...) |
141 | 142 | scene_movie_id = scene_movie_index = None |
@@ -178,14 +179,21 @@ def levenshtein_distance(self, str1, str2, ): |
178 | 179 | def __is_matching(self, text1, text2, tolerance=False): |
179 | 180 | if not text1 or not text2: |
180 | 181 | return text1 == text2 |
| 182 | + |
| 183 | + # Normalize both strings to Unicode NFC and trim surrounding whitespace so equivalent encodings of emoji and special characters compare equal |
| 184 | + normalized_text1 = unicodedata.normalize('NFC', text1).strip() |
| 185 | + normalized_text2 = unicodedata.normalize('NFC', text2).strip() |
| 186 | + |
181 | 187 | if tolerance: |
182 | | - distance = self.levenshtein_distance(text1.lower(), text2.lower()) |
183 | | - match = distance < (config.levenshtein_distance_tolerance * log10(len(text1))) |
| 188 | + distance = self.levenshtein_distance(normalized_text1.lower(), normalized_text2.lower()) |
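| 189 | + # Clamp the length to at least 2 and the threshold to at least 1: for very short strings (such as a single emoji) log10(1) == 0 would give a zero threshold that rejects even an exact match |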
| 190 | + tolerance_threshold = max(config.levenshtein_distance_tolerance * log10(max(len(normalized_text1), 2)), 1) |
| 191 | + match = distance < tolerance_threshold |
184 | 192 | if match and distance: |
185 | | - log.LogDebug(f"Matched with distance {distance}: '{text1}' with '{text2}'") |
| 193 | + log.LogDebug(f"Matched with distance {distance}: '{normalized_text1}' with '{normalized_text2}'") |
186 | 194 | return match |
187 | 195 | else: |
188 | | - return text1.lower() == text2.lower() |
| 196 | + return normalized_text1.lower() == normalized_text2.lower() |
189 | 197 |
|
190 | 198 | def __find_create_performers(self): |
191 | 199 | performer_ids = [] |
|
0 commit comments