@@ -104,14 +104,16 @@ def __add__(self, other: FileMorphsStats) -> FileMorphsStats:
104104 return self
105105
106106
107- class PreprocessOptions :
107+ class PreprocessOptions : # pylint:disable=too-many-instance-attributes
108108 def __init__ (self , ui : Ui_GeneratorsWindow ):
109109 self .filter_square_brackets : bool = ui .squareBracketsCheckBox .isChecked ()
110110 self .filter_round_brackets : bool = ui .roundBracketsCheckBox .isChecked ()
111111 self .filter_slim_round_brackets : bool = ui .slimRoundBracketsCheckBox .isChecked ()
112112 self .filter_numbers : bool = ui .numbersCheckBox .isChecked ()
113113 self .filter_morphemizer_names : bool = ui .namesMorphemizerCheckBox .isChecked ()
114114 self .filter_names_from_file : bool = ui .namesFileCheckBox .isChecked ()
115+ self .filter_custom_chars : bool = ui .customCharactersCheckBox .isChecked ()
116+ self .custom_chars_to_ignore : str = ui .customCharactersLineEdit .text ()
115117
116118 def to_mock_am_config (self ) -> AnkiMorphsConfig :
117119 return Mock (
@@ -122,7 +124,8 @@ def to_mock_am_config(self) -> AnkiMorphsConfig:
122124 preprocess_ignore_numbers = self .filter_numbers ,
123125 preprocess_ignore_names_morphemizer = self .filter_morphemizer_names ,
124126 preprocess_ignore_names_textfile = self .filter_names_from_file ,
125- preprocess_ignore_custom_characters = "" , # todo: add option in generators window?
127+ preprocess_ignore_custom_characters = self .filter_custom_chars ,
128+ preprocess_custom_characters_to_ignore = self .custom_chars_to_ignore ,
126129 )
127130
128131
@@ -206,7 +209,6 @@ def generate_morph_occurrences_by_file(
206209 )
207210 preprocess_options = PreprocessOptions (ui )
208211 morph_occurrences_by_file : dict [Path , dict [str , MorphOccurrence ]] = {}
209-
210212 sorted_input_files : list [Path ]
211213
212214 if sorted_by_table :
@@ -217,6 +219,8 @@ def generate_morph_occurrences_by_file(
217219 else :
218220 sorted_input_files = input_files
219221
222+ translation_table = str .maketrans ("" , "" , preprocess_options .custom_chars_to_ignore )
223+
220224 for input_file in sorted_input_files :
221225 if mw .progress .want_cancel (): # user clicked 'x' button
222226 raise CancelledOperationException
@@ -233,6 +237,7 @@ def generate_morph_occurrences_by_file(
233237 preprocess_options = preprocess_options ,
234238 file_path = input_file ,
235239 morphemizer = _morphemizer ,
240+ translation_table = translation_table ,
236241 )
237242 )
238243 morph_occurrences_by_file [input_file ] = file_morph_occurrences
@@ -244,9 +249,9 @@ def create_file_morph_occurrences(
244249 preprocess_options : PreprocessOptions ,
245250 file_path : Path ,
246251 morphemizer : Morphemizer ,
252+ translation_table : dict [int , int | None ],
247253) -> dict [str , MorphOccurrence ]:
248254
249- morph_occurrences : dict [str , MorphOccurrence ]
250255 raw_lines : list [str ]
251256 filtered_lines : list [str ] = []
252257 extension = file_path .suffix
@@ -261,22 +266,21 @@ def create_file_morph_occurrences(
261266 for line in raw_lines :
262267 # lower-case to avoid proper noun false-positives
263268 filtered_line = text_preprocessing .get_processed_text (
264- am_config = mock_am_config , text = line .strip ().lower ()
269+ am_config = mock_am_config ,
270+ text = line .strip ().lower (),
271+ translation_table = translation_table ,
265272 )
266273 if filtered_line :
267274 filtered_lines .append (filtered_line )
268-
269275 except UnicodeDecodeError as exc :
270276 raise UnicodeException (path = file_path ) from exc
271277
272- morph_occurrences = get_morph_occurrences (
278+ return get_morph_occurrences (
273279 mock_am_config = mock_am_config ,
274280 morphemizer = morphemizer ,
275281 all_lines = filtered_lines ,
276282 )
277283
278- return morph_occurrences
279-
280284
281285def get_morph_occurrences (
282286 mock_am_config : AnkiMorphsConfig ,
0 commit comments