Skip to content

Commit 3874f6b

Browse files
committed
fixed morphemizer not removing names (#41)
1 parent f37aee0 commit 3874f6b

File tree

5 files changed

+42
-22
lines changed

5 files changed

+42
-22
lines changed

ankimorphs/__init__.py

Lines changed: 21 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -406,7 +406,7 @@ def add_name_action(web_view: AnkiWebView, menu: QMenu) -> None:
406406

407407

408408
def create_frequency_file_action() -> QAction:
409-
action = QAction("&Generate Frequency File", mw)
409+
action = QAction("&Frequency File Generator", mw)
410410
action.triggered.connect(frequency_file_generator.main)
411411
return action
412412

@@ -423,21 +423,27 @@ def test_function() -> None:
423423
assert mw is not None
424424
assert mw.col.db is not None
425425

426-
am_db = AnkiMorphsDB()
427-
428-
with am_db.con:
429-
result = am_db.con.execute(
430-
"""
431-
SELECT morph_norm, morph_inflected, highest_learning_interval
432-
FROM Card_Morph_Map
433-
INNER JOIN Morphs ON
434-
Card_Morph_Map.morph_norm = Morphs.norm AND Card_Morph_Map.morph_inflected = Morphs.inflected
435-
WHERE card_id = 1691325367067
436-
"""
437-
).fetchall()
438-
print(f"result?: {result}")
426+
# am_db = AnkiMorphsDB()
427+
#
428+
# with am_db.con:
429+
# result = am_db.con.execute(
430+
# """
431+
# SELECT morph_norm, morph_inflected, highest_learning_interval
432+
# FROM Card_Morph_Map
433+
# INNER JOIN Morphs ON
434+
# Card_Morph_Map.morph_norm = Morphs.norm AND Card_Morph_Map.morph_inflected = Morphs.inflected
435+
# WHERE card_id = 1691325367067
436+
# """
437+
# ).fetchall()
438+
# print(f"result?: {result}")
439+
#
440+
# am_db.con.close()
439441

440-
am_db.con.close()
442+
# morphemizer = MecabMorphemizer()
443+
# expression = "うん あ カフェ モカ お願い おかわり"
444+
# morphs = get_morphemes(morphemizer, expression, am_config=AnkiMorphsConfig())
445+
# morph_list = [morph.inflected for morph in morphs]
446+
# print(f"morphs: {morph_list}")
441447

442448

443449
main()

ankimorphs/frequency_file_generator.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818

1919
from .exceptions import CancelledOperationException, EmptyFileSelectionException
2020
from .morph_utils import (
21-
_remove_names,
21+
remove_names_morphemizer,
22+
remove_names_textfile,
2223
round_brackets_regex,
2324
slim_round_brackets_regexp,
2425
square_brackets_regex,
@@ -136,8 +137,10 @@ def _background_generate_frequency_file(self, col: Collection) -> None:
136137
for line in file:
137138
expression = self._filter_expression(line)
138139
morphs = morphemizer.get_morphemes_from_expr(expression)
140+
if self.ui.namesMorphemizerCheckBox.isChecked():
141+
morphs = remove_names_morphemizer(morphs)
139142
if self.ui.namesFileCheckBox.isChecked():
140-
morphs = _remove_names(morphs)
143+
morphs = remove_names_textfile(morphs)
141144
for morph in morphs:
142145
key = morph.norm + morph.inflected
143146
if key in morph_frequency_dict:

ankimorphs/morph_utils.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@ def get_morphemes(
1515
) -> list[Morpheme]:
1616
expression = _get_parsed_expression(am_config, expression)
1717
morphs = morphemizer.get_morphemes_from_expr(expression)
18-
if not am_config.parse_ignore_names_textfile:
19-
return morphs
20-
morphs = _remove_names(morphs)
18+
19+
if am_config.parse_ignore_names_morphemizer:
20+
morphs = remove_names_morphemizer(morphs)
21+
22+
if am_config.parse_ignore_names_textfile:
23+
morphs = remove_names_textfile(morphs)
24+
2125
return morphs
2226

2327

@@ -37,7 +41,7 @@ def _get_parsed_expression(am_config: AnkiMorphsConfig, expression: str) -> str:
3741
return expression
3842

3943

40-
def _remove_names(morphs: list[Morpheme]) -> list[Morpheme]:
44+
def remove_names_textfile(morphs: list[Morpheme]) -> list[Morpheme]:
4145
names = name_file_utils.create_hash_set_out_of_names()
4246
non_name_morphs: list[Morpheme] = []
4347

@@ -46,3 +50,7 @@ def _remove_names(morphs: list[Morpheme]) -> list[Morpheme]:
4650
non_name_morphs.append(morph)
4751

4852
return non_name_morphs
53+
54+
55+
def remove_names_morphemizer(morphs: list[Morpheme]) -> list[Morpheme]:
56+
return [morph for morph in morphs if not morph.is_proper_noun()]

ankimorphs/morpheme.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def __hash__(self):
5353
(self.norm, self.base, self.inflected, self.read, self.pos, self.sub_pos)
5454
)
5555

56+
def is_proper_noun(self) -> bool:
57+
return self.sub_pos == "固有名詞" or self.pos == "PROPN"
58+
5659

5760
class SimplifiedMorph:
5861
__slots__ = (

ankimorphs/settings_dialog.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def __init__(self, parent: Optional[QMainWindow] = None) -> None:
5454
self.ui.tabWidget.currentChanged.connect(self.tab_change)
5555

5656
# Semantic Versioning https://semver.org/
57-
self.ui.ankimorphs_version_label.setText("AnkiMorphs version: 0.5.0-alpha")
57+
self.ui.ankimorphs_version_label.setText("AnkiMorphs version: 0.5.1-alpha")
5858

5959
def _setup_note_filters_table(
6060
self, config_filters: list[AnkiMorphsConfigFilter]

0 commit comments

Comments
 (0)