Skip to content

Commit 1675102

Browse files
committed
added spaCy manager
1 parent d6f3e06 commit 1675102

File tree

18 files changed

+711
-264
lines changed

18 files changed

+711
-264
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
runs-on: ${{ matrix.os }}
1010
strategy:
1111
matrix:
12-
python_version: [3.9, 3.13]
12+
python_version: [3.13]
1313
os: [ubuntu-latest, macOS-latest, windows-latest]
1414
fail-fast: false
1515
steps:

ankimorphs/__init__.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
from .recalc import recalc_main
6262
from .settings import settings_dialog
6363
from .settings.settings_dialog import SettingsDialog
64+
from .spacy_manager import SpacyManagerDialog
6465
from .tag_selection_dialog import TagSelectionDialog
6566
from .toolbar_stats import MorphToolbarStats
6667

@@ -73,7 +74,7 @@
7374

7475

7576
def main() -> None:
76-
# Support anki version 2.1.50 and above
77+
# Support anki version 25.07.3 and above
7778
# Place hooks in the order they are executed
7879

7980
gui_hooks.top_toolbar_did_init_links.append(init_toolbar_items)
@@ -212,6 +213,10 @@ def register_addon_dialogs() -> None:
212213
name=am_globals.KNOWN_MORPHS_EXPORTER_DIALOG_NAME,
213214
creator=KnownMorphsExporterDialog,
214215
)
216+
aqt.dialogs.register_dialog(
217+
name=am_globals.SPACY_MANAGER_DIALOG_NAME,
218+
creator=SpacyManagerDialog,
219+
)
215220

216221

217222
def redraw_toolbar() -> None:
@@ -235,6 +240,7 @@ def init_tool_menu_and_actions() -> None:
235240
generators_action = create_generators_dialog_action(am_config)
236241
progression_action = create_progression_dialog_action(am_config)
237242
known_morphs_exporter_action = create_known_morphs_exporter_action(am_config)
243+
spacy_manager_action = create_spacy_manager_dialog_action()
238244
reset_tags_action = create_tag_reset_action()
239245
guide_action = create_guide_action()
240246
changelog_action = create_changelog_action()
@@ -245,6 +251,7 @@ def init_tool_menu_and_actions() -> None:
245251
am_tool_menu.addAction(generators_action)
246252
am_tool_menu.addAction(progression_action)
247253
am_tool_menu.addAction(known_morphs_exporter_action)
254+
am_tool_menu.addAction(spacy_manager_action)
248255
am_tool_menu.addAction(reset_tags_action)
249256
am_tool_menu.addAction(guide_action)
250257
am_tool_menu.addAction(changelog_action)
@@ -594,6 +601,17 @@ def create_known_morphs_exporter_action(am_config: AnkiMorphsConfig) -> QAction:
594601
return action
595602

596603

604+
def create_spacy_manager_dialog_action() -> QAction:
    """
    Builds the '&spaCy Manager' menu action.

    Triggering the action opens the dialog registered under
    'am_globals.SPACY_MANAGER_DIALOG_NAME' via 'aqt.dialogs.open'.
    """
    open_spacy_manager = partial(
        aqt.dialogs.open,
        name=am_globals.SPACY_MANAGER_DIALOG_NAME,
    )

    spacy_manager_action = QAction("&spaCy Manager", mw)
    spacy_manager_action.triggered.connect(open_spacy_manager)
    return spacy_manager_action
613+
614+
597615
def create_test_action() -> QAction:
598616
keys = QKeySequence("Ctrl+T")
599617
action = QAction("&Test", mw)

ankimorphs/ankimorphs_globals.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""
66

77
# Semantic Versioning https://semver.org/
8-
__version__ = "4.0.2"
8+
__version__ = "5.0.0"
99

1010
DEV_MODE: bool = False
1111

@@ -19,6 +19,7 @@
1919
GENERATOR_DIALOG_NAME: str = "am_generator_dialog"
2020
PROGRESSION_DIALOG_NAME: str = "am_progression_dialog"
2121
KNOWN_MORPHS_EXPORTER_DIALOG_NAME: str = "am_known_morphs_exporter_dialog"
22+
SPACY_MANAGER_DIALOG_NAME: str = "am_spacy_manager_dialog"
2223

2324
# The static names of the extra fields
2425
EXTRA_FIELD_ALL_MORPHS: str = "am-all-morphs"

ankimorphs/extra_settings/ankimorphs_extra_settings.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
KnownMorphsExporterKeys,
1717
PreprocessKeys,
1818
ProgressionWindowKeys,
19+
SpacyManagerWindowKeys,
1920
)
2021

2122

@@ -74,6 +75,13 @@ def save_known_morphs_exporter_settings(
7475
self.endGroup()
7576
# fmt: on
7677

78+
def spacy_manager_window_settings(self, geometry: QByteArray) -> None:
    """
    Stores the spaCy manager window geometry in the
    'keys.Dialogs.SPACY_MANAGER_WINDOW' settings group.
    """
    # NOTE(review): the sibling methods in this class use a 'save_' prefix
    # (e.g. 'save_progression_window_settings'); this one does not.
    group_name = keys.Dialogs.SPACY_MANAGER_WINDOW
    geometry_key = SpacyManagerWindowKeys.WINDOW_GEOMETRY

    self.beginGroup(group_name)
    self.setValue(geometry_key, geometry)
    self.endGroup()
84+
7785
def save_progression_window_settings(
7886
self, ui: Ui_ProgressionWindow, geometry: QByteArray
7987
) -> None:

ankimorphs/extra_settings/extra_settings_keys.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ class General:
55
class Dialogs:
66
GENERATORS_WINDOW = "generators_window"
77
KNOWN_MORPHS_EXPORTER = "known_morphs_exporter"
8+
SPACY_MANAGER_WINDOW = "spacy_manager_window"
89
PROGRESSION_WINDOW = "progression_window"
910
GENERATOR_OUTPUT_PRIORITY_FILE = "generator_output_priority_file"
1011
GENERATOR_OUTPUT_STUDY_PLAN = "generator_output_study_plan"
@@ -46,6 +47,10 @@ class KnownMorphsExporterKeys:
4647
OCCURRENCES = "occurrences"
4748

4849

50+
class SpacyManagerWindowKeys:
    """Setting keys used for the spaCy manager window."""

    # key under which the window geometry is stored
    WINDOW_GEOMETRY = "window_geometry"
52+
53+
4954
class ProgressionWindowKeys:
5055
WINDOW_GEOMETRY = "window_geometry"
5156
PRIORITY_FILE = "priority_file"

ankimorphs/message_box_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@
99
)
1010

1111

12+
def show_info_box(title: str, body: str, parent: QWidget) -> None:
    """
    Shows a modal information message box with a single 'Ok' button.

    'body' is rendered as Markdown.
    """
    box = QMessageBox(parent)

    # appearance
    box.setIcon(QMessageBox.Icon.Information)
    box.setWindowTitle(title)
    box.setStandardButtons(QMessageBox.StandardButton.Ok)

    # the text format is set before the text itself, as in the other setup steps
    box.setTextFormat(Qt.TextFormat.MarkdownText)
    box.setText(body)

    box.exec()
20+
21+
1222
def show_warning_box(title: str, body: str, parent: QWidget) -> bool:
1323
"""
1424
Returns 'True' if user clicked 'Ok' button

ankimorphs/morphemizers/spacy_wrapper.py

Lines changed: 127 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22

33
import functools
44
import os.path
5+
import shutil
6+
import subprocess
57
import sys
68
from types import ModuleType
79
from typing import Any
810

911
from anki.utils import is_win
1012
from aqt import mw
13+
from aqt.package import venv_binary
1114

1215
updated_python_path: bool = False
1316
testing_environment: bool = False
@@ -20,35 +23,71 @@
2023
_SpacyTokenizer: Any = None # spacy.tokenizer
2124
_SpacyDoc: Any = None # spacy.tokens.doc
2225

26+
27+
# spaCy does not provide a CLI command to query available languages or models, so we hardcode them.
28+
# Every language offers the same three model sizes ("sm", "md", "lg"), so the
# table is generated from (language name, model name prefix) pairs.
available_langs_and_models: dict[str, list[str]] = {
    language: [f"{model_prefix}_{size}" for size in ("sm", "md", "lg")]
    for language, model_prefix in (
        ("Catalan", "ca_core_news"),
        ("Chinese", "zh_core_web"),
        ("Croatian", "hr_core_news"),
        ("Danish", "da_core_news"),
        ("Dutch", "nl_core_news"),
        ("English", "en_core_web"),
        ("Finnish", "fi_core_news"),
        ("French", "fr_core_news"),
        ("German", "de_core_news"),
        ("Greek", "el_core_news"),
        ("Italian", "it_core_news"),
        ("Japanese", "ja_core_news"),
        ("Korean", "ko_core_news"),
        ("Lithuanian", "lt_core_news"),
        ("Macedonian", "mk_core_news"),
        ("Norwegian Bokmål", "nb_core_news"),
        ("Polish", "pl_core_news"),
        ("Portuguese", "pt_core_news"),
        ("Romanian", "ro_core_news"),
        ("Russian", "ru_core_news"),
        ("Slovenian", "sl_core_news"),
        ("Spanish", "es_core_news"),
        ("Swedish", "sv_core_news"),
        ("Ukrainian", "uk_core_news"),
    )
}
56+
57+
2358
LANGUAGE_PIPE_CONFIGS: dict[str, set[str]] = {
24-
"ja": {""}, # Japanese uses SudachiPy
25-
"nb": {"tok2vec", "attribute_ruler", "lemmatizer", "morphologizer", "ner"},
59+
"ca": {"tok2vec", "morphologizer", "lemmatizer"},
60+
"zh": {"tok2vec", "tagger", "attribute_ruler"},
61+
"hr": {"tok2vec", "lemmatizer", "morphologizer"},
2662
"da": {"tok2vec", "morphologizer", "lemmatizer"},
27-
"de": {"tok2vec", "lemmatizer", "morphologizer"},
28-
"fr": {"tok2vec", "lemmatizer", "morphologizer"},
29-
"en": {"tok2vec", "tagger", "attribute_ruler", "lemmatizer", "morphologizer"},
30-
"es": {"tok2vec", "lemmatizer", "morphologizer"},
31-
"sv": {"tok2vec", "lemmatizer", "morphologizer"},
3263
"nl": {"tok2vec", "lemmatizer", "morphologizer"},
33-
"hr": {"tok2vec", "lemmatizer", "morphologizer"},
64+
"en": {"tok2vec", "tagger", "attribute_ruler", "lemmatizer", "morphologizer"},
3465
"fi": {"tok2vec", "lemmatizer", "morphologizer"},
66+
"fr": {"tok2vec", "lemmatizer", "morphologizer"},
67+
"de": {"tok2vec", "lemmatizer", "morphologizer"},
3568
"el": {"tok2vec", "lemmatizer", "morphologizer"},
3669
"it": {"tok2vec", "lemmatizer", "morphologizer"},
70+
"ja": {""}, # Japanese uses SudachiPy
71+
"ko": {"tok2vec", "morphologizer", "lemmatizer"},
3772
"lt": {"tok2vec", "lemmatizer", "morphologizer"},
3873
"mk": {"tok2vec", "lemmatizer", "morphologizer"},
74+
"nb": {"tok2vec", "attribute_ruler", "lemmatizer", "morphologizer", "ner"},
3975
"pl": {"tok2vec", "lemmatizer", "morphologizer"},
4076
"pt": {"tok2vec", "lemmatizer", "morphologizer"},
4177
"ro": {"tok2vec", "tagger", "morphologizer", "lemmatizer", "attribute_ruler"},
42-
"sl": {"tok2vec", "lemmatizer", "morphologizer"},
43-
"ca": {"tok2vec", "morphologizer", "lemmatizer"},
4478
"ru": {"tok2vec", "morphologizer", "lemmatizer"},
79+
"sl": {"tok2vec", "lemmatizer", "morphologizer"},
80+
"es": {"tok2vec", "lemmatizer", "morphologizer"},
81+
"sv": {"tok2vec", "lemmatizer", "morphologizer"},
4582
"uk": {"tok2vec", "morphologizer", "lemmatizer"},
46-
"ko": {"tok2vec", "morphologizer", "lemmatizer"},
47-
"zh": {"tok2vec", "tagger", "attribute_ruler"},
4883
}
4984

5085

5186
def load_spacy_modules() -> None:
87+
# We load the spacy modules in this complicated way to maintain at least
88+
# some form of static type checking, and to minimize error checking
89+
# and exception handling
90+
5291
global updated_python_path
5392
global successful_import
5493
global _spacy
@@ -59,14 +98,9 @@ def load_spacy_modules() -> None:
5998

6099
# dev environments should already have spaCy, so this can be skipped
61100
if not updated_python_path and not testing_environment:
62-
# Anki only looks into its own directories for python packages,
63-
# to add other lookup folders we have to change the sys path.
64-
# In the guide we instruct the users to install the spacy
65-
# virtual environment into the addons21 folder as 'spacyenv',
66-
# that way we can get the path based on the anki mw.pm.
67-
68101
assert mw is not None
69-
spacy_path = os.path.join(mw.pm.addonFolder(), "spacyenv")
102+
103+
spacy_path = _get_am_spacy_venv_path()
70104

71105
if is_win is True:
72106
spacy_bin_path = os.path.join(spacy_path, "Scripts")
@@ -80,6 +114,7 @@ def load_spacy_modules() -> None:
80114
"site-packages",
81115
)
82116

117+
# appending to the path is less disruptive than prepending
83118
sys.path.append(spacy_bin_path)
84119
sys.path.append(spacy_site_packages_path)
85120
updated_python_path = True
@@ -116,6 +151,79 @@ def get_installed_models() -> list[str]:
116151
return [f"{model_name}" for model_name in _spacy_utils.get_installed_models()]
117152

118153

154+
def _get_am_spacy_venv_python() -> str:
    """
    Returns the path of the python interpreter inside the spaCy venv.

    On Windows this is '<venv>/Scripts/python.exe', on every other
    platform it is '<venv>/bin/python'.
    """
    if is_win:
        # 'python.exe' has to be a single path component; joining "python"
        # and ".exe" separately would produce 'Scripts/python/.exe'
        return os.path.join(_get_am_spacy_venv_path(), "Scripts", "python.exe")
    return os.path.join(_get_am_spacy_venv_path(), "bin", "python")
158+
159+
160+
def _get_am_spacy_venv_path() -> str:
    """
    Returns the path of the spaCy venv directory, located in the folder
    returned by 'mw.pm.addonFolder()'.

    The directory name contains the major and minor version of the running
    python interpreter, e.g. 'spacy-venv-python-3_13'.
    """
    major, minor = sys.version_info.major, sys.version_info.minor
    venv_dir_name = f"spacy-venv-python-{major}_{minor}"
    return os.path.join(mw.pm.addonFolder(), venv_dir_name)
163+
164+
165+
def create_spacy_venv() -> None:
    """
    Creates the spaCy venv and installs spaCy into it.

    We create a dedicated venv to avoid polluting the anki launcher environment.

    Raises 'subprocess.CalledProcessError' if any of the commands
    exits with a non-zero status.
    """

    spacy_venv_path = _get_am_spacy_venv_path()
    python_path: str | None = venv_binary("python")
    assert python_path is not None

    subprocess.run([python_path, "-m", "venv", spacy_venv_path], check=True)

    if is_win:
        # 'python.exe' has to be a single path component; joining "python"
        # and ".exe" separately would produce 'Scripts/python/.exe', which
        # does not exist and makes every pip command below fail
        spacy_venv_python = os.path.join(spacy_venv_path, "Scripts", "python.exe")
    else:
        spacy_venv_python = os.path.join(spacy_venv_path, "bin", "python")

    # make sure pip, setuptools, and wheel are up to date
    subprocess.run(
        [
            spacy_venv_python,
            "-m",
            "pip",
            "install",
            "--upgrade",
            "pip",
            "setuptools",
            "wheel",
        ],
        check=True,
    )

    # six is necessary for some models
    subprocess.run(
        [spacy_venv_python, "-m", "pip", "install", "--upgrade", "spacy", "six"],
        check=True,
    )
201+
202+
203+
def delete_spacy_venv() -> None:
    """
    Recursively deletes the spaCy venv directory.
    """
    spacy_venv_path = _get_am_spacy_venv_path()
    shutil.rmtree(spacy_venv_path)
205+
206+
207+
def install_model(model_name: str) -> None:
    """
    Downloads 'model_name' by running 'python -m spacy download'
    with the spaCy venv's python interpreter.

    Raises 'subprocess.CalledProcessError' if the command
    exits with a non-zero status.
    """
    assert successful_import
    assert _spacy is not None

    download_command = [
        _get_am_spacy_venv_python(),
        "-m",
        "spacy",
        "download",
        model_name,
    ]
    subprocess.run(download_command, check=True)
214+
215+
216+
def uninstall_model(model_name: str) -> None:
    """
    Removes 'model_name' by running 'python -m pip uninstall'
    with the spaCy venv's python interpreter.

    Raises 'subprocess.CalledProcessError' if the command
    exits with a non-zero status.
    """
    assert successful_import
    assert _spacy is not None

    uninstall_command = [
        _get_am_spacy_venv_python(),
        "-m",
        "pip",
        "uninstall",
        "-y",  # prevents a confirmation prompt
        model_name,
    ]
    subprocess.run(uninstall_command, check=True)
225+
226+
119227
# the cache needs to have a max size to maintain garbage collection
120228
@functools.lru_cache(maxsize=131072)
121229
def get_nlp(spacy_model_name: str): # type: ignore[no-untyped-def] # pylint:disable=too-many-branches, too-many-statements

0 commit comments

Comments
 (0)