Skip to content

Commit

Permalink
added spaCy (#62)
Browse files Browse the repository at this point in the history
  • Loading branch information
mortii committed Dec 30, 2023
1 parent 0c00aef commit 5ba4e1b
Show file tree
Hide file tree
Showing 97 changed files with 7,023 additions and 1,595,068 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
strategy:
matrix:
python_version: ['3.9']
os: [ubuntu-latest, windows-latest]
os: [ubuntu-latest, macOS-latest, windows-latest]
anki: ['2.1.66']
steps:
- uses: actions/checkout@v3
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ docs/book
ankimorphs.db
anki_output.txt
profile.svg
ankimorphs*.ankiaddon
ankimorphs*.ankiaddon
ja_ffg_test_result.csv
43 changes: 12 additions & 31 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ ci: # the pre-commit ci bot on github does not like repo: local

default_language_version:
python: python3.9
exclude: |
(?x)^(
ankimorphs/ui/.*|
ankimorphs/deps/.*|
ankimorphs/mecab_wrapper.py|
)$
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
Expand All @@ -13,7 +19,6 @@ repos:
args: ['--enforce-all']
exclude: |
(?x)^(
ankimorphs/deps/.*||
docs/src/img/revert_changes.mp4|
docs/src/img/note-type.mp4|
docs/src/img/deck-options.mp4|
Expand All @@ -28,10 +33,6 @@ repos:
args: ['--autofix']
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
exclude: |
(?x)^(
ankimorphs/deps/jieba/analyse/textrank.py|
)$
- id: check-merge-conflict
- id: check-toml
- id: debug-statements
Expand All @@ -40,8 +41,6 @@ repos:
exclude: |
(?x)^(
tests/data/.*|
tests/fake_config.py|
tests/fake_preferences.py|
)$
- id: requirements-txt-fixer
- repo: https://github.com/pre-commit/pygrep-hooks
Expand All @@ -60,11 +59,6 @@ repos:
- id: pyupgrade
args: [--py39-plus]
files: 'ankimorphs/|^tests/'
exclude: |
(?x)^(
ankimorphs/deps/.*|
tests/fake_preferences.py|
)$
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
Expand All @@ -86,9 +80,6 @@ repos:
files: 'ankimorphs/|^tests/'
exclude: |
(?x)^(
ankimorphs/deps/.*|
ankimorphs/ui/.*|
ankimorphs/old_recalc.py|
tests/.*|
)$
- id: black
Expand All @@ -99,8 +90,6 @@ repos:
files: 'ankimorphs/|^tests/'
exclude: |
(?x)^(
ankimorphs/deps/.*|
ankimorphs/ui/.*|
tests/.*|
)$
- id: mypy
Expand All @@ -111,19 +100,11 @@ repos:
files: 'ankimorphs/|^tests/'
exclude: |
(?x)^(
ankimorphs/deps/.*|
ankimorphs/ui/.*|
ankimorphs/mecab_wrapper.py|
ankimorphs/morpheme.py|
ankimorphs/morphemizer.py|
ankimorphs/old_recalc.py|
ankimorphs/text_utils.py|
ankimorphs/util_external.py|
tests/.*|
)$
# - id: pytest
# name: pytest
# entry: pytest
# language: system
# pass_filenames: false
# always_run: true
- id: pytest
name: pytest
entry: pytest
language: system
pass_filenames: false
always_run: true
9 changes: 1 addition & 8 deletions ankimorphs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
################################################################

from functools import partial
from typing import Literal, Optional
from typing import Literal

import aqt
from anki.cards import Card
Expand All @@ -35,7 +35,6 @@
from . import (
ankimorphs_globals,
browser_utils,
frequency_file_generator,
name_file_utils,
recalc,
reviewing_utils,
Expand Down Expand Up @@ -451,12 +450,6 @@ def test_function() -> None:
#
# am_db.con.close()

# morphemizer = MecabMorphemizer()
# expression = "うん あ カフェ モカ お願い おかわり"
# morphs = get_morphemes(morphemizer, expression, am_config=AnkiMorphsConfig())
# morph_list = [morph.inflected for morph in morphs]
# print(f"morphs: {morph_list}")

# print("Seen_Morphs:")
# am_db = AnkiMorphsDB()
# am_db.print_table("Seen_Morphs")
Expand Down
7 changes: 6 additions & 1 deletion ankimorphs/anki_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@


from collections.abc import Sequence
from typing import Any, Union
from typing import Any, Optional, Union

import anki.utils
from anki.tags import TagManager

from .config import AnkiMorphsConfig, AnkiMorphsConfigFilter
from .morpheme import Morpheme


class AnkiDBRowData:
Expand Down Expand Up @@ -55,6 +56,7 @@ class AnkiCardData: # pylint:disable=too-many-instance-attributes
"fields",
"tags",
"note_id",
"morphs",
)

def __init__(
Expand Down Expand Up @@ -88,6 +90,9 @@ def __init__(
self.tags = anki_row_data.note_tags
self.note_id = anki_row_data.note_id

# this is set later when spacy is used
self.morphs: Optional[set[Morpheme]] = None


class AnkiMorphsCardData:
__slots__ = (
Expand Down
65 changes: 43 additions & 22 deletions ankimorphs/ankimorphs_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ def create_card_morph_map_table(self) -> None:
CREATE TABLE IF NOT EXISTS Card_Morph_Map
(
card_id INTEGER,
morph_norm TEXT,
morph_base TEXT,
morph_inflected TEXT,
FOREIGN KEY(card_id) REFERENCES card(id),
FOREIGN KEY(morph_norm, morph_inflected) REFERENCES morph(norm, inflected)
FOREIGN KEY(morph_base, morph_inflected) REFERENCES morph(base, inflected)
)
"""
)
Expand All @@ -65,12 +65,11 @@ def create_morph_table(self) -> None:
"""
CREATE TABLE IF NOT EXISTS Morphs
(
norm TEXT,
base TEXT,
inflected TEXT,
is_base INTEGER,
highest_learning_interval INTEGER,
PRIMARY KEY (norm, inflected)
PRIMARY KEY (base, inflected)
)
"""
)
Expand All @@ -81,9 +80,9 @@ def create_seen_morph_table(self) -> None:
"""
CREATE TABLE IF NOT EXISTS Seen_Morphs
(
norm TEXT,
base TEXT,
inflected TEXT,
PRIMARY KEY (norm, inflected)
PRIMARY KEY (base, inflected)
)
"""
)
Expand Down Expand Up @@ -116,13 +115,12 @@ def insert_many_into_morph_table(
INSERT INTO Morphs
VALUES
(
:norm,
:base,
:inflected,
:is_base,
:highest_learning_interval
)
ON CONFLICT(norm, inflected) DO UPDATE SET
ON CONFLICT(base, inflected) DO UPDATE SET
highest_learning_interval = :highest_learning_interval
WHERE highest_learning_interval < :highest_learning_interval
""",
Expand All @@ -138,7 +136,7 @@ def insert_many_into_card_morph_map_table(
INSERT OR IGNORE INTO Card_Morph_Map VALUES
(
:card_id,
:morph_norm,
:morph_base,
:morph_inflected
)
""",
Expand All @@ -151,7 +149,7 @@ def get_readable_card_morphs(self, card_id: int) -> list[tuple[str, str]]:
with self.con:
card_morphs_raw = self.con.execute(
"""
SELECT morph_norm, morph_inflected
SELECT morph_base, morph_inflected
FROM Card_Morph_Map
WHERE card_id = ?
""",
Expand All @@ -170,7 +168,7 @@ def get_all_morphs_seen_today(self) -> set[str]:
with self.con:
card_morphs_raw = self.con.execute(
"""
SELECT norm, inflected
SELECT base, inflected
FROM Seen_Morphs
"""
).fetchall()
Expand All @@ -184,8 +182,8 @@ def update_seen_morphs_today_single_card(self, card_id: int) -> None:
with self.con:
self.con.execute(
"""
INSERT OR IGNORE INTO Seen_Morphs (norm, inflected)
SELECT morph_norm, morph_inflected
INSERT OR IGNORE INTO Seen_Morphs (base, inflected)
SELECT morph_base, morph_inflected
FROM Card_Morph_Map
WHERE card_id = ?
""",
Expand All @@ -205,10 +203,10 @@ def get_morphs_of_card(
with self.con:
card_morphs = self.con.execute(
"""
SELECT DISTINCT morph_norm, morph_inflected
SELECT DISTINCT morph_base, morph_inflected
FROM Card_Morph_Map
INNER JOIN Morphs ON
Card_Morph_Map.morph_norm = Morphs.norm AND Card_Morph_Map.morph_inflected = Morphs.inflected
Card_Morph_Map.morph_base = Morphs.base AND Card_Morph_Map.morph_inflected = Morphs.inflected
"""
+ where_query_string,
(card_id,),
Expand Down Expand Up @@ -238,7 +236,7 @@ def get_ids_of_cards_with_same_morphs(

where_query_string = "WHERE" + "".join(
[
f" (morph_norm = '{morph[0]}' AND morph_inflected = '{morph[1]}') OR"
f" (morph_base = '{morph[0]}' AND morph_inflected = '{morph[1]}') OR"
for morph in card_morphs
]
)
Expand All @@ -261,15 +259,16 @@ def get_ids_of_cards_with_same_morphs(

return card_ids

def get_highest_learning_interval(self, norm: str, inflected: str) -> Optional[int]:
def get_highest_learning_interval(self, base: str, inflected: str) -> Optional[int]:
# todo update this usage!!!
with self.con:
highest_learning_interval = self.con.execute(
"""
SELECT highest_learning_interval
FROM Morphs
WHERE norm = ? And inflected = ?
WHERE base = ? And inflected = ?
""",
(norm, inflected),
(base, inflected),
).fetchone()

if highest_learning_interval is None:
Expand Down Expand Up @@ -322,6 +321,7 @@ def rebuild_seen_morphs_today() -> None:
op=AnkiMorphsDB.rebuild_seen_morphs_today_background,
success=_on_success,
)
operation.failure(_on_failure)
operation.with_progress().run_in_background()

@staticmethod
Expand All @@ -345,12 +345,12 @@ def rebuild_seen_morphs_today_background(collection: Collection) -> None:
am_db.create_seen_morph_table()

with am_db.con:
# if no cards are studied then don't insert any morphs
# if no cards are studied, then don't insert any morphs
if where_query_string != "":
am_db.con.execute(
"""
INSERT OR IGNORE INTO Seen_Morphs (norm, inflected)
SELECT morph_norm, morph_inflected
INSERT OR IGNORE INTO Seen_Morphs (base, inflected)
SELECT morph_base, morph_inflected
FROM Card_Morph_Map
"""
+ where_query_string
Expand Down Expand Up @@ -409,3 +409,24 @@ def _on_success(result: Any) -> None:
assert mw is not None
assert mw.progress is not None
mw.progress.finish()


def _on_failure(
    error: Union[
        Exception,
        sqlite3.OperationalError,
    ]
) -> None:
    """Failure callback for a background database operation.

    Closes the progress dialog and then either recovers from the error or
    re-raises it.

    Args:
        error: The exception handed to the failure callback.

    Raises:
        Exception: ``error`` itself, whenever it is not a
            ``sqlite3.OperationalError``.
    """
    # This function runs on the main thread.
    assert mw is not None
    assert mw.progress is not None
    mw.progress.finish()

    # Anything other than an OperationalError is not handled here.
    if not isinstance(error, sqlite3.OperationalError):
        raise error

    # An OperationalError is treated as a sign that the schema has been
    # changed: all tables are dropped (presumably so that they get recreated
    # with the current schema later -- confirm against the callers).
    am_db = AnkiMorphsDB()
    try:
        am_db.drop_all_tables()
    finally:
        # Always release the connection, even if dropping the tables fails.
        am_db.con.close()
12 changes: 6 additions & 6 deletions ankimorphs/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@
}
}
],
"parse_ignore_bracket_contents": false,
"parse_ignore_names_morphemizer": true,
"parse_ignore_names_textfile": false,
"parse_ignore_round_bracket_contents": false,
"parse_ignore_slim_round_bracket_contents": false,
"parse_ignore_suspended_cards_content": false,
"preprocess_ignore_bracket_contents": false,
"preprocess_ignore_names_morphemizer": false,
"preprocess_ignore_names_textfile": false,
"preprocess_ignore_round_bracket_contents": false,
"preprocess_ignore_slim_round_bracket_contents": false,
"preprocess_ignore_suspended_cards_content": false,
"recalc_interval_for_known": 21,
"recalc_on_sync": false,
"recalc_suspend_known_new_cards": false,
Expand Down
Loading

0 comments on commit 5ba4e1b

Please sign in to comment.