Skip to content

Commit

Permalink
Merge pull request #7 from aynumosir/kana-release
Browse files Browse the repository at this point in the history
Prepare kana feature for release
  • Loading branch information
neet authored Aug 20, 2024
2 parents a2c73e0 + feec0a4 commit fb63d74
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
with:
target: x86_64
args: --release --out dist --find-interpreter --manifest-path ./ainu-utils-python/Cargo.toml
# sccache: 'true'
sccache: 'true'
manylinux: auto

- name: pytest
Expand Down
2 changes: 1 addition & 1 deletion ainu-utils-js/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ainu-utils-js"
version = "0.2.0"
version = "0.3.0"
edition = "2021"
description = "A collection of utilities for the Ainu language"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion ainu-utils-python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ainu-utils-python"
version = "0.2.1"
version = "0.3.0"
edition = "2021"

[lib]
Expand Down
2 changes: 1 addition & 1 deletion ainu-utils-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ features = ["pyo3/extension-module"]
name = "ainu-utils"
description = "A collection of utilities for the Ainu language"
requires-python = ">=3.8"
version = "0.2.1"
version = "0.3.0"
license = "MIT"

[project.optional-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion ainu-utils-python/tests/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def test_tokenize():


def test_tokenize_with_whitespace():
result = ainu_utils.segment("irankarapte. e=iwanke ya?", keep_whitespace=True)
result = ainu_utils.tokenize("irankarapte. e=iwanke ya?", keep_whitespace=True)
assert result == ["irankarapte", ".", " ", "e=", "iwanke", " ", "ya", "?"]


Expand Down
2 changes: 1 addition & 1 deletion ainu-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ainu-utils"
version = "0.2.0"
version = "0.3.0"
edition = "2021"
description = "A collection of utilities for the Ainu language"
license = "MIT"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ pub static VOWELS: Lazy<HashSet<char>> = Lazy::new(|| {
});

/// Set of lowercase Latin consonant letters, built once on first access.
pub static CONSONANTS: Lazy<HashSet<char>> = Lazy::new(|| {
    [
        'k', 'g', 's', 'z', 't', 'd', 'c', 'j', 'n', 'h', 'p', 'b', 'f', 'm', 'y', 'r', 'w',
    ]
    .into_iter()
    .collect()
});

Expand Down
15 changes: 9 additions & 6 deletions ainu-utils/src/kana/kana.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use super::sets::{CONSONANTS, SPECIAL_CONSONANTS, SYMBOLS, VOWELS};
use super::tables::{TABLE_1, TABLE_2, TABLE_3};
use super::constants::{CONSONANTS, SPECIAL_CONSONANTS, SYMBOLS, VOWELS};
use super::linking::link;
use super::maps::{TABLE_1, TABLE_2, TABLE_3};
use diacritics::remove_diacritics;

fn normalize(input: &str) -> String {
let mut input = input.to_string();

fn normalize(mut input: String) -> String {
input = input.to_lowercase();
input = input.replace("=", "");
input = remove_diacritics(&input).to_string();
Expand All @@ -13,7 +12,11 @@ fn normalize(input: &str) -> String {
}

pub fn to_kana(input: &str) -> String {
let input = normalize(input);
let mut input = input.to_string();

input = normalize(input);
input = link(input);

let chars: Vec<char> = input.chars().collect();

let mut kana = String::new();
Expand Down
128 changes: 127 additions & 1 deletion ainu-utils/src/kana/kana_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,134 @@ fn test_to_kana() {
"アシㇼパ エキㇺネ パテㇰ キ ワ、 メノコ モンライケ エアイカㇷ゚。",
"ケメイキ ネ ヤ、 イテセ ネ ヤ、 メノコ モンライケ エアイカㇷ゚ メノコ アナㇰ、 アイヌ ホク コㇿ カ エアイカㇷ゚。",
"タネ シヌイェ クニ パハ ネ コㇿカ、 コパン。",
"スgイモト ニㇱパ、 タン マッカチ エトゥン ワ エンコレ!",
"スギモト ニㇱパ、 タン マッカチ エトゥン ワ エンコレ!",
"タン クミッポホ クエポタラ ワ モシㇼ クホッパ カ コヤイクㇱ。",
].join(" ")
)
}

// `_hine` must come out as `イネ`: the `_` marker and the `h` after it are both dropped.
#[test]
fn test_dropping_h() {
    let kana = to_kana("_hine");
    assert_eq!(kana, "イネ");
}

// `_ya?` must come out as `ア?`: the `_` marker and the `y` after it are both dropped.
#[test]
fn test_dropping_y() {
    let kana = to_kana("_ya?");
    assert_eq!(kana, "ア?");
}

// ` __hi` loses its space, marker and `h`, so the final `n` of `hawean` joins the `i` (`ニ`).
#[test]
fn test_linking_h() {
    let kana = to_kana("hawean __hi");
    assert_eq!(kana, "ハウェアニ");
}

// ` __ya` loses its space, marker and `y`, so the final `r` of `nankor` joins the `a` (`ラ`).
#[test]
fn test_linking_y() {
    let kana = to_kana("nankor __ya?");
    assert_eq!(kana, "ナンコラ?");
}

// Each ` _a` attaches to the preceding `s`, giving `サ` both times.
#[test]
fn test_linking_a() {
    let kana = to_kana("cis _a cis _a");
    assert_eq!(kana, "チサ チサ");
}

// ` _i` attaches to the preceding `r`, giving `リ`.
#[test]
fn test_linking_i() {
    let kana = to_kana("oar _isam");
    assert_eq!(kana, "オアリサㇺ");
}

// ` _u` attaches to the preceding `r`, giving `ル`.
#[test]
fn test_linking_u() {
    let kana = to_kana("or _un");
    assert_eq!(kana, "オルン");
}

// ` _e` attaches to the preceding `t`, giving `テ`.
#[test]
fn test_linking_e() {
    let kana = to_kana("mat _etun");
    assert_eq!(kana, "マテトゥン");
}

// Two chained markers: ` _o` joins `t` (`ト`) and ` _u` joins `r` (`ル`).
#[test]
fn test_linking_o() {
    let kana = to_kana("pet _or _un");
    assert_eq!(kana, "ペトルン");
}

// ` __a` disappears entirely (space, marker and vowel), so `ekira` is followed directly by `ni`.
#[test]
fn test_linking_and_dropping_a() {
    let kana = to_kana("yaypuri ekira __ani");
    assert_eq!(kana, "ヤイプリ エキラニ");
}

// ` __i` disappears entirely, leaving only `puni`.
#[test]
fn test_linking_and_dropping_i() {
    let kana = to_kana("puni __i");
    assert_eq!(kana, "プニ");
}

// ` __u` disappears entirely, so the trailing `n` follows `a=kotanu` directly.
#[test]
fn test_linking_and_dropping_u() {
    let kana = to_kana("a=kotanu __un");
    assert_eq!(kana, "アコタヌン");
}

// ` __e` disappears entirely, so the trailing `n` follows `i=samake` directly.
#[test]
fn test_linking_and_dropping_e() {
    let kana = to_kana("i=samake __en anu");
    assert_eq!(kana, "イサマケン アヌ");
}

// ` __o` disappears entirely, so the trailing `r` follows `sapporo` directly.
#[test]
fn test_linking_and_dropping_o() {
    // No attested example.
    let kana = to_kana("sapporo __or");
    assert_eq!(kana, "サッポロㇿ");
}

// A marked final `r` before `n` is rendered as `ン`; the space is kept.
#[test]
fn test_linking_r_n() {
    let kana = to_kana("a=kor_ nispa");
    assert_eq!(kana, "アコン ニㇱパ");
}

// A marked final `r` before `r` is rendered as `ン`; the space is kept.
#[test]
fn test_linking_r_r() {
    let kana = to_kana("kor_ rusuy");
    assert_eq!(kana, "コン ルスイ");
}

// A marked final `r` before `t` is rendered as `ッ`; the space is kept.
#[test]
fn test_linking_r_t() {
    let kana = to_kana("or_ ta");
    assert_eq!(kana, "オッ タ");
}

// A marked final `r` before `c` is rendered as `ッ`; the space is kept.
#[test]
fn test_linking_r_c() {
    let kana = to_kana("yar_ cise");
    assert_eq!(kana, "ヤッ チセ");
}

// A marked final `n` before `s` is rendered as `イ`; the space is kept.
#[test]
fn test_linking_n_s() {
    let kana = to_kana("pon_ su");
    assert_eq!(kana, "ポイ ス");
}

// A marked final `n` before `y` is rendered as `イ`; the space is kept.
#[test]
fn test_linking_n_y() {
    let kana = to_kana("pon_ yam");
    assert_eq!(kana, "ポイ ヤㇺ");
}

// After a final `n`, the marked `_wa` is rendered as `マ`; the space is kept.
#[test]
fn test_linking_n_w() {
    let kana = to_kana("san _wa");
    assert_eq!(kana, "サン マ");
}

// After a final `m`, the marked `_wa` is rendered as `マ` and the `m` itself as `ン`.
#[test]
fn test_linking_m_w() {
    let kana = to_kana("isam _wa");
    assert_eq!(kana, "イサン マ");
}

// After a final `p`, the marked `_wa` is rendered as `パ` and the `p` itself as `ッ`.
#[test]
fn test_linking_p_w() {
    let kana = to_kana("sap _wa");
    assert_eq!(kana, "サッ パ");
}

// An unmarked `m` directly before `p` is rendered as `ン`.
#[test]
fn test_special_mp() {
    let kana = to_kana("tampaku");
    assert_eq!(kana, "タンパク");
}

// An unmarked `m` directly before another `m` is rendered as `ン`.
#[test]
fn test_special_mm() {
    let kana = to_kana("umma");
    assert_eq!(kana, "ウンマ");
}
38 changes: 38 additions & 0 deletions ainu-utils/src/kana/linking.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// https://ainugo.nam.go.jp/pages/guide.html
/// Ordered rewrite rules that resolve the `_` / `__` markers and the `mp` /
/// `mm` clusters in romanized text.
///
/// Every `(from, to)` pair is applied once, top to bottom, to the whole
/// string, so order matters: a pattern that contains another pattern must be
/// listed before it, otherwise the shorter rule consumes its text first and
/// the longer rule can never match.
static LINKING: &[(&str, &str)] = &[
    // " __h" / " __y" contain "_h" / "_y" and therefore have to run first.
    (" __h", ""),
    (" __y", ""),
    // A bare "_h" / "_y" is deleted together with its marker.
    ("_h", ""),
    ("_y", ""),
    // " _<vowel>": remove the space and the marker, keep the vowel.
    (" _a", "a"),
    (" _i", "i"),
    (" _u", "u"),
    (" _e", "e"),
    (" _o", "o"),
    // " __<vowel>": remove the space, the marker and the vowel.
    (" __a", ""),
    (" __i", ""),
    (" __u", ""),
    (" __e", ""),
    (" __o", ""),
    // "<consonant>_ <consonant>": rewrite the marked final consonant, keep the space.
    ("r_ n", "n n"),
    ("r_ r", "n r"),
    ("r_ t", "t t"),
    ("r_ c", "t c"),
    ("n_ s", "y s"),
    ("n_ y", "y y"),
    ("n_ w", "u w"),
    // "<consonant> _w": the marked "w" is rewritten, together with a preceding "m" or "p".
    ("n _w", "n m"),
    ("m _w", "n m"),
    ("p _w", "t p"),
    // Unmarked clusters: "m" directly before "p" or "m" becomes "n".
    ("mp", "np"),
    ("mm", "nm"),
];

/// Applies every `(from, to)` rule in `LINKING` to `input`, in table order,
/// and returns the rewritten string.
///
/// Each rule replaces all occurrences of `from` in the current text, so a
/// later rule sees the output of the earlier ones.
pub fn link(mut input: String) -> String {
    for &(from, to) in LINKING.iter() {
        // `str::replace` always builds a new `String`; skip rules that cannot match
        // so untouched input is not reallocated once per rule.
        if input.contains(from) {
            input = input.replace(from, to);
        }
    }

    input
}
30 changes: 30 additions & 0 deletions ainu-utils/src/kana/tables.rs → ainu-utils/src/kana/maps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,40 @@ pub static TABLE_1: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
("ku", "ク"),
("ke", "ケ"),
("ko", "コ"),
("ga", "ガ"),
("gi", "ギ"),
("gu", "グ"),
("ge", "ゲ"),
("go", "ゴ"),
("sa", "サ"),
("si", "シ"),
("su", "ス"),
("se", "セ"),
("so", "ソ"),
("za", "ザ"),
("zi", "ジ"),
("zu", "ズ"),
("ze", "ゼ"),
("zo", "ゾ"),
("ta", "タ"),
("tu", "トゥ"),
("te", "テ"),
("to", "ト"),
("da", "ダ"),
("di", "ヂ"),
("du", "ヅ"),
("de", "デ"),
("do", "ド"),
("ca", "チャ"),
("ci", "チ"),
("cu", "チュ"),
("ce", "チェ"),
("co", "チョ"),
("ja", "ジャ"),
("ji", "ジ"),
("ju", "ジュ"),
("je", "ジェ"),
("jo", "ジョ"),
("na", "ナ"),
("ni", "ニ"),
("nu", "ヌ"),
Expand All @@ -50,6 +70,16 @@ pub static TABLE_1: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
("pu", "プ"),
("pe", "ペ"),
("po", "ポ"),
("ba", "バ"),
("bi", "ビ"),
("bu", "ブ"),
("be", "ベ"),
("bo", "ボ"),
("fa", "ファ"),
("fi", "フィ"),
("fu", "フ"),
("fe", "フェ"),
("fo", "フォ"),
("ma", "マ"),
("mi", "ミ"),
("mu", "ム"),
Expand Down
5 changes: 3 additions & 2 deletions ainu-utils/src/kana/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ mod kana;

pub use self::kana::*;

mod sets;
mod tables;
mod constants;
mod linking;
mod maps;

#[cfg(test)]
mod kana_test;

0 comments on commit fb63d74

Please sign in to comment.