diff --git a/.github/workflows/ghpages.yaml b/.github/workflows/ghpages.yaml index ab7baa62..154aa590 100644 --- a/.github/workflows/ghpages.yaml +++ b/.github/workflows/ghpages.yaml @@ -19,7 +19,10 @@ jobs: run: wasm-pack test --firefox --headless - name: Build wasm module working-directory: wasm - run: wasm-pack build --target web --scope toriyama --out-name japanese_address_parser --features debug + run: | + wasm-pack build --target web --scope toriyama --out-name japanese_address_parser_debug --features debug + wasm-pack build --target web --scope toriyama --out-name japanese_address_parser_nightly --features nightly + wasm-pack build --target web --scope toriyama --out-name japanese_address_parser - name: Move files run: | mkdir ./publish diff --git a/core/Cargo.toml b/core/Cargo.toml index d4b35f09..cc5f40ba 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -18,15 +18,18 @@ crate-type = ["rlib", "cdylib"] default = ["city-name-correction"] blocking = ["reqwest/blocking"] city-name-correction = [] +format-house-number = [] [dependencies] itertools = "0.13.0" -js-sys = "0.3.67" rapidfuzz = "0.5.0" regex = "1.10.2" serde.workspace = true reqwest = { version = "0.12.5", default-features = false, features = ["json", "rustls-tls"] } +[target.'cfg(target_arch = "wasm32")'.dependencies] +js-sys = "0.3.67" + [dev-dependencies] tokio.workspace = true wasm-bindgen-test = { workspace = true } diff --git a/core/src/formatter.rs b/core/src/formatter.rs new file mode 100644 index 00000000..2c25fa97 --- /dev/null +++ b/core/src/formatter.rs @@ -0,0 +1 @@ +pub(crate) mod house_number; diff --git a/core/src/formatter/house_number.rs b/core/src/formatter/house_number.rs new file mode 100644 index 00000000..116a5389 --- /dev/null +++ b/core/src/formatter/house_number.rs @@ -0,0 +1,84 @@ +#[cfg(not(target_arch = "wasm32"))] +pub(crate) fn format_house_number(input: &str) -> Result { + let captures = regex::Regex::new(r"(?\d+)\D+(?\d+)(?.*)$") + .unwrap() + .captures(input) + .ok_or("マッチするものがありませんでした")?; + let block_number = captures + .name("block_number") + .ok_or("街区符号を検出できませんでした")?; + let house_number = captures + .name("house_number") + .ok_or("住居番号を検出できませんでした")?; + let rest = match captures.name("rest") { + Some(matched) => matched.as_str(), + None => "", + }; + Ok(format!( + "{}番{}号{}", + block_number.as_str(), + house_number.as_str(), + rest + )) +} + +#[cfg(target_arch = "wasm32")] +pub(crate) fn format_house_number(input: &str) -> Result { + let captures = js_sys::RegExp::new( + r"(?\d+)\D+(?\d+)(?.*)$", + "", + ) + .exec(input) + .ok_or("マッチするものがありませんでした")?; + let block_number = captures + .get(1) + .as_string() + .ok_or("街区符号を検出できませんでした")?; + let house_number = captures + .get(2) + .as_string() + .ok_or("住居番号を検出できませんでした")?; + let rest = captures + .get(3) + .as_string() + .unwrap_or_else(|| "".to_string()); + Ok(format!("{}番{}号{}", block_number, house_number, rest)) +} + +#[cfg(all(test, not(target_arch = "wasm32")))] +mod tests { + use crate::formatter::house_number::format_house_number; + + #[test] + fn format_house_number_1番1号() { + let result = format_house_number("1-1"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "1番1号"); + } + + #[test] + fn format_house_number_3番2号レジデンシャルマンション101号室() { + let result = format_house_number("3-2レジデンシャルマンション101号室"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "3番2号レジデンシャルマンション101号室"); + } +} + +#[cfg(all(test, target_arch = "wasm32"))] +mod wasm_tests { + use crate::formatter::house_number::format_house_number; + use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure}; + + wasm_bindgen_test_configure!(run_in_browser); + + #[wasm_bindgen_test] + fn format_house_number_success() { + let result = format_house_number("1-1"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "1番1号"); + + let result = format_house_number("3-2レジデンシャルマンション101号室"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "3番2号レジデンシャルマンション101号室"); + } +} diff --git a/core/src/lib.rs b/core/src/lib.rs index f340ca78..aa31c5a9 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -3,6 +3,7 @@ //! ## Feature flags //! - `blocking`: Provide method that works synchronously //! - `city-name-correction`*(enabled by default)*: Enable autocorrection if ambiguous city name was typed +//! - `format-house-number`: Enable normalization of addresses after town name #[cfg(all(target_family = "wasm", feature = "blocking"))] compile_error! { @@ -13,6 +14,7 @@ pub mod api; mod domain; #[deprecated(since = "0.1.6", note = "This module will be deleted in v0.2")] pub mod entity; +mod formatter; pub mod parser; mod repository; mod service; diff --git a/core/src/parser.rs b/core/src/parser.rs index 7245e1fe..b4057bd2 100644 --- a/core/src/parser.rs +++ b/core/src/parser.rs @@ -10,7 +10,6 @@ use serde::Serialize; pub(crate) mod adapter; pub(crate) mod filter; -mod read_house_number; impl From> for Address { fn from(value: Tokenizer) -> Self { diff --git a/core/src/parser/read_house_number.rs b/core/src/parser/read_house_number.rs deleted file mode 100644 index b9d1960a..00000000 --- a/core/src/parser/read_house_number.rs +++ /dev/null @@ -1,68 +0,0 @@ -#[allow(dead_code)] -#[cfg(not(target_arch = "wasm32"))] -fn read_house_number_with_regex(input: &str) -> Option<(String, String)> { - let expression = regex::Regex::new(r"(?\d+)\D*(?.*)$").unwrap(); - let captures = expression.captures(input)?; - let house_number = if let Some(matched) = captures.name("house_number") { - matched.as_str() - } else { - return None; - }; - let rest = if let Some(matched) = captures.name("rest") { - matched.as_str() - } else { - "" - }; - Some((rest.to_string(), format!("{}番", house_number))) -} - -#[allow(dead_code)] -#[cfg(target_arch = "wasm32")] -fn read_house_number_with_js_sys_regexp(input: &str) -> Option<(String, String)> { - let expression = js_sys::RegExp::new(r"(?\d+)\D*(?.*)$", ""); - let captures = expression.exec(input)?; - let house_number = captures.get(1).as_string()?; - let rest = captures - .get(2) - .as_string() - .unwrap_or_else(|| "".to_string()); - Some((rest, format!("{}番", house_number))) -} - -#[cfg(all(test, not(target_arch = "wasm32")))] -mod tests { - use crate::parser::read_house_number::read_house_number_with_regex; - - #[test] - fn read_house_number_1番() { - let (rest, house_number) = read_house_number_with_regex("1").unwrap(); - assert_eq!(house_number, "1番"); - assert_eq!(rest, ""); - } - - #[test] - fn read_house_number_3番2() { - let (rest, house_number) = read_house_number_with_regex("3-2").unwrap(); - assert_eq!(house_number, "3番"); - assert_eq!(rest, "2"); - } -} - -#[cfg(all(test, target_arch = "wasm32"))] -mod wasm_tests { - use crate::parser::read_house_number::read_house_number_with_js_sys_regexp; - use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure}; - - wasm_bindgen_test_configure!(run_in_browser); - - #[wasm_bindgen_test] - fn read_house_number_with_js_sys_regexp_success() { - let (rest, house_number) = read_house_number_with_js_sys_regexp("1").unwrap(); - assert_eq!(house_number, "1番"); - assert_eq!(rest, ""); - - let (rest, house_number) = read_house_number_with_js_sys_regexp("3-2").unwrap(); - assert_eq!(house_number, "3番"); - assert_eq!(rest, "2"); - } -} diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index 09886ae0..6cf9cae8 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -1,5 +1,6 @@ use std::marker::PhantomData; +use crate::formatter::house_number::format_house_number; use crate::parser::adapter::orthographical_variant_adapter::{ OrthographicalVariantAdapter, OrthographicalVariants, Variant, }; @@ -24,7 +25,12 @@ impl Tokenizer { prefecture_name: self.prefecture_name.clone(), city_name: self.city_name.clone(), town_name: Some(town_name), - rest, + rest: if cfg!(feature = "format-house-number") && format_house_number(&rest).is_ok() + { + format_house_number(&rest).unwrap() + } else { + rest + }, _state: PhantomData::, }); } @@ -36,7 +42,12 @@ impl Tokenizer { prefecture_name: self.prefecture_name.clone(), city_name: self.city_name.clone(), town_name: Some(town_name), - rest, + rest: if cfg!(feature = "format-house-number") && format_house_number(&rest).is_ok() + { + format_house_number(&rest).unwrap() + } else { + rest + }, _state: PhantomData::, }); } @@ -47,7 +58,12 @@ impl Tokenizer { prefecture_name: self.prefecture_name.clone(), city_name: self.city_name.clone(), town_name: Some(town_name), - rest, + rest: if cfg!(feature = "format-house-number") && format_house_number(&rest).is_ok() + { + format_house_number(&rest).unwrap() + } else { + rest + }, _state: PhantomData::, }); } diff --git a/public/debug.html b/public/debug.html new file mode 100644 index 00000000..6ab8b394 --- /dev/null +++ b/public/debug.html @@ -0,0 +1,68 @@ + + + + + Demo | japanese-address-parser + + + +
+ Debug +
+

YuukiToriyama/japanese-address-parser

+

Rust製の住所パーサーです

+ +

住所を入力してください

+
+ + +
+ +

処理結果

+ + + + + + + + + + + + + + + + + + + + + + + +
入力値ステータスaddress.prefectureaddress.cityaddress.townaddress.restJSON

東京都中央区日本橋一丁目1-1

成功

東京都

中央区

日本橋一丁目

1-1

{"address":{"prefecture":"東京都","city":"中央区","town":"日本橋一丁目","rest":"1-1"}} +
+ + + + + \ No newline at end of file diff --git a/public/index.html b/public/index.html index e94f7508..a63c46f4 100644 --- a/public/index.html +++ b/public/index.html @@ -41,7 +41,25 @@

処理結果

- + + \ No newline at end of file diff --git a/public/nightly.html b/public/nightly.html new file mode 100644 index 00000000..33cc3d03 --- /dev/null +++ b/public/nightly.html @@ -0,0 +1,68 @@ + + + + + Demo | japanese-address-parser + + + +
+ Nightly +
+

YuukiToriyama/japanese-address-parser

+

Rust製の住所パーサーです

+ +

住所を入力してください

+
+ + +
+ +

処理結果

+ + + + + + + + + + + + + + + + + + + + + + + +
入力値ステータスaddress.prefectureaddress.cityaddress.townaddress.restJSON

東京都中央区日本橋一丁目1-1

成功

東京都

中央区

日本橋一丁目

1-1

{"address":{"prefecture":"東京都","city":"中央区","town":"日本橋一丁目","rest":"1-1"}} +
+ + + + + \ No newline at end of file diff --git a/public/style.css b/public/style.css index a32da63d..9749fb20 100644 --- a/public/style.css +++ b/public/style.css @@ -48,4 +48,31 @@ .output tbody tr td:hover { background: #8eeacd; -} \ No newline at end of file +} + +.ribbon { + position: absolute; + top: 0; + right: 0; + width: 100px; + height: 100px; + overflow: hidden; +} + +.ribbon-label { + display: inline-block; + position: absolute; + padding: 5px 0; + left: -18px; + top: 18px; + width: 160px; + text-align: center; + font-size: 20px; + line-height: 20px; + background: #5bc8ac; + color: #fff; + transform: rotate(45deg); + box-shadow: 0 0 0 2px #8eeacd; + border-top: dashed 2px rgba(255, 255, 255, 0.6); + border-bottom: dashed 2px rgba(255, 255, 255, 0.6); +} diff --git a/public/main.js b/public/table_util.js similarity index 66% rename from public/main.js rename to public/table_util.js index 639eed6e..73460abf 100644 --- a/public/main.js +++ b/public/table_util.js @@ -1,20 +1,3 @@ -import init, {Parser} from "../pkg/japanese_address_parser.js" - -const inputTextArea = document.getElementById("input") - -init().then(() => { - document.getElementById("exec").addEventListener("click", () => { - const input = inputTextArea.value - alert("input: " + input) - const parser = new Parser() - parser.parse(input).then(result => { - document.getElementById("result").appendChild( - createRow(input, result) - ) - }) - }) -}) - const createRow = (input, parseResult) => { const tr = document.createElement("tr") tr.appendChild(createCell(`

${input}

`)) diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml index 7df1a490..f1ceadcf 100644 --- a/wasm/Cargo.toml +++ b/wasm/Cargo.toml @@ -15,6 +15,7 @@ crate-type = ["cdylib"] [features] debug = [] +nightly = ["japanese-address-parser/format-house-number"] [dependencies] console_error_panic_hook = "0.1.7"