Skip to content

Commit

Permalink
Merge pull request #435 from YuukiToriyama/feature/refactor-orthograp…
Browse files Browse the repository at this point in the history
…hical-variant-adapter/master

表記ゆれアダプタのリファクタをrelease/v0.1.17にマージ
  • Loading branch information
YuukiToriyama authored Sep 27, 2024
2 parents 93550d0 + fc8e7d8 commit 1d37b91
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 26 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/code-quality-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Code quality check

on:
pull_request:
paths:
- '**.rs'
- '**/Cargo.toml'

jobs:
build:
Expand All @@ -21,3 +24,10 @@ jobs:
reporter: 'github-pr-review'
filter_mode: 'nofilter'
github_token: ${{ secrets.GITHUB_TOKEN }}
- name: Run benchmark
uses: boa-dev/criterion-compare-action@v3
with:
token: ${{ secrets.GITHUB_TOKEN }}
branchName: ${{ github.base_ref }}
cwd: 'core'
benchName: 'core_benchmark'
6 changes: 6 additions & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ rust-version = "1.73.0"

[lib]
crate-type = ["rlib", "cdylib"]
bench = false

[features]
default = ["city-name-correction"]
Expand All @@ -21,6 +22,10 @@ city-name-correction = []
format-house-number = []
eliminate-whitespaces = []

[[bench]]
name = "core_benchmark"
harness = false

[dependencies]
itertools = "0.13.0"
rapidfuzz = "0.5.0"
Expand All @@ -30,6 +35,7 @@ reqwest = { version = "0.12.5", default-features = false, features = ["json", "r
js-sys = "0.3.67"

[dev-dependencies]
criterion = { version = "0.5.1", default-features = false, features = ["html_reports"] }
tokio.workspace = true
wasm-bindgen-test = { workspace = true }

Expand Down
7 changes: 7 additions & 0 deletions core/benches/core_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Entry point of the `core_benchmark` Criterion benchmark target.
// The adapter benchmarks themselves are defined in the sibling module declared below.
mod orthographical_variant_adapter;

use crate::orthographical_variant_adapter::bench_orthographical_variant_adapter;
use criterion::{criterion_group, criterion_main};

// Collect the benchmark functions into a group named `benches`, then hand that group to
// `criterion_main!`, which provides the program entry point for this bench target.
criterion_group!(benches, bench_orthographical_variant_adapter);
criterion_main!(benches);
48 changes: 48 additions & 0 deletions core/benches/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use criterion::measurement::WallTime;
use criterion::{BatchSize, BenchmarkGroup, BenchmarkId, Criterion};
use japanese_address_parser::parser::adapter::orthographical_variant_adapter::{
OrthographicalVariantAdapter, OrthographicalVariants, Variant,
};

/// Registers the `orthographical_variant_adapter` benchmark group on `c`.
///
/// A single [`TestSuite`] is benchmarked: five differently spelled inputs, each of which
/// is expected to be resolved to the same region name using the `ケ`, `崎` and `の`
/// variant sets.
pub fn bench_orthographical_variant_adapter(c: &mut Criterion) {
    // Describe the suite first so the group set-up below reads as three plain steps.
    let suite = TestSuite {
        expected: "松ケ崎東池ノ内町",
        inputs: vec![
            "松が崎東池ノ内町",
            "松ヶ崎東池ノ内町",
            "松ケ﨑東池ノ内町",
            "松ケ﨑東池の内町",
            "松ガ﨑東池の内町",
        ],
        variants_to_be_used: vec![Variant::ケ, Variant::崎, Variant::の],
    };

    let mut group = c.benchmark_group("orthographical_variant_adapter");
    add_tests(&mut group, suite);
    group.finish();
}

fn add_tests(group: &mut BenchmarkGroup<WallTime>, test_suite: TestSuite) {
for input in test_suite.inputs {
let benchmark_id = BenchmarkId::new(test_suite.expected, input);
group.bench_with_input(benchmark_id, input, |b, input| {
b.iter_batched(
|| OrthographicalVariantAdapter {
variant_list: test_suite.variants_to_be_used.clone(),
},
|adapter| {
let (region_name, _) = adapter.apply(input, test_suite.expected).unwrap();
assert_eq!(region_name, test_suite.expected);
},
BatchSize::SmallInput,
)
});
}
}

/// One benchmark scenario: a set of input spellings that should all resolve to the same
/// region name.
struct TestSuite {
    /// Region name passed to `apply` and compared against its result; also used as the
    /// function-name part of each benchmark id.
    expected: &'static str,
    /// Input strings to benchmark; each one becomes its own benchmark.
    inputs: Vec<&'static str>,
    /// Variants cloned into the adapter's `variant_list` for every run.
    variants_to_be_used: Vec<Variant>,
}
2 changes: 1 addition & 1 deletion core/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::domain::geolonia::error::{Error, ParseErrorKind};
use crate::tokenizer::Tokenizer;
use serde::Serialize;

pub(crate) mod adapter;
pub mod adapter;

impl<T> From<Tokenizer<T>> for Address {
fn from(value: Tokenizer<T>) -> Self {
Expand Down
47 changes: 22 additions & 25 deletions core/src/parser/adapter/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,43 +70,40 @@ pub struct OrthographicalVariantAdapter {

impl OrthographicalVariantAdapter {
pub fn apply(self, input: &str, region_name: &str) -> Option<(String, String)> {
let mut filtered_variant_list: Vec<Variant> = vec![];
// 必要なパターンのみを選別する
for variant in self.variant_list.clone() {
if variant.iter().any(|character| input.contains(character)) {
filtered_variant_list.push(variant);
}
}
if filtered_variant_list.is_empty() {
let variant_list: Vec<&Variant> = self
.variant_list
.iter()
.filter(|v| v.iter().any(|c| input.contains(c)))
.collect();
if variant_list.is_empty() {
return None;
}

// マッチ候補を容れておくためのVector
let mut candidates: Vec<String> = vec![region_name.to_string()];
// パターンを一つづつ検証していく
for variant in filtered_variant_list {
for variant in variant_list {
let mut semi_candidates: Vec<String> = vec![];
// variantから順列を作成
// ["ケ", "ヶ", "が"] -> (ケ, ヶ), (ケ, が), (ヶ, ケ), (ヶ, が), (が, ケ), (が, ヶ)
for permutation in variant.iter().permutations(2) {
for candidate in &candidates {
for candidate in candidates.iter().filter(|c| c.contains(permutation[0])) {
// マッチ候補の中でパターンに引っかかるものがあれば文字を置き換えてマッチを試す
if candidate.contains(permutation[0]) {
let edited_region_name = candidate.replace(permutation[0], permutation[1]);
if input.starts_with(&edited_region_name) {
// マッチすれば早期リターン
return Some((
region_name.to_string(),
input
.chars()
.skip(edited_region_name.chars().count())
.collect(),
));
} else {
// マッチしなければsemi_candidatesに置き換え後の文字列をpush
semi_candidates.push(edited_region_name.clone());
};
}
let edited_region_name = candidate.replace(permutation[0], permutation[1]);
if input.starts_with(&edited_region_name) {
// マッチすれば早期リターン
return Some((
region_name.to_string(),
input
.chars()
.skip(edited_region_name.chars().count())
.collect(),
));
} else {
// マッチしなければsemi_candidatesに置き換え後の文字列をpush
semi_candidates.push(edited_region_name);
};
}
}
candidates = semi_candidates;
Expand Down

0 comments on commit 1d37b91

Please sign in to comment.