Merge pull request #447 from YuukiToriyama/feature/improve-tokenizer/master

Merge the Tokenizer refactor into release/v0.1.18
YuukiToriyama authored Oct 4, 2024
2 parents 9f86acb + 329313d commit daf428c
Showing 11 changed files with 420 additions and 354 deletions.
1 change: 1 addition & 0 deletions core/src/domain.rs
@@ -1 +1,2 @@
pub mod common;
pub mod geolonia;
2 changes: 2 additions & 0 deletions core/src/domain/common.rs
@@ -0,0 +1,2 @@
pub mod latlng;
pub mod token;
7 changes: 7 additions & 0 deletions core/src/domain/common/latlng.rs
@@ -0,0 +1,7 @@
#[derive(Clone, Debug, PartialEq)]
pub struct LatLng {
/// Latitude
latitude: f64,
/// Longitude
longitude: f64,
}
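
The fields stay private, so a LatLng can only be constructed inside its own module. A minimal sketch of a unit test that could sit at the bottom of latlng.rs (hypothetical, not part of this diff), exercising the derived Clone and PartialEq:

// Hypothetical test module for core/src/domain/common/latlng.rs; not in this diff.
#[cfg(test)]
mod tests {
    use super::LatLng;

    #[test]
    fn cloned_latlng_compares_equal() {
        // Private fields are reachable here because tests is a child module.
        let a = LatLng { latitude: 35.6812, longitude: 139.7671 };
        let b = a.clone();
        assert_eq!(a, b); // derived PartialEq compares both fields
    }
}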
31 changes: 31 additions & 0 deletions core/src/domain/common/token.rs
@@ -0,0 +1,31 @@
use crate::domain::common::latlng::LatLng;

#[derive(Clone, Debug, PartialEq)]
pub enum Token {
Prefecture(Prefecture),
City(City),
Town(Town),
Rest(String),
}

#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Prefecture {
pub(crate) prefecture_name: String,
pub(crate) representative_point: Option<LatLng>,
}

#[derive(Debug, PartialEq, Clone)]
pub(crate) struct City {
pub(crate) city_name: String,
pub(crate) representative_point: Option<LatLng>,
}

#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Town {
pub(crate) town_name: String,
pub(crate) representative_point: Option<LatLng>,
}

pub(crate) fn append_token(tokens: &[Token], token: Token) -> Vec<Token> {
[tokens.to_owned(), vec![token]].concat()
}
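
Note that append_token copies the slice and appends rather than mutating, which fits the tokenizer's style of moving values from state to state. A short sketch of a test that could live in token.rs (hypothetical, not part of this diff):

// Hypothetical test module for core/src/domain/common/token.rs; not in this diff.
#[cfg(test)]
mod tests {
    use super::{append_token, Token};

    #[test]
    fn append_token_leaves_input_untouched() {
        let tokens = vec![Token::Rest("東京都".to_string())];
        let extended = append_token(&tokens, Token::Rest("千代田区".to_string()));
        assert_eq!(tokens.len(), 1); // the original Vec is unchanged
        assert_eq!(extended.len(), 2); // the new Vec carries the appended token
    }
}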
2 changes: 1 addition & 1 deletion core/src/lib.rs
@@ -12,7 +12,7 @@ compile_error! {
}

pub mod api;
mod domain;
pub(crate) mod domain;
#[deprecated(since = "0.1.6", note = "This module will be deleted in v0.2")]
pub mod entity;
mod formatter;
97 changes: 49 additions & 48 deletions core/src/parser.rs
@@ -3,21 +3,26 @@ use std::sync::Arc;
use crate::api::AsyncApi;
#[cfg(feature = "blocking")]
use crate::api::BlockingApi;
use crate::domain::common::token::Token;
use crate::domain::geolonia::entity::Address;
use crate::domain::geolonia::error::{Error, ParseErrorKind};
use crate::tokenizer::Tokenizer;
use crate::tokenizer::{End, Tokenizer};
use serde::Serialize;

pub mod adapter;

impl<T> From<Tokenizer<T>> for Address {
fn from(value: Tokenizer<T>) -> Self {
Self {
prefecture: value.prefecture_name.unwrap_or("".to_string()),
city: value.city_name.unwrap_or("".to_string()),
town: value.town_name.unwrap_or("".to_string()),
rest: value.rest,
impl From<Tokenizer<End>> for Address {
fn from(value: Tokenizer<End>) -> Self {
let mut address = Address::new("", "", "", "");
for token in value.tokens {
match token {
Token::Prefecture(prefecture) => address.prefecture = prefecture.prefecture_name,
Token::City(city) => address.city = city.city_name,
Token::Town(town) => address.town = town.town_name,
Token::Rest(rest) => address.rest = rest,
}
}
address
}
}
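
The conversion no longer reads dedicated Option<String> fields off the tokenizer; it folds the token stream into an Address, so any field whose token never appeared keeps the empty default. A standalone sketch of that fold using simplified mirror types (illustrative only, not the crate's):

// Simplified mirror of the fold above; Addr and Tok are stand-ins, not crate types.
#[derive(Default, Debug)]
struct Addr {
    prefecture: String,
    city: String,
    town: String,
    rest: String,
}

enum Tok {
    Prefecture(String),
    City(String),
    Town(String),
    Rest(String),
}

fn fold_tokens(tokens: Vec<Tok>) -> Addr {
    let mut addr = Addr::default();
    for token in tokens {
        match token {
            Tok::Prefecture(s) => addr.prefecture = s,
            Tok::City(s) => addr.city = s,
            Tok::Town(s) => addr.town = s,
            Tok::Rest(s) => addr.rest = s,
        }
    }
    addr
}

fn main() {
    let addr = fold_tokens(vec![
        Tok::Prefecture("東京都".into()),
        Tok::City("千代田区".into()),
        Tok::Rest("丸の内1-9-1".into()),
    ]);
    // Town never appeared in the stream, so it stays at the empty-string default.
    println!("{addr:?}");
}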

@@ -76,27 +81,27 @@ impl Parser {
pub async fn parse(api: Arc<AsyncApi>, input: &str) -> ParseResult {
let tokenizer = Tokenizer::new(input);
// Identify the prefecture
let Ok(tokenizer) = tokenizer.read_prefecture() else {
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::Prefecture)),
};
let (prefecture_name, tokenizer) = match tokenizer.read_prefecture() {
Ok(found) => found,
Err(tokenizer) => {
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::Prefecture)),
}
}
};
// Fetch the list of municipality names for that prefecture
let prefecture = match api
.get_prefecture_master(tokenizer.prefecture_name.as_ref().unwrap())
.await
{
let prefecture = match api.get_prefecture_master(&prefecture_name).await {
Err(error) => {
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(error),
};
}
Ok(result) => result,
};
// Identify the city name
let tokenizer = match tokenizer.read_city(&prefecture.cities) {
let (city_name, tokenizer) = match tokenizer.read_city(&prefecture.cities) {
Ok(found) => found,
Err(not_found) => {
// If the city cannot be identified and the feature flag is enabled, consider the possibility that the county name has been omitted
@@ -105,40 +110,35 @@ pub async fn parse(api: Arc<AsyncApi>, input: &str) -> ParseResult {
_ => {
// If it still cannot be found, give up
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(Error::new_parse_error(ParseErrorKind::City)),
};
}
}
}
};
// Fetch the list of town names for that city
let city = match api
.get_city_master(
tokenizer.prefecture_name.as_ref().unwrap(),
tokenizer.city_name.as_ref().unwrap(),
)
.await
{
let city = match api.get_city_master(&prefecture_name, &city_name).await {
Err(error) => {
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(error),
};
}
Ok(result) => result,
};
// Identify the town name
let Ok(tokenizer) = tokenizer.read_town(city.towns.iter().map(|x| x.name.clone()).collect())
let Ok((_, tokenizer)) =
tokenizer.read_town(city.towns.iter().map(|x| x.name.clone()).collect())
else {
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(Error::new_parse_error(ParseErrorKind::Town)),
};
};

ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: None,
}
}
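
Each read_* step now returns the matched name together with the advanced tokenizer on success, and hands the tokenizer back unchanged on failure so the caller can still emit a partial Address via finish(). A standalone sketch of that shape (illustrative types, not the crate's API):

// Stepper is a stand-in for the tokenizer; the Result shape is what matters.
struct Stepper {
    rest: String,
}

impl Stepper {
    fn read_prefix(mut self, prefix: &str) -> Result<(String, Stepper), Stepper> {
        match self.rest.strip_prefix(prefix) {
            Some(stripped) => {
                let advanced = stripped.to_string();
                self.rest = advanced;
                Ok((prefix.to_string(), self)) // matched name plus advanced state
            }
            None => Err(self), // hand the state back instead of dropping it
        }
    }
}

fn main() {
    let stepper = Stepper { rest: "東京都千代田区".to_string() };
    let (name, stepper) = match stepper.read_prefix("東京都") {
        Ok(found) => found,
        Err(stepper) => {
            // Mirrors Address::from(tokenizer) above: the state survives the failure.
            println!("no match; rest = {}", stepper.rest);
            return;
        }
    };
    println!("matched {name}; rest = {}", stepper.rest);
}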
@@ -247,57 +247,58 @@ mod tests {
#[cfg(feature = "blocking")]
pub fn parse_blocking(api: Arc<BlockingApi>, input: &str) -> ParseResult {
let tokenizer = Tokenizer::new(input);
let Ok(tokenizer) = tokenizer.read_prefecture() else {
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::Prefecture)),
};
let (prefecture_name, tokenizer) = match tokenizer.read_prefecture() {
Ok(found) => found,
Err(tokenizer) => {
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::Prefecture)),
}
}
};
let prefecture = match api.get_prefecture_master(tokenizer.prefecture_name.as_ref().unwrap()) {
let prefecture = match api.get_prefecture_master(&prefecture_name) {
Err(error) => {
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(error),
};
}
Ok(result) => result,
};
let tokenizer = match tokenizer.read_city(&prefecture.cities) {
let (city_name, tokenizer) = match tokenizer.read_city(&prefecture.cities) {
Ok(found) => found,
Err(not_found) => {
match not_found.read_city_with_county_name_completion(&prefecture.cities) {
Ok(found) if cfg!(feature = "city-name-correction") => found,
_ => {
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(Error::new_parse_error(ParseErrorKind::City)),
};
}
}
}
};
let city = match api.get_city_master(
tokenizer.prefecture_name.as_ref().unwrap(),
tokenizer.city_name.as_ref().unwrap(),
) {
let city = match api.get_city_master(&prefecture_name, &city_name) {
Err(error) => {
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(error),
};
}
Ok(result) => result,
};
let Ok(tokenizer) = tokenizer.read_town(city.towns.iter().map(|x| x.name.clone()).collect())
let Ok((_, tokenizer)) =
tokenizer.read_town(city.towns.iter().map(|x| x.name.clone()).collect())
else {
return ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: Some(Error::new_parse_error(ParseErrorKind::Town)),
};
};

ParseResult {
address: Address::from(tokenizer),
address: Address::from(tokenizer.finish()),
error: None,
}
}
27 changes: 22 additions & 5 deletions core/src/tokenizer.rs
@@ -3,6 +3,7 @@ pub(crate) mod read_city_with_county_name_completion;
pub(crate) mod read_prefecture;
pub(crate) mod read_town;

use crate::domain::common::token::{append_token, Token};
use std::marker::PhantomData;

#[derive(Debug)]
@@ -20,10 +21,26 @@ pub(crate) struct End;

#[derive(Debug)]
pub struct Tokenizer<State> {
input: String,
pub(crate) prefecture_name: Option<String>,
pub(crate) city_name: Option<String>,
pub(crate) town_name: Option<String>,
pub(crate) rest: String,
pub(crate) tokens: Vec<Token>,
rest: String,
_state: PhantomData<State>,
}

impl<T> Tokenizer<T> {
fn get_prefecture_name(&self) -> Option<&str> {
for token in &self.tokens {
if let Token::Prefecture(prefecture) = token {
return Some(&prefecture.prefecture_name);
};
}
None
}

pub(crate) fn finish(&self) -> Tokenizer<End> {
Tokenizer {
tokens: append_token(&self.tokens, Token::Rest(self.rest.clone())),
rest: "".to_string(),
_state: PhantomData::<End>,
}
}
}
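
The refactor leans on the typestate pattern: PhantomData<State> tags the tokenizer with a zero-sized, compile-time-only state, so a step such as read_city can only be called once read_prefecture has succeeded. A self-contained sketch of the technique (illustrative names, not the crate's code):

// Minimal typestate demo; Machine, Start, and PrefectureRead are stand-ins.
use std::marker::PhantomData;

struct Start;
struct PrefectureRead;

struct Machine<State> {
    rest: String,
    _state: PhantomData<State>,
}

impl Machine<Start> {
    fn new(input: &str) -> Self {
        Machine { rest: input.to_string(), _state: PhantomData }
    }

    // Consumes the Start-state machine and returns one in the next state.
    fn read_prefecture(self) -> Machine<PrefectureRead> {
        Machine { rest: self.rest, _state: PhantomData }
    }
}

impl Machine<PrefectureRead> {
    fn read_city(self) -> String {
        self.rest
    }
}

fn main() {
    let machine = Machine::new("東京都千代田区");
    // machine.read_city(); // would not compile: read_city needs PrefectureRead
    let machine = machine.read_prefecture();
    println!("{}", machine.read_city());
}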