Skip to content

Commit

Permalink
implement json source
Browse files Browse the repository at this point in the history
  • Loading branch information
recca0120 committed Nov 11, 2024
1 parent fef33f3 commit b035437
Show file tree
Hide file tree
Showing 24 changed files with 160 additions and 269 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
.php_cs export-ignore
.travis.yml export-ignore
phpunit.xml.dist export-ignore
pint.json export-ignore

5 changes: 0 additions & 5 deletions .nitpick.json

This file was deleted.

114 changes: 0 additions & 114 deletions .php_cs

This file was deleted.

21 changes: 0 additions & 21 deletions .scrutinizer.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .styleci.yml

This file was deleted.

44 changes: 0 additions & 44 deletions .travis.yml

This file was deleted.

20 changes: 0 additions & 20 deletions phpcs.xml

This file was deleted.

Binary file added resources/Zip32_11208.json.zip
Binary file not shown.
Binary file removed resources/Zip32_utf8_10501_1.zip
Binary file not shown.
40 changes: 29 additions & 11 deletions resources/converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

include __DIR__.'/../vendor/autoload.php';

use Recca0120\Twzipcode\Sources\CSV;
use Recca0120\Twzipcode\Sources\Csv;
use Recca0120\Twzipcode\Sources\Json;
use Recca0120\Twzipcode\Storages\File;

// https://data.gov.tw/dataset/5948
$downloadUrl = 'https://quality.data.gov.tw/dq_download_json.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a';
$extension = 'json';
$file = __DIR__.'/Zip32_11208.'.$extension.'.zip';

// Turn errors reported to the handler into ErrorException so a failed step
// aborts the script instead of continuing silently.
set_error_handler(static function ($severity, $message, $file, $line) {
// The severity is passed twice: once as the exception code, once as the severity.
throw new ErrorException($message, $severity, $severity, $file, $line);
});

$start = microtime(true);
$file = __DIR__.'/Zip32_utf8_10501_1.zip';

// https://data.gov.tw/dataset/5948
$url = 'https://quality.data.gov.tw/dq_download_csv.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a';

if (file_exists($file) === false) {
touch($file);
function csv($url)
{
$contents = file_get_contents($url);

$encoding = mb_detect_encoding($contents, ['UCS-2LE', 'BIG5', 'UTF-8']);
Expand All @@ -30,11 +30,29 @@
throw new RuntimeException($contents);
}

return $contents;
}

/**
 * Fetch the JSON dataset and return the body exactly as received.
 *
 * @param string $url location to read from
 * @return string|false raw contents, or false when file_get_contents() fails
 */
function json($url)
{
    $payload = file_get_contents($url);

    return $payload;
}

$start = microtime(true);
// Download and zip the dataset only when the archive is not already on disk.
if (file_exists($file) === false) {
// $extension is used as a function name here, so it selects csv() or json().
$contents = $extension($downloadUrl);

// Create the file first, then open it with OVERWRITE to start from an empty archive.
touch($file);
$zip = new ZipArchive;
$zip->open($file, ZipArchive::OVERWRITE);
$zip->addFromString(pathinfo($file, PATHINFO_FILENAME).'.csv', $contents);
// NOTE(review): when $file is built as '<name>.<extension>.zip', PATHINFO_FILENAME
// already ends in the extension, so this entry name repeats it
// (e.g. 'x.json.json') — confirm how the source classes locate the entry.
$zip->addFromString(pathinfo($file, PATHINFO_FILENAME).'.'.$extension, $contents);
$zip->close();
}

(new File)->load(new CSV($file));
// Map the dataset extension to the source class that reads it.
$lookup = ['csv' => Csv::class, 'json' => Json::class];
$class = $lookup[$extension];

// Instantiate the selected source with the archive path.
$source = new $class($file);

// Load the source into the file storage.
(new File)->load($source);
echo 'benchmark: '.(microtime(true) - $start)."\n";
Binary file modified resources/data/zip5.rules
Binary file not shown.
29 changes: 0 additions & 29 deletions ruleset.xml

This file was deleted.

3 changes: 0 additions & 3 deletions src/Address.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ class Address
/** @var Normalizer */
public $normalizer;

/** @var Tricky */
public $tricky;

/** @var JArray */
public $tokens = [];

Expand Down
13 changes: 9 additions & 4 deletions src/Rule.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ private function tokenize($rule, Closure $addressResolver)
'[連至單雙全](?=[\d全]|$)',
];

$addressResolver($this->normalize($rule)->replace('/'.implode('|', $pattern).'/u',
$addressResolver($this->normalize($rule)->replace(
'/'.implode('|', $pattern).'/u',
function ($m) use (&$tokens) {
$token = &$m[0];
if ($token === '') {
Expand All @@ -148,7 +149,8 @@ function ($m) use (&$tokens) {
$tokens[] = $token;

return $token === '附號全' ? '' : '';
}));
}
));

return $tokens;
}
Expand Down Expand Up @@ -187,8 +189,11 @@ private function normalizeAddress(Address $address, JArray $ruleAddressTokens)

return new Address(
new JArray($address->tokens()->filter(function ($token) use ($removeUnits) {
return isset($token[Address::UNIT]) === true && in_array($token[Address::UNIT], $removeUnits,
true) === false;
return isset($token[Address::UNIT]) === true && in_array(
$token[Address::UNIT],
$removeUnits,
true
) === false;
})->map(function ($token) {
return implode('', $token);
}))
Expand Down
3 changes: 0 additions & 3 deletions src/Rules.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ class Rules
*/
private $storage;

/**
* @param Storage|null $storage
*/
public function __construct(Storage $storage = null)
{
$this->storage = $storage ?: new File;
Expand Down
2 changes: 1 addition & 1 deletion src/Sources/CSV.php → src/Sources/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace Recca0120\Twzipcode\Sources;

class CSV extends Source
class Csv extends Source
{
/** @var string */
protected $file;
Expand Down
21 changes: 21 additions & 0 deletions src/Sources/Json.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

namespace Recca0120\Twzipcode\Sources;

/**
 * Source that produces rows from JSON-encoded contents instead of CSV lines.
 */
class Json extends Csv
{
    /**
     * Decode the JSON contents and map every record to a row.
     *
     * The 郵遞區號, 縣市名稱 and 鄉鎮市區 fields of each record become the
     * zipcode, county and district entries; the rule entry is all fields of
     * the record joined with commas.
     *
     * @return array<int, array{zipcode: string, county: string, district: string, rule: string}>
     *
     * @throws \RuntimeException when the contents do not decode to an array
     */
    protected function rows()
    {
        $records = json_decode($this->contents(), true);

        // json_decode() yields null (or a scalar) for invalid or unexpected
        // input; fail with a clear message rather than inside array_map().
        if (is_array($records) === false) {
            throw new \RuntimeException('Unable to decode JSON source: '.json_last_error_msg());
        }

        return array_map(static function ($data) {
            return [
                'zipcode' => $data['郵遞區號'],
                'county' => $data['縣市名稱'],
                'district' => $data['鄉鎮市區'],
                'rule' => implode(',', $data),
            ];
        }, $records);
    }
}
Loading

0 comments on commit b035437

Please sign in to comment.