Skip to content

Commit

Permalink
implement json source
Browse files Browse the repository at this point in the history
  • Loading branch information
recca0120 committed Nov 11, 2024
1 parent fef33f3 commit 2577381
Show file tree
Hide file tree
Showing 26 changed files with 179 additions and 274 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
.php_cs export-ignore
.travis.yml export-ignore
phpunit.xml.dist export-ignore
pint.json export-ignore

114 changes: 0 additions & 114 deletions .php_cs

This file was deleted.

21 changes: 0 additions & 21 deletions .scrutinizer.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .styleci.yml

This file was deleted.

44 changes: 0 additions & 44 deletions .travis.yml

This file was deleted.

20 changes: 0 additions & 20 deletions phpcs.xml

This file was deleted.

3 changes: 3 additions & 0 deletions pint.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"preset": "psr12"
}
Binary file added resources/Zip32_11208.zip
Binary file not shown.
Binary file removed resources/Zip32_utf8_10501_1.zip
Binary file not shown.
45 changes: 33 additions & 12 deletions resources/converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

include __DIR__.'/../vendor/autoload.php';

use Recca0120\Twzipcode\Sources\CSV;
use Recca0120\Twzipcode\Sources\Csv;
use Recca0120\Twzipcode\Sources\Json;
use Recca0120\Twzipcode\Storages\File;

// https://data.gov.tw/dataset/5948
$downloadUrl = 'https://quality.data.gov.tw/dq_download_json.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a';
$extension = 'json';
$file = __DIR__.'/Zip32_11208.zip';

// Turn every error PHP hands to this handler into an ErrorException, so that
// problems surface as exceptions instead of warnings or notices.
// The reported error level is passed as both the exception code and its severity.
set_error_handler(static function ($severity, $message, $file, $line) {
throw new ErrorException($message, $severity, $severity, $file, $line);
});

$start = microtime(true);
$file = __DIR__.'/Zip32_utf8_10501_1.zip';

// https://data.gov.tw/dataset/5948
$url = 'https://quality.data.gov.tw/dq_download_csv.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a';

if (file_exists($file) === false) {
touch($file);
function csv($url)
{
$contents = file_get_contents($url);

$encoding = mb_detect_encoding($contents, ['UCS-2LE', 'BIG5', 'UTF-8']);
Expand All @@ -30,11 +30,32 @@
throw new RuntimeException($contents);
}

$zip = new ZipArchive;
return $contents;
}

/**
 * Read the contents found at $url and return them without any transformation.
 *
 * @param string $url location handed directly to file_get_contents()
 * @return string the raw contents that were read
 *
 * @throws RuntimeException when the contents cannot be read
 */
function json($url)
{
    $contents = file_get_contents($url);

    // file_get_contents() reports failure by returning false; fail here rather
    // than letting a boolean travel on to the caller as if it were the payload.
    if ($contents === false) {
        throw new RuntimeException('Unable to read contents from '.$url);
    }

    return $contents;
}

$start = microtime(true);
if (file_exists($file) === false) {
$contents = $extension($downloadUrl);

touch($file);
$zip = new ZipArchive();
$zip->open($file, ZipArchive::OVERWRITE);
$zip->addFromString(pathinfo($file, PATHINFO_FILENAME).'.csv', $contents);
$zip->addFromString(pathinfo($file, PATHINFO_FILENAME).'.'.$extension, $contents);
$zip->close();
}

(new File)->load(new CSV($file));
$lookup = [
'csv' => Csv::class,
'json' => Json::class,
];
$class = $lookup[$extension];

$source = new $class($file);

(new File())->load($source);
echo 'benchmark: '.(microtime(true) - $start)."\n";
Binary file modified resources/data/zip5.rules
Binary file not shown.
29 changes: 0 additions & 29 deletions ruleset.xml

This file was deleted.

8 changes: 4 additions & 4 deletions src/Address.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
class Address
{
/** @var int */
const NO = 0;
public const NO = 0;

/** @var int */
const SUBNO = 1;
public const SUBNO = 1;

/** @var int */
const NAME = 2;
public const NAME = 2;

/** @var int */
const UNIT = 3;
public const UNIT = 3;

/** @var Normalizer */
public $normalizer;
Expand Down
13 changes: 9 additions & 4 deletions src/Rule.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ private function tokenize($rule, Closure $addressResolver)
'[連至單雙全](?=[\d全]|$)',
];

$addressResolver($this->normalize($rule)->replace('/'.implode('|', $pattern).'/u',
$addressResolver($this->normalize($rule)->replace(
'/'.implode('|', $pattern).'/u',
function ($m) use (&$tokens) {
$token = &$m[0];
if ($token === '') {
Expand All @@ -148,7 +149,8 @@ function ($m) use (&$tokens) {
$tokens[] = $token;

return $token === '附號全' ? '' : '';
}));
}
));

return $tokens;
}
Expand Down Expand Up @@ -187,8 +189,11 @@ private function normalizeAddress(Address $address, JArray $ruleAddressTokens)

return new Address(
new JArray($address->tokens()->filter(function ($token) use ($removeUnits) {
return isset($token[Address::UNIT]) === true && in_array($token[Address::UNIT], $removeUnits,
true) === false;
return isset($token[Address::UNIT]) === true && in_array(
$token[Address::UNIT],
$removeUnits,
true
) === false;
})->map(function ($token) {
return implode('', $token);
}))
Expand Down
5 changes: 1 addition & 4 deletions src/Rules.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ class Rules
*/
private $storage;

/**
* @param Storage|null $storage
*/
public function __construct(Storage $storage = null)
{
$this->storage = $storage ?: new File;
$this->storage = $storage ?: new File();
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/Sources/CSV.php → src/Sources/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace Recca0120\Twzipcode\Sources;

class CSV extends Source
class Csv extends Source
{
/** @var string */
protected $file;
Expand Down
Loading

0 comments on commit 2577381

Please sign in to comment.