Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement json source #10

Merged
merged 1 commit into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@

/build export-ignore
/tests export-ignore
# Raw upstream archives are only needed by resources/converter.php; keep them out of exported packages.
/resources/Zip32_11208.json.zip export-ignore
/resources/Zip32_11208.csv.zip export-ignore
.editorconfig export-ignore
.gitattributes export-ignore
.gitignore export-ignore
.nitpick.json export-ignore
.php_cs export-ignore
.travis.yml export-ignore
phpunit.xml.dist export-ignore

5 changes: 0 additions & 5 deletions .nitpick.json

This file was deleted.

114 changes: 0 additions & 114 deletions .php_cs

This file was deleted.

21 changes: 0 additions & 21 deletions .scrutinizer.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .styleci.yml

This file was deleted.

44 changes: 0 additions & 44 deletions .travis.yml

This file was deleted.

20 changes: 0 additions & 20 deletions phpcs.xml

This file was deleted.

Binary file added resources/Zip32_11208.json.zip
Binary file not shown.
Binary file removed resources/Zip32_utf8_10501_1.zip
Binary file not shown.
40 changes: 29 additions & 11 deletions resources/converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

include __DIR__.'/../vendor/autoload.php';

use Recca0120\Twzipcode\Sources\CSV;
use Recca0120\Twzipcode\Sources\Csv;
use Recca0120\Twzipcode\Sources\Json;
use Recca0120\Twzipcode\Storages\File;

// https://data.gov.tw/dataset/5948
$downloadUrl = 'https://quality.data.gov.tw/dq_download_json.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a';
$extension = 'json';
$file = __DIR__.'/Zip32_11208.'.$extension.'.zip';

// Promote PHP warnings and notices to ErrorException so that a failure such as
// an unreachable download URL aborts the script instead of being ignored.
set_error_handler(static function ($severity, $message, $file, $line) {
    // Honour the active error_reporting mask; this also covers the @ operator.
    if ((error_reporting() & $severity) === 0) {
        return false;
    }

    // Deprecation notices are advisory: leave them to PHP's standard handler
    // rather than turning them into fatal exceptions on newer PHP versions.
    if (($severity & (E_DEPRECATED | E_USER_DEPRECATED)) !== 0) {
        return false;
    }

    throw new ErrorException($message, $severity, $severity, $file, $line);
});

$start = microtime(true);
$file = __DIR__.'/Zip32_utf8_10501_1.zip';

// https://data.gov.tw/dataset/5948
$url = 'https://quality.data.gov.tw/dq_download_csv.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a';

if (file_exists($file) === false) {
touch($file);
function csv($url)
{
$contents = file_get_contents($url);

$encoding = mb_detect_encoding($contents, ['UCS-2LE', 'BIG5', 'UTF-8']);
Expand All @@ -30,11 +30,29 @@
throw new RuntimeException($contents);
}

return $contents;
}

/**
 * Fetch the JSON dataset and return the raw response body unchanged.
 *
 * @param string $url location passed to file_get_contents()
 * @return string non-empty response body
 *
 * @throws RuntimeException when the read fails or the body is empty
 */
function json($url)
{
    $contents = file_get_contents($url);

    // Fail loudly: an empty or failed download must not be written to the zip,
    // because the cached archive would then block any later re-download.
    if ($contents === false || $contents === '') {
        throw new RuntimeException('Unable to download JSON data from: '.$url);
    }

    return $contents;
}

$start = microtime(true);
if (file_exists($file) === false) {
$contents = $extension($downloadUrl);

touch($file);
$zip = new ZipArchive;
$zip->open($file, ZipArchive::OVERWRITE);
$zip->addFromString(pathinfo($file, PATHINFO_FILENAME).'.csv', $contents);
$zip->addFromString(pathinfo($file, PATHINFO_FILENAME).'.'.$extension, $contents);
$zip->close();
}

(new File)->load(new CSV($file));
// Map the configured $extension ('csv' or 'json') to the source class that reads it.
$lookup = ['csv' => Csv::class, 'json' => Json::class];
$class = $lookup[$extension];

// Instantiate the selected source with the path of the zip archive.
$source = new $class($file);

// Hand the source to the File storage.
// NOTE(review): presumably this rebuilds resources/data/zip5.rules — confirm against File::load().
(new File)->load($source);
// Report the seconds elapsed since $start was taken with microtime(true).
echo 'benchmark: '.(microtime(true) - $start)."\n";
Binary file modified resources/data/zip5.rules
Binary file not shown.
29 changes: 0 additions & 29 deletions ruleset.xml

This file was deleted.

3 changes: 0 additions & 3 deletions src/Address.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ class Address
/** @var Normalizer */
public $normalizer;

/** @var Tricky */
public $tricky;

/** @var JArray */
public $tokens = [];

Expand Down
13 changes: 9 additions & 4 deletions src/Rule.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ private function tokenize($rule, Closure $addressResolver)
'[連至單雙全](?=[\d全]|$)',
];

$addressResolver($this->normalize($rule)->replace('/'.implode('|', $pattern).'/u',
$addressResolver($this->normalize($rule)->replace(
'/'.implode('|', $pattern).'/u',
function ($m) use (&$tokens) {
$token = &$m[0];
if ($token === '連') {
Expand All @@ -148,7 +149,8 @@ function ($m) use (&$tokens) {
$tokens[] = $token;

return $token === '附號全' ? '號' : '';
}));
}
));

return $tokens;
}
Expand Down Expand Up @@ -187,8 +189,11 @@ private function normalizeAddress(Address $address, JArray $ruleAddressTokens)

return new Address(
new JArray($address->tokens()->filter(function ($token) use ($removeUnits) {
return isset($token[Address::UNIT]) === true && in_array($token[Address::UNIT], $removeUnits,
true) === false;
return isset($token[Address::UNIT]) === true && in_array(
$token[Address::UNIT],
$removeUnits,
true
) === false;
})->map(function ($token) {
return implode('', $token);
}))
Expand Down
3 changes: 0 additions & 3 deletions src/Rules.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ class Rules
*/
private $storage;

/**
* @param Storage|null $storage
*/
public function __construct(Storage $storage = null)
{
$this->storage = $storage ?: new File;
Expand Down
2 changes: 1 addition & 1 deletion src/Sources/CSV.php → src/Sources/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace Recca0120\Twzipcode\Sources;

class CSV extends Source
class Csv extends Source
{
/** @var string */
protected $file;
Expand Down
Loading
Loading