From ab69e48be75245442c95b990a452f7ec5dbdce7c Mon Sep 17 00:00:00 2001 From: recca0120 Date: Sun, 10 Nov 2024 12:56:59 +0800 Subject: [PATCH] Source --- .github/workflows/tests.yml | 2 +- resources/converter.php | 5 +- src/Contracts/Source.php | 8 ++ src/Contracts/Storage.php | 16 ++-- src/Sources/CSV.php | 23 ++++++ src/Sources/Source.php | 84 +++++++++++++++++++ src/Sources/Text.php | 24 ++++++ src/Storages/File.php | 138 ++++++++------------------------ tests/Moskytw/DirectoryTest.php | 5 +- tests/RulesTest.php | 5 +- tests/Storages/FileTest.php | 8 +- tests/ZipcodeTest.php | 5 +- 12 files changed, 196 insertions(+), 127 deletions(-) create mode 100644 src/Contracts/Source.php create mode 100644 src/Sources/CSV.php create mode 100644 src/Sources/Source.php create mode 100644 src/Sources/Text.php diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8dfb311..d764366 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: true matrix: - php: [ '5.5.9', '5.6', '7.1','7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3' ] + php: [ '5.5.9', '5.6', '7.0', '7.1','7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3' ] stability: [ prefer-stable ] name: PHP ${{ matrix.php }} - ${{ matrix.stability }} diff --git a/resources/converter.php b/resources/converter.php index 7181b3c..5ef2de0 100644 --- a/resources/converter.php +++ b/resources/converter.php @@ -2,6 +2,7 @@ include __DIR__.'/../vendor/autoload.php'; +use Recca0120\Twzipcode\Sources\CSV; use Recca0120\Twzipcode\Storages\File; set_error_handler(static function ($severity, $message, $file, $line) { @@ -14,6 +15,8 @@ // https://data.gov.tw/dataset/5948 $url = 'https://quality.data.gov.tw/dq_download_csv.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a'; +$contents = file_get_contents($url); + if (file_exists($file) === false) { touch($file); $contents = file_get_contents($url); @@ -35,5 +38,5 @@ $zip->close(); } -(new File)->loadFile($file); +(new File)->load(new CSV($file)); echo 'benchmark: '.(microtime(true) - $start)."\n"; diff --git a/src/Contracts/Source.php b/src/Contracts/Source.php new file mode 100644 index 0000000..8ef91e9 --- /dev/null +++ b/src/Contracts/Source.php @@ -0,0 +1,8 @@ +file = $file; + $this->extension = pathinfo($this->file, PATHINFO_EXTENSION); + } + + protected function getContents() + { + return $this->extension === 'zip' ? static::unzip($this->file) : file_get_contents($this->file); + } +} diff --git a/src/Sources/Source.php b/src/Sources/Source.php new file mode 100644 index 0000000..4fcb5bc --- /dev/null +++ b/src/Sources/Source.php @@ -0,0 +1,84 @@ + '263', + '新竹縣寶山鄉' => '308', + '臺南市新市區' => '744', + ]; + + public function each(callable $callback) + { + static::eachGroup(static::prepare($this->rows()), $callback); + } + + /** + * @return string + */ + abstract protected function getContents(); + + /** + * @return array{array{zipcode: string, county: string, district: string, text: string}} $rows + */ + protected function rows() + { + $lines = preg_split('/\n|\r\n$/', $this->getContents()); + $lines = array_filter($lines, static function ($line) { + return ! empty(trim($line)); + }); + + return array_map(static function ($line) { + $data = explode(',', $line); + + return ['zipcode' => $data[0], 'county' => $data[1], 'district' => $data[2], 'text' => $line]; + }, $lines); + } + + /** + * @param array{array{zipcode: string, county: string, district: string, text: string}} $rows + * @return array + */ + protected static function prepare($rows) + { + return array_reduce($rows, static function ($results, $row) { + $zip3 = ! empty(self::$tricks[$row['county'].$row['district']]) + ? self::$tricks[$row['county'].$row['district']] + : substr($row['zipcode'], 0, 3); + + $results[$row['county']][$row['district']][$zip3][] = $row['text']; + + return $results; + }, []); + } + + protected static function eachGroup($ruleGroup, $callback) + { + foreach ($ruleGroup as $county => $districts) { + foreach ($districts as $district => $addresses) { + foreach ($addresses as $zipcode => $rule) { + $callback($zipcode, $county, $district, $rule); + } + } + } + } + + /** + * @param string $file + * @return string + */ + protected static function unzip($file) + { + $zip = new ZipArchive; + $zip->open($file); + $contents = $zip->getFromIndex(0); + $zip->close(); + + return $contents; + } +} diff --git a/src/Sources/Text.php b/src/Sources/Text.php new file mode 100644 index 0000000..28f054f --- /dev/null +++ b/src/Sources/Text.php @@ -0,0 +1,24 @@ +text = $text; + } + + protected function getContents() + { + return $this->text; + } +} diff --git a/src/Storages/File.php b/src/Storages/File.php index 7ee8704..5d8beba 100644 --- a/src/Storages/File.php +++ b/src/Storages/File.php @@ -2,13 +2,12 @@ namespace Recca0120\Twzipcode\Storages; -use Closure; use Recca0120\Lodash\JArray; use Recca0120\Lodash\JString; use Recca0120\Twzipcode\Address; +use Recca0120\Twzipcode\Contracts\Source; use Recca0120\Twzipcode\Contracts\Storage; use Recca0120\Twzipcode\Rule; -use ZipArchive; class File implements Storage { @@ -50,36 +49,6 @@ public function zip3(Address $address) return null; } - /** - * @param string $source - * @return $this - */ - public function load($source) - { - $zip5 = []; - $zip3 = []; - $this->each( - $this->prepareSource($source), - function ($zipcode, $county, $district, $rules) use (&$zip5, &$zip3) { - $zip5[$zipcode] = $this->compress(array_map(static function ($rule) { - return new Rule($rule); - }, $rules)); - - if (empty($zip3[$county])) { - $zip3[$county] = substr($zipcode, 0, 1); - } - - if (empty($zip3[$county.$district])) { - $zip3[$county.$district] = substr($zipcode, 0, 3); - } - }); - - $this->store('zip3', $zip3); - $this->store('zip5', $zip5); - - return $this; - } - /** * @param string $zip3 * @return JArray @@ -94,27 +63,53 @@ public function rules($zip3) } /** - * @param string $file * @return $this */ - public function loadFile($file = null) + public function flush() { - $file = $file ?: $this->path.'../Zip32_utf8_10501_1.csv'; - $this->load($this->getSource($file)); + static::$cached = ['zip3' => null, 'zip5' => null]; return $this; } /** + * @param Source $source * @return $this */ - public function flush() + public function load($source) { - static::$cached = ['zip3' => null, 'zip5' => null]; + $zip5 = []; + $zip3 = []; + $source->each(function ($zipcode, $county, $district, $rules) use (&$zip5, &$zip3) { + $zip5[$zipcode] = $this->compress(array_map(static function ($rule) { + return new Rule($rule); + }, $rules)); + + if (empty($zip3[$county])) { + $zip3[$county] = substr($zipcode, 0, 1); + } + + if (empty($zip3[$county.$district])) { + $zip3[$county.$district] = substr($zipcode, 0, 3); + } + }); + + $this->store('zip3', $zip3); + $this->store('zip5', $zip5); return $this; } + /** + * @param string $filename + * @param array $data + * @return void + */ + private function store($filename, $data) + { + file_put_contents($this->path.$filename.$this->suffix, $this->compress($data)); + } + /** * @param string $filename * @return void @@ -134,63 +129,6 @@ private function restore($filename) )); } - /** - * @param string $file - * @return string - */ - private function getSource($file) - { - $extension = pathinfo($file, PATHINFO_EXTENSION); - - if ($extension === 'zip') { - $zip = new ZipArchive; - $zip->open($file); - $contents = $zip->getFromIndex(0); - $zip->close(); - } else { - $contents = file_get_contents($file); - } - - return $contents; - } - - /** - * @param string $source - * @return array - */ - private function prepareSource($source) - { - $tricks = ['宜蘭縣壯圍鄉' => '263', '新竹縣寶山鄉' => '308', '臺南市新市區' => '744']; - $results = []; - $rules = preg_split('/\n|\r\n$/', $source); - foreach ($rules as $rule) { - if (! empty(trim($rule))) { - list($zipcode, $county, $district) = explode(',', $rule); - $zip3 = ! empty($tricks[$county.$district]) - ? $tricks[$county.$district] - : substr($zipcode, 0, 3); - $results[$county][$district][$zip3][] = $rule; - } - } - - return $results; - } - - /** - * @param array $ruleGroup - * @param Closure $callback - */ - private function each($ruleGroup, $callback) - { - foreach ($ruleGroup as $county => $districts) { - foreach ($districts as $district => $addresses) { - foreach ($addresses as $zipcode => $rule) { - $callback($zipcode, $county, $district, $rule); - } - } - } - } - /** * @param array $array * @return string @@ -208,14 +146,4 @@ private function decompress($compressed) { return unserialize(gzuncompress($compressed)); } - - /** - * @param string $filename - * @param array $data - * @return void - */ - private function store($filename, $data) - { - file_put_contents($this->path.$filename.$this->suffix, $this->compress($data)); - } } diff --git a/tests/Moskytw/DirectoryTest.php b/tests/Moskytw/DirectoryTest.php index ef27bb5..f9f8ba2 100644 --- a/tests/Moskytw/DirectoryTest.php +++ b/tests/Moskytw/DirectoryTest.php @@ -8,6 +8,7 @@ use Moskytw\Directory; use org\bovigo\vfs\vfsStream; use PHPUnit\Framework\TestCase; +use Recca0120\Twzipcode\Sources\Text; class DirectoryTest extends TestCase { @@ -19,7 +20,7 @@ protected function beforeEach() { $root = vfsStream::setup(); $this->directory = new Directory($root->url()); - $this->directory->load(' + $this->directory->load(new Text(' 10058,臺北市,中正區,八德路1段,全 10079,臺北市,中正區,三元街,單全 10070,臺北市,中正區,三元街,雙 48號以下 @@ -82,7 +83,7 @@ protected function beforeEach() 81357,高雄市,左營區,大順一路,單 91號至 95號 81357,高雄市,左營區,大順一路,雙 96號至 568號 81357,高雄市,左營區,大順一路,單 201號至 389巷 - '); + ')); } public function test_find() diff --git a/tests/RulesTest.php b/tests/RulesTest.php index 952851c..07faf3d 100644 --- a/tests/RulesTest.php +++ b/tests/RulesTest.php @@ -5,6 +5,7 @@ use org\bovigo\vfs\vfsStream; use PHPUnit\Framework\TestCase; use Recca0120\Twzipcode\Rules; +use Recca0120\Twzipcode\Sources\Text; use Recca0120\Twzipcode\Storages\File; class RulesTest extends TestCase @@ -16,7 +17,7 @@ protected function beforeEach() $root = vfsStream::setup(); $storage = new File($root->url()); $this->rules = new Rules($storage); - $storage->flush()->load(' + $storage->flush()->load(new Text(' 10058,臺北市,中正區,八德路1段,全 10079,臺北市,中正區,三元街,單全 10070,臺北市,中正區,三元街,雙 48號以下 @@ -79,7 +80,7 @@ protected function beforeEach() 81357,高雄市,左營區,大順一路,單 91號至 95號 81357,高雄市,左營區,大順一路,雙 96號至 568號 81357,高雄市,左營區,大順一路,單 201號至 389巷 - '); + ')); } public function testMatch() diff --git a/tests/Storages/FileTest.php b/tests/Storages/FileTest.php index 6c98448..7dfc3a6 100644 --- a/tests/Storages/FileTest.php +++ b/tests/Storages/FileTest.php @@ -7,6 +7,8 @@ use org\bovigo\vfs\vfsStream; use PHPUnit\Framework\TestCase; use Recca0120\Twzipcode\Address; +use Recca0120\Twzipcode\Sources\CSV; +use Recca0120\Twzipcode\Sources\Text; use Recca0120\Twzipcode\Storages\File as Storage; class FileTest extends TestCase @@ -19,7 +21,7 @@ protected function beforeEach() { $root = vfsStream::setup(); $this->storage = new Storage($root->url()); - $this->storage->flush()->load(' + $this->storage->flush()->load(new Text(' 10058,臺北市,中正區,八德路1段,全 10079,臺北市,中正區,三元街,單全 10070,臺北市,中正區,三元街,雙 48號以下 @@ -82,7 +84,7 @@ protected function beforeEach() 81357,高雄市,左營區,大順一路,單 91號至 95號 81357,高雄市,左營區,大順一路,雙 96號至 568號 81357,高雄市,左營區,大順一路,單 201號至 389巷 - '); + ')); } public function testDefaultPath() @@ -123,7 +125,7 @@ public function testLoadResources() Storage::$cached = ['zip3' => null, 'zip5' => null]; $root = vfsStream::setup(); $storage = new Storage($root->url()); - $storage->flush()->loadFile(__DIR__.'/../../resources/Zip32_utf8_10501_1.zip'); + $storage->flush()->load(new CSV(__DIR__.'/../../resources/Zip32_utf8_10501_1.zip')); $address = m::mock(Address::class); diff --git a/tests/ZipcodeTest.php b/tests/ZipcodeTest.php index 1b3aeae..d990aec 100644 --- a/tests/ZipcodeTest.php +++ b/tests/ZipcodeTest.php @@ -5,6 +5,7 @@ use org\bovigo\vfs\vfsStream; use PHPUnit\Framework\TestCase; use Recca0120\Twzipcode\Rules; +use Recca0120\Twzipcode\Sources\Text; use Recca0120\Twzipcode\Storages\File; use Recca0120\Twzipcode\Zipcode; @@ -17,7 +18,7 @@ protected function beforeEach() $root = vfsStream::setup(); $storage = new File($root->url()); $this->rules = new Rules($storage); - $storage->flush()->load(' + $storage->flush()->load(new Text(' 10043,臺北市,中正區,中華路1段,單 25之 3號以下 10042,臺北市,中正區,中華路1段,單 27號以上 10065,臺北市,中正區,中華路2段,單 79號以下 @@ -41,7 +42,7 @@ protected function beforeEach() 41271,臺中市,大里區,塗城路,單 507號以上 41274,臺中市,大里區,塗城路,雙 274號以下 41275,臺中市,大里區,塗城路,雙 276號以上 - '); + ')); } public function testZipcode()