Skip to content

Commit

Permalink
Source
Browse files Browse the repository at this point in the history
  • Loading branch information
recca0120 committed Nov 10, 2024
1 parent 5b05951 commit 5c1b791
Show file tree
Hide file tree
Showing 11 changed files with 196 additions and 125 deletions.
5 changes: 4 additions & 1 deletion resources/converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

include __DIR__.'/../vendor/autoload.php';

use Recca0120\Twzipcode\Sources\CSV;
use Recca0120\Twzipcode\Storages\File;

set_error_handler(static function ($severity, $message, $file, $line) {
Expand All @@ -14,6 +15,8 @@
// https://data.gov.tw/dataset/5948
$url = 'https://quality.data.gov.tw/dq_download_csv.php?nid=5948&md5_url=e1f6004ad33eb3ff3a824fb992a4b01a';

$contents = file_get_contents($url);

if (file_exists($file) === false) {
touch($file);
$contents = file_get_contents($url);
Expand All @@ -35,5 +38,5 @@
$zip->close();
}

(new File)->loadFile($file);
(new File)->load(new CSV($file));
echo 'benchmark: '.(microtime(true) - $start)."\n";
8 changes: 8 additions & 0 deletions src/Contracts/Source.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?php

namespace Recca0120\Twzipcode\Contracts;

/**
 * Contract for a provider of zipcode rule data that a storage can load from.
 */
interface Source
{
/**
 * Iterate over the rule data, handing each group to $callback.
 *
 * The implementations shipped in this package invoke the callback as
 * $callback($zipcode, $county, $district, $rules), where $rules is the list
 * of raw rule lines collected for that group.
 *
 * @param callable $callback
 */
public function each(callable $callback);
}
16 changes: 5 additions & 11 deletions src/Contracts/Storage.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,15 @@ public function zip3(Address $address);
public function rules($zip3);

/**
* load.
*
* @param string $source
* @return $this
*/
public function load($source);

/**
* @param string|null $file
* @return $this
*/
public function loadFile($file = null);
public function flush();

/**
* load.
*
* @param Source $source
* @return $this
*/
public function flush();
public function load($source);
}
23 changes: 23 additions & 0 deletions src/Sources/CSV.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

namespace Recca0120\Twzipcode\Sources;

/**
 * Source backed by a file on disk: either a plain CSV text file, or a zip
 * archive whose first entry holds the CSV text.
 */
class CSV extends Source
{
    /** @var string Path of the file to read. */
    protected $file;

    /** @var string Lower-cased extension of $file ('' when it has none). */
    private $extension;

    /**
     * @param string $file Path to a .csv file or a .zip archive.
     */
    public function __construct($file)
    {
        $this->file = $file;
        // Normalise the case so "DATA.ZIP" is recognised as an archive instead
        // of being read as if it were plain CSV text.
        $this->extension = strtolower(pathinfo($this->file, PATHINFO_EXTENSION));
    }

    /**
     * Read the raw CSV text, unpacking the archive first when needed.
     *
     * @return string
     *
     * @throws \RuntimeException when the plain file cannot be read
     */
    protected function getContents()
    {
        if ($this->extension === 'zip') {
            return static::unzip($this->file);
        }

        $contents = file_get_contents($this->file);

        // file_get_contents() signals failure with false; passing that on would
        // make an unreadable file indistinguishable from an empty data set.
        if ($contents === false) {
            throw new \RuntimeException(sprintf('Unable to read source file "%s".', $this->file));
        }

        return $contents;
    }
}
84 changes: 84 additions & 0 deletions src/Sources/Source.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?php

namespace Recca0120\Twzipcode\Sources;

use Recca0120\Twzipcode\Contracts\Source as SourceContract;
use ZipArchive;

/**
 * Base class for rule sources.
 *
 * Subclasses only supply the raw text through getContents(); this class splits
 * it into rows, groups the rows by county, district and 3-digit code, and feeds
 * every group to the callback given to each().
 */
abstract class Source implements SourceContract
{
    /**
     * County+district names whose rows are always grouped under the fixed code
     * given here instead of the first three characters of the row's zipcode.
     *
     * @var array<string, string>
     */
    private static $tricks = [
        '宜蘭縣壯圍鄉' => '263',
        '新竹縣寶山鄉' => '308',
        '臺南市新市區' => '744',
    ];

    /**
     * Parse the contents and invoke $callback once per group.
     *
     * @param callable $callback invoked as ($zip3, $county, $district, $rules),
     *                           $rules being the raw lines of that group
     * @return void
     */
    public function each(callable $callback)
    {
        static::eachGroup(static::prepare($this->rows()), $callback);
    }

    /**
     * @return string raw text, one comma-separated rule per line
     */
    abstract protected function getContents();

    /**
     * Split the raw contents into one row per non-blank line.
     *
     * Lines with fewer than three comma-separated fields are not filtered out;
     * they trigger PHP's usual undefined-offset diagnostic.
     *
     * @return array<int, array{zipcode: string, county: string, district: string, text: string}>
     */
    protected function rows()
    {
        // Accept both LF and CRLF line endings. The previous pattern
        // '/\n|\r\n$/' could only ever match "\n", so every line of a CRLF
        // file kept a trailing "\r" that ended up inside the rule text.
        $lines = preg_split('/\r?\n/', (string) $this->getContents());

        $rows = [];
        foreach ($lines as $line) {
            if (empty(trim($line))) {
                continue;
            }

            $data = explode(',', $line);
            $rows[] = [
                'zipcode' => $data[0],
                'county' => $data[1],
                'district' => $data[2],
                'text' => $line,
            ];
        }

        return $rows;
    }

    /**
     * Group the raw rule lines as county => district => zip3 => list of lines.
     *
     * @param array<int, array{zipcode: string, county: string, district: string, text: string}> $rows
     * @return array<string, array<string, array<string, string[]>>>
     */
    protected static function prepare($rows)
    {
        $results = [];

        foreach ($rows as $row) {
            $name = $row['county'].$row['district'];

            // A listed district uses its fixed code; everything else uses the
            // leading three characters of the row's own zipcode.
            $zip3 = isset(self::$tricks[$name])
                ? self::$tricks[$name]
                : substr($row['zipcode'], 0, 3);

            $results[$row['county']][$row['district']][$zip3][] = $row['text'];
        }

        return $results;
    }

    /**
     * Walk the grouped structure built by prepare() and report every innermost
     * group to the callback.
     *
     * @param array $ruleGroup county => district => zip3 => list of rule lines
     * @param callable $callback invoked as ($zip3, $county, $district, $rules)
     * @return void
     */
    protected static function eachGroup($ruleGroup, $callback)
    {
        foreach ($ruleGroup as $county => $districts) {
            foreach ($districts as $district => $groups) {
                foreach ($groups as $zip3 => $rules) {
                    $callback($zip3, $county, $district, $rules);
                }
            }
        }
    }

    /**
     * Return the contents of the first entry of a zip archive.
     *
     * @param string $file
     * @return string
     *
     * @throws \RuntimeException when the archive cannot be opened or read
     */
    protected static function unzip($file)
    {
        $zip = new ZipArchive;

        // open() returns true on success and an error code otherwise; going on
        // with an unopened archive would not produce any usable contents.
        $status = $zip->open($file);
        if ($status !== true) {
            throw new \RuntimeException(sprintf(
                'Unable to open zip archive "%s" (ZipArchive error: %s).',
                $file,
                var_export($status, true)
            ));
        }

        $contents = $zip->getFromIndex(0);
        $zip->close();

        if ($contents === false) {
            throw new \RuntimeException(sprintf('Unable to read the first entry of zip archive "%s".', $file));
        }

        return $contents;
    }
}
24 changes: 24 additions & 0 deletions src/Sources/Text.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

namespace Recca0120\Twzipcode\Sources;

/**
 * Source whose raw contents are handed over directly as an in-memory string.
 */
class Text extends Source
{
    /** @var string The raw text received by the constructor. */
    private $contents;

    /**
     * @param string $text raw rule text
     */
    public function __construct($text)
    {
        $this->contents = $text;
    }

    /**
     * @return string the text exactly as it was passed to the constructor
     */
    protected function getContents()
    {
        return $this->contents;
    }
}
138 changes: 34 additions & 104 deletions src/Storages/File.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@

namespace Recca0120\Twzipcode\Storages;

use Closure;
use Recca0120\Lodash\JArray;
use Recca0120\Lodash\JString;
use Recca0120\Twzipcode\Address;
use Recca0120\Twzipcode\Contracts\Source;
use Recca0120\Twzipcode\Contracts\Storage;
use Recca0120\Twzipcode\Rule;
use ZipArchive;

class File implements Storage
{
Expand Down Expand Up @@ -50,36 +49,6 @@ public function zip3(Address $address)
return null;
}

/**
* @param string $source
* @return $this
*/
public function load($source)
{
    $prefixes = [];
    $compressedRules = [];

    // Invoked once per group produced by prepareSource().
    $collect = function ($zipcode, $county, $district, $rules) use (&$prefixes, &$compressedRules) {
        // Wrap every raw rule line in a Rule object, keeping the original keys.
        $ruleObjects = [];
        foreach ($rules as $index => $text) {
            $ruleObjects[$index] = new Rule($text);
        }
        $compressedRules[$zipcode] = $this->compress($ruleObjects);

        // Only the first group seen for a county supplies its one-character prefix.
        if (empty($prefixes[$county])) {
            $prefixes[$county] = substr($zipcode, 0, 1);
        }

        // Likewise the first group seen for a county+district supplies its 3-character code.
        $districtKey = $county.$district;
        if (empty($prefixes[$districtKey])) {
            $prefixes[$districtKey] = substr($zipcode, 0, 3);
        }
    };

    $this->each($this->prepareSource($source), $collect);

    $this->store('zip3', $prefixes);
    $this->store('zip5', $compressedRules);

    return $this;
}

/**
* @param string $zip3
* @return JArray
Expand All @@ -94,27 +63,53 @@ public function rules($zip3)
}

/**
* @param string $file
* @return $this
*/
public function loadFile($file = null)
public function flush()
{
$file = $file ?: $this->path.'../Zip32_utf8_10501_1.csv';
$this->load($this->getSource($file));
static::$cached = ['zip3' => null, 'zip5' => null];

return $this;
}

/**
* @param Source $source
* @return $this
*/
public function flush()
public function load($source)
{
static::$cached = ['zip3' => null, 'zip5' => null];
$zip5 = [];
$zip3 = [];
$source->each(function ($zipcode, $county, $district, $rules) use (&$zip5, &$zip3) {
$zip5[$zipcode] = $this->compress(array_map(static function ($rule) {
return new Rule($rule);
}, $rules));

if (empty($zip3[$county])) {
$zip3[$county] = substr($zipcode, 0, 1);
}

if (empty($zip3[$county.$district])) {
$zip3[$county.$district] = substr($zipcode, 0, 3);
}
});

$this->store('zip3', $zip3);
$this->store('zip5', $zip5);

return $this;
}

/**
* @param string $filename
* @param array $data
* @return void
*/
private function store($filename, $data)
{
    // The target is <path><filename><suffix>; what gets written is the
    // compressed form of $data.
    $target = $this->path.$filename.$this->suffix;
    $payload = $this->compress($data);

    file_put_contents($target, $payload);
}

/**
* @param string $filename
* @return void
Expand All @@ -134,63 +129,6 @@ private function restore($filename)
));
}

/**
* @param string $file
* @return string
*/
private function getSource($file)
{
    // Compare the extension case-insensitively so "DATA.ZIP" is still
    // unpacked instead of being read as plain text.
    $extension = strtolower(pathinfo($file, PATHINFO_EXTENSION));

    if ($extension !== 'zip') {
        $contents = file_get_contents($file);

        // false means the read failed; report it instead of passing it on as data.
        if ($contents === false) {
            throw new \RuntimeException(sprintf('Unable to read source file "%s".', $file));
        }

        return $contents;
    }

    $zip = new ZipArchive;

    // open() returns true on success and an error code otherwise.
    $status = $zip->open($file);
    if ($status !== true) {
        throw new \RuntimeException(sprintf(
            'Unable to open zip archive "%s" (ZipArchive error: %s).',
            $file,
            var_export($status, true)
        ));
    }

    // The data is expected in the first entry of the archive.
    $contents = $zip->getFromIndex(0);
    $zip->close();

    if ($contents === false) {
        throw new \RuntimeException(sprintf('Unable to read the first entry of zip archive "%s".', $file));
    }

    return $contents;
}

/**
* @param string $source
* @return array
*/
private function prepareSource($source)
{
    // County+district names that are always grouped under a fixed code
    // instead of the first three characters of the row's zipcode.
    $tricks = ['宜蘭縣壯圍鄉' => '263', '新竹縣寶山鄉' => '308', '臺南市新市區' => '744'];
    $results = [];

    // Accept both LF and CRLF line endings. The previous pattern
    // '/\n|\r\n$/' could only ever match "\n", so every line of a CRLF
    // source kept a trailing "\r" inside the stored rule text.
    $rules = preg_split('/\r?\n/', (string) $source);

    foreach ($rules as $rule) {
        if (empty(trim($rule))) {
            continue;
        }

        // Only the first three columns are needed for grouping; the whole
        // line is kept as the rule text.
        list($zipcode, $county, $district) = explode(',', $rule);

        $name = $county.$district;
        $zip3 = isset($tricks[$name]) ? $tricks[$name] : substr($zipcode, 0, 3);

        // Resulting shape: county => district => zip3 => list of raw lines.
        $results[$county][$district][$zip3][] = $rule;
    }

    return $results;
}

/**
* @param array $ruleGroup
* @param Closure $callback
*/
private function each($ruleGroup, $callback)
{
    // Walk county => district => code => rules and report every innermost
    // list to the callback exactly once.
    foreach ($ruleGroup as $countyName => $byDistrict) {
        foreach ($byDistrict as $districtName => $byCode) {
            foreach ($byCode as $code => $rules) {
                $callback($code, $countyName, $districtName, $rules);
            }
        }
    }
}

/**
* @param array $array
* @return string
Expand All @@ -209,13 +147,5 @@ private function decompress($compressed)
return unserialize(gzuncompress($compressed));
}

/**
* @param string $filename
* @param array $data
* @return void
*/
private function store($filename, $data)
{
    // The target is <path><filename><suffix>; what gets written is the
    // compressed form of $data.
    $target = $this->path.$filename.$this->suffix;
    $payload = $this->compress($data);

    file_put_contents($target, $payload);
}

}
Loading

0 comments on commit 5c1b791

Please sign in to comment.