Skip to content

Commit 9f2c96e

Browse files
authored
Merge pull request #141 from feyst/master
Add functionality to parse source as specified encoding and get result as specified encoding
2 parents 8e0197a + eeadc31 commit 9f2c96e

File tree

8 files changed

+93
-11
lines changed

8 files changed

+93
-11
lines changed

.github/workflows/ci.yml

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
# Continuous integration: install Composer dependencies and run the PHPUnit suite.
name: CI

# Run on every push, and on pull requests when they are opened or updated.
on:
  push:
  pull_request:
    types:
      - opened
      - synchronize

jobs:
  build-test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      # Reuse the Composer cache between runs; the key changes with composer.lock.
      - name: Cache Composer dependencies
        uses: actions/cache@v3
        with:
          path: /tmp/composer-cache
          key: ${{ runner.os }}-${{ hashFiles('**/composer.lock') }}
      - uses: php-actions/composer@v6
      - name: PHPUnit tests
        run: ./vendor/bin/phpunit
24+

phpunit.xml

-3
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,7 @@
22
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
33
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.3/phpunit.xsd"
44
bootstrap="tests/bootstrap.php"
5-
cacheDirectory=".phpunit.cache"
65
executionOrder="depends,defects"
7-
requireCoverageMetadata="false"
8-
beStrictAboutCoverageMetadata="false"
96
beStrictAboutOutputDuringTests="true"
107
failOnRisky="true"
118
failOnWarning="true">

src/EDI/Interpreter.php

+1-1
Original file line number | Diff line number | Diff line change
@@ -631,7 +631,7 @@ private function processXmlSegment(\SimpleXMLElement $elm, array &$message, int
631631
} else {
632632
if (! $segmentVisited && isset($elm['required'])) {
633633
$segmentVisited = true;
634-
if (\call_user_func($this->comparisonFunction, $message[$segmentIdx+1], $elm)) {
634+
if (isset($message[$segmentIdx+1]) && \call_user_func($this->comparisonFunction, $message[$segmentIdx+1], $elm)) {
635635
$errors[] = [
636636
'text' => $this->messageTextConf['SPURIOUSSEGMENT'].($this->patchFiles ? ' (skipped)' : ''),
637637
'position' => $segmentIdx,

src/EDI/Parser.php

+33-4
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,8 @@ class Parser
100100
*/
101101
private $messageNumber;
102102

103+
private ?string $sourceEncoding = null;
104+
103105
/**
104106
* @var array<string,string>
105107
*/
@@ -141,8 +143,13 @@ class Parser
141143
*/
142144
public function parse(): self
143145
{
146+
$rawSegments = $this->getRawSegments();
147+
if ($this->sourceEncoding && isset(self::$charsets[$this->syntaxID]) && self::$charsets[$this->syntaxID] !== $this->sourceEncoding) {
148+
$rawSegments = $this->convertEncoding($this->rawSegments, $this->sourceEncoding, self::$charsets[$this->syntaxID]);
149+
}
150+
144151
$i = 0;
145-
foreach ($this->getRawSegments() as $line) {
152+
foreach ($rawSegments as $line) {
146153
$i++;
147154

148155
// Null byte and carriage return removal. (CR+LF)
@@ -310,16 +317,38 @@ public function setStrict(bool $strict):void
310317
$this->strict = $strict;
311318
}
312319

320+
/**
 * Declare the character encoding the loaded EDI source is written in.
 *
 * parse() compares this value with the charset mapped to the message's
 * syntax identifier and converts the raw segments when both are known
 * and differ.
 *
 * @param string $sourceEncoding Encoding name of the source data, e.g. 'UTF-8'.
 */
public function setSourceEncoding(string $sourceEncoding): void
{
    $this->sourceEncoding = $sourceEncoding;
}
324+
313325
/**
 * Get parsed lines/segments.
 *
 * Triggers parse() first when nothing has been parsed yet.
 *
 * @param string|null $encoding Encoding the returned strings should be converted to;
 *                              null returns the parsed data without any conversion.
 *
 * @return array Parsed segments, converted to $encoding when requested and possible.
 */
public function get(?string $encoding = null): array
{
    if (empty($this->parsedfile)) {
        $this->parse();
    }

    if (null === $encoding) {
        return $this->parsedfile;
    }

    // parse() only converts to the syntax charset when one is mapped for the
    // syntax ID; without a mapping the data is still in the declared source
    // encoding (if any), so use that as the conversion origin instead of
    // reading an undefined index.
    $currentEncoding = self::$charsets[$this->syntaxID] ?? $this->sourceEncoding;

    // Nothing to do when the current encoding is unknown or already the target.
    if (null === $currentEncoding || $currentEncoding === $encoding) {
        return $this->parsedfile;
    }

    return $this->convertEncoding($this->parsedfile, $currentEncoding, $encoding);
}
340+
341+
/**
 * Recursively convert every string in $data from one character encoding to another.
 *
 * Arrays are walked depth-first with their keys preserved; values that are
 * neither arrays nor strings are returned untouched.
 *
 * @param mixed  $data Array (possibly nested) or scalar to convert.
 * @param string $from Encoding the strings are currently in.
 * @param string $to   Encoding the strings should be converted to.
 *
 * @return mixed Same structure as $data with string values converted.
 */
private function convertEncoding($data, string $from, string $to)
{
    if (is_array($data)) {
        foreach ($data as $k => $v) {
            $data[$k] = $this->convertEncoding($v, $from, $to);
        }

        return $data;
    }

    if (!is_string($data)) {
        return $data;
    }

    if (function_exists('iconv')) {
        $converted = iconv($from, $to . '//TRANSLIT', $data);

        // iconv() returns false when it cannot convert the input; retry with
        // mbstring rather than letting `false` replace the segment text.
        if (false === $converted && function_exists('mb_convert_encoding')) {
            $converted = mb_convert_encoding($data, $to, $from);
        }
    } else {
        $converted = mb_convert_encoding($data, $to, $from);
    }

    // Keep the original text if no conversion succeeded, so callers always
    // receive a string where they passed one in.
    return is_string($converted) ? $converted : $data;
}
324353

325354
/**

tests/EDITest/InterpreterTest.php

+2-2
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,8 @@ public function testDESADV()
9191
'JSON does not match expected output'
9292
);
9393

94-
static::assertSame(3598, \strlen($interpreter->getJson()));
95-
static::assertSame(9383, \strlen($interpreter->getJson(true)));
94+
static::assertSame(3594, \strlen($interpreter->getJson()));
95+
static::assertSame(9379, \strlen($interpreter->getJson(true)));
9696

9797
static::assertCount(2, $interpreter->getMessages());
9898
static::assertCount(0, $interpreter->getErrors());

tests/EDITest/ParserTest.php

+9
Original file line number | Diff line number | Diff line change
@@ -291,4 +291,13 @@ public function testReleaseCharacter()
291291

292292
static::assertSame($loaded[15][2], 'FIELD 1.1?:FIELD 1.2');
293293
}
294+
295+
/**
 * A source declared as UTF-8 and requested back as UTF-8 must return its
 * non-ASCII characters unchanged.
 */
public function testUtf8EncodedSourceAndOutput()
{
    $p = new Parser();
    $p->load(__DIR__ . '/../files/example_utf8.edi');
    $p->setSourceEncoding('UTF-8');
    $loaded = $p->get('UTF-8');

    // Index 11 is the first IMD segment; element 3, component 3 holds the description.
    // Expected value goes first so PHPUnit labels the failure diff correctly.
    static::assertSame('MUNCIË THE MIDDLE', $loaded[11][3][3]);
}
294303
}

tests/files/D96ADESADV.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,7 @@
227227
"messageTrailer": {
228228
"segmentIdx": 20,
229229
"segmentCode": "UNT",
230-
"segmentGroup": "SG16",
230+
"segmentGroup": "",
231231
"numberOfSegmentsInTheMessage": "21",
232232
"messageReferenceNumber": "142"
233233
}

tests/files/example_utf8.edi

+23
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
UNB+UNOC:1+1556150:31B+8888888:ZZ+160727:0953+1'
2+
UNH+142+DESADV:0:96A:UN'
3+
BGM+351+Y02197250+700101'
4+
DTM+11:20160726:102'
5+
RFF+ON:6877871'
6+
RFF+PK:VEE0214439'
7+
NAD+SU+1694901+31B'
8+
NAD+BY+01131116+31B'
9+
CPS+IE2156580387'
10+
LIN+001'
11+
PIA+5+9780738507330'
12+
IMD+F+81+:::MUNCIË THE MIDDLE'
13+
QTY+12:1'
14+
RFF+LI:6877871'
15+
RFF+ON:6877871'
16+
LIN+002'
17+
PIA+5+9781568361871'
18+
IMD+F+81+:::ROADS TO SATA A 2'
19+
QTY+12:1'
20+
RFF+LI:6905456'
21+
RFF+ON:6905456'
22+
UNT+21+142'
23+
UNZ+1+1'

0 commit comments

Comments
 (0)