Skip to content

Commit 41cad6f

Browse files
committed
Add the CP1160 code page (Thai), which is the same as CP838 except that the character at position 0xFE is replaced with the "€" (euro) character.
1 parent 4814eb1 commit 41cad6f

File tree

6 files changed

+101
-6
lines changed

6 files changed

+101
-6
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -1648,6 +1648,7 @@ The output looks like this:
16481648
| .option("ebcdic_code_page", "cp1145") | EBCDIC 1145 | Same as code page 284 with € at the position of the international currency symbol ¤. |
16491649
| .option("ebcdic_code_page", "cp1147") | EBCDIC 1147 | Same as code page 297 with € at the position of the international currency symbol ¤. |
16501650
| .option("ebcdic_code_page", "cp1148") | EBCDIC 1148 | Same as code page 500 with € at the position of the international currency symbol ¤. |
1651+
| .option("ebcdic_code_page", "cp1160") | EBCDIC 1160 | Same as code page 838 (Thai) with € at position 0xFE. |
16511652
| .option("ebcdic_code_page", "cp1364") | EBCDIC 1364 | Double-byte code page CCSID-1364, Korean. |
16521653
| .option("ebcdic_code_page", "cp1388") | EBCDIC 1388 | Double-byte code page CCSID-1388, Simplified Chinese. |
16531654

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ object CodePage extends Logging {
7070
case "cp1146" => new CodePage1146
7171
case "cp1147" => new CodePage1147
7272
case "cp1148" => new CodePage1148
73+
case "cp1160" => new CodePage1160
7374
case "cp1364" => new CodePage1364
7475
case "cp1388" => new CodePage1388
7576
case codePage => throw new IllegalArgumentException(s"The code page '$codePage' is not one of the builtin EBCDIC code pages.")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Copyright 2018 ABSA Group Limited
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package za.co.absa.cobrix.cobol.parser.encoding.codepage
18+
19+
/**
  * EBCDIC code page 1160 with support for Thai script, as used on IBM mainframes.
  *
  * It is the same as code page 838 except that position 0xFE decodes to '€'.
  */
class CodePage1160 extends SingleByteCodePage(CodePage1160.ebcdicToAsciiMapping) {
  /** Short name used to select this code page (the value of the `ebcdic_code_page` option). */
  override def codePageShortName: String = "cp1160"
}

object CodePage1160 {
  /**
    * Decoding table for code page 1160: 256 characters, one per EBCDIC byte value,
    * laid out in rows of 16 (the trailing comment on each row gives the byte range it covers).
    */
  val ebcdicToAsciiMapping: Array[Char] = {
    // Brings spc, ccr, clf, qts, qtd and bsh into scope.
    import EbcdicNonPrintable._

    /* This is the EBCDIC Code Page 1160 conversion table. It is the Code Page 838 table
       from https://en.wikibooks.org/wiki/Character_Encodings/Code_Tables/EBCDIC/EBCDIC_838
       with the entry at position 0xFE (254) replaced by the euro sign. */
    val ebcdic2ascii: Array[Char] = {
      // Thai combining marks (vowel signs and tone marks) are written as escapes because
      // they do not render legibly as stand-alone character literals.
      val c01 = '\u0E48'
      val c02 = '\u0E4E'
      val c03 = '\u0E31'
      val c04 = '\u0E34'
      val c05 = '\u0E49'
      val c06 = '\u0E35'
      val c07 = '\u0E36'
      val c08 = '\u0E37'
      val c09 = '\u0E38'
      val c10 = '\u0E39'
      val c11 = '\u0E3A'
      val c12 = '\u0E47'
      val c13 = '\u0E48'
      val c14 = '\u0E49'
      val c15 = '\u0E4A'
      val c16 = '\u0E4B'
      // c17 is used at position 0xFB in the table below; it continues the
      // U+0E47..U+0E4D run between c16 and c18.
      val c17 = '\u0E4C'
      val c18 = '\u0E4D'

      Array[Char](
        spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, ccr, spc, spc, //   0 - 15
        spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, //  16 - 31
        spc, spc, spc, spc, spc, clf, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, //  32 - 47
        spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, //  48 - 63
        spc, spc, 'ก', 'ข', 'ฃ', 'ค', 'ฅ', 'ฆ', 'ง', '[', '¢', '.', '<', '(', '+', '|', //  64 - 79
        '&', c01, 'จ', 'ฉ', 'ช', 'ซ', 'ฌ', 'ญ', 'ฎ', ']', '!', '$', '*', ')', ';', '¬', //  80 - 95
        '-', '/', 'ฏ', 'ฐ', 'ฑ', 'ฒ', 'ณ', 'ด', 'ต', '^', '¦', ',', '%', '_', '>', '?', //  96 - 111
        '฿', c02, 'ถ', 'ท', 'ธ', 'น', 'บ', 'ป', 'ผ', '`', ':', '#', '@', qts, '=', qtd, // 112 - 127
        '๏', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'ฝ', 'พ', 'ฟ', 'ภ', 'ม', 'ย', // 128 - 143
        '๚', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ร', 'ฤ', 'ล', 'ฦ', 'ว', 'ศ', // 144 - 159
        '๛', '~', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ษ', 'ส', 'ห', 'ฬ', 'อ', 'ฮ', // 160 - 175
        '๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗', '๘', '๙', 'ฯ', 'ะ', c03, 'า', 'ำ', c04, // 176 - 191
        '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', c05, c06, c07, c08, c09, c10, // 192 - 207
        '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', c11, 'เ', 'แ', 'โ', 'ใ', 'ไ', // 208 - 223
        bsh, c15, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'ๅ', 'ๆ', c12, c13, c14, c15, // 224 - 239
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', c16, c17, c18, c16, '€', spc) // 240 - 255
    }
    ebcdic2ascii
  }
}

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage838.scala

+2-6
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,11 @@ class CodePage838 extends SingleByteCodePage(CodePage838.ebcdicToAsciiMapping) {
2525

2626
object CodePage838 {
2727
val ebcdicToAsciiMapping: Array[Char] = {
28+
import EbcdicNonPrintable._
29+
2830
/* This is the EBCDIC Code Page 838 to ASCII conversion table with non-printable characters mapping
2931
from https://en.everybodywiki.com/EBCDIC_838 */
3032
val ebcdic2ascii: Array[Char] = {
31-
val clf = '\r'
32-
val ccr = '\n'
33-
val spc = ' '
34-
val qts = '\''
35-
val qtd = '\"'
36-
val bsh = '\\'
3733
val c01 = '\u0E48'
3834
val c02 = '\u0E4E'
3935
val c03 = '\u0E31'

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala

+20
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,16 @@ class StringDecodersSpec extends AnyWordSpec {
241241
assert(actual == expected)
242242
}
243243

244+
"decode a CP838 string special characters" in {
  // Input bytes mix Latin punctuation, Thai letters and digits, and Thai combining marks,
  // framed by an EBCDIC space (0x40) at each end.
  val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59,
    0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x40).map(_.toByte)
  val expectedText = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ "

  val decodedText = decodeEbcdicString(ebcdicBytes, KeepAll, new CodePage838, improvedNullDetection = false)

  assert(decodedText == expectedText)
}
253+
244254
"decode a CP1140 string special characters" in {
245255
val expected = "âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);¬-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®^£¥·©§¶¼½¾[]¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ"
246256
val bytes = Array(
@@ -405,6 +415,16 @@ class StringDecodersSpec extends AnyWordSpec {
405415

406416
assert(actual == expected)
407417
}
418+
419+
"decode a CP1160 string special characters" in {
  // Same byte sequence as the CP838 case, with 0xFE inserted before the final space;
  // 0xFE is the position where CP1160 differs from CP838 and must decode to the euro sign.
  val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59,
    0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0xFE, 0x40).map(_.toByte)
  val expectedText = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ€ "

  val decodedText = decodeEbcdicString(ebcdicBytes, KeepAll, new CodePage1160, improvedNullDetection = false)

  assert(decodedText == expectedText)
}
408428
}
409429
}
410430

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageSingleByteSpec.scala

+5
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,11 @@ class CodePageSingleByteSpec extends AnyFunSuite {
167167
assert(codePage.codePageShortName == "cp1148")
168168
}
169169

170+
test("Ensure codepage 'cp1160' gives the associated CodePage") {
  // Looking the code page up by name must return the implementation that reports the same short name.
  val shortName = CodePage.getCodePageByName("cp1160").codePageShortName
  assert(shortName == "cp1160")
}
174+
170175
test("Ensure codepage 'cp1364' gives the associated CodePage") {
171176
val codePage = CodePage.getCodePageByName("cp1364")
172177
assert(codePage.codePageShortName == "cp1364")

0 commit comments

Comments
 (0)