Skip to content

Commit 2f9309a

Browse files
committed
Add the CP1144 code page (Italy), which is the same as CP280 except that the international currency symbol "¤" at position 9F is replaced with the "€" (euro) character.
1 parent e690708 commit 2f9309a

File tree

6 files changed

+86
-1
lines changed

6 files changed

+86
-1
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,7 @@ The output looks like this:
16441644
| .option("ebcdic_code_page", "cp1141") | EBCDIC 1141 | Same as code page 273 with € at the position of the international currency symbol ¤. |
16451645
| .option("ebcdic_code_page", "cp1142") | EBCDIC 1142 | Same as code page 277 with € at the position of the international currency symbol ¤. |
16461646
| .option("ebcdic_code_page", "cp1143") | EBCDIC 1143 | Same as code page 278 with € at the position of the international currency symbol ¤. |
1647+
| .option("ebcdic_code_page", "cp1144") | EBCDIC 1144 | Same as code page 280 with € at the position of the international currency symbol ¤. |
16471648
| .option("ebcdic_code_page", "cp1145") | EBCDIC 1145 | Same as code page 284 with € at the position of the international currency symbol ¤. |
16481649
| .option("ebcdic_code_page", "cp1148") | EBCDIC 1148 | Same as code page 500 with € at the position of the international currency symbol ¤. |
16491650
| .option("ebcdic_code_page", "cp1364") | EBCDIC 1364 | Double-byte code page CCSID-1364, Korean. |

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ object CodePage extends Logging {
6565
case "cp1141" => new CodePage1141
6666
case "cp1142" => new CodePage1142
6767
case "cp1143" => new CodePage1143
68+
case "cp1144" => new CodePage1144
6869
case "cp1145" => new CodePage1145
6970
case "cp1146" => new CodePage1146
7071
case "cp1148" => new CodePage1148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright 2018 ABSA Group Limited
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package za.co.absa.cobrix.cobol.parser.encoding.codepage
18+
19+
/**
20+
* EBCDIC code page 1144. Italy.
21+
*
22+
* It corresponds to code page 280 and only differs from it in position 9F, where the euro sign € is located instead
23+
* of the international currency symbol ¤.
24+
*/
25+
class CodePage1144 extends SingleByteCodePage(CodePage1144.ebcdicToAsciiMapping) {
26+
override def codePageShortName: String = "cp1144"
27+
}
28+
29+
object CodePage1144 {
30+
val ebcdicToAsciiMapping: Array[Char] = {
31+
import EbcdicNonPrintable._
32+
33+
/* This is the EBCDIC Code Page 1144 to ASCII conversion table (€ at position 9F), adapted
34+
from https://en.wikibooks.org/wiki/Character_Encodings/Code_Tables/EBCDIC/EBCDIC_280 */
35+
val ebcdic2ascii: Array[Char] = {
36+
// Non-printable characters map used: http://www.pacsys.com/asciitab.htm
37+
Array[Char](
38+
c00, c01, c02, c03, spc, c09, spc, del, spc, spc, spc, c0b, c0c, ccr, c0e, c0f, // 0 - 15
39+
c10, c11, c12, c13, spc, nel, c08, spc, c18, c19, spc, spc, c1c, c1d, c1e, c1f, // 16 - 31
40+
spc, spc, spc, spc, spc, clf, c17, c1b, spc, spc, spc, spc, spc, c05, c06, c07, // 32 - 47
41+
spc, spc, c16, spc, spc, spc, spc, c04, spc, spc, spc, spc, c14, c15, spc, c1a, // 48 - 63
42+
' ', rsp, 'â', 'ä', '{', 'á', 'ã', 'å', bsh, 'ñ', '°', '.', '<', '(', '+', '!', // 64 - 79
43+
'&', ']', 'ê', 'ë', '}', 'í', 'î', 'ï', '~', 'ß', 'é', '$', '*', ')', ';', '^', // 80 - 95
44+
'-', '/', 'Â', 'Ä', 'À', 'Á', 'Ã', 'Å', 'Ç', 'Ñ', 'ò', ',', '%', '_', '>', '?', // 96 - 111
45+
'ø', 'É', 'Ê', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'ù', ':', '£', '§', qts, '=', qtd, // 112 - 127
46+
'Ø', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', '«', '»', 'ð', 'ý', 'þ', '±', // 128 - 143
47+
'[', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ª', 'º', 'æ', '¸', 'Æ', '€', // 144 - 159
48+
'µ', 'ì', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¡', '¿', 'Ð', 'Ý', 'Þ', '®', // 160 - 175
49+
'¢', '#', '¥', '·', '©', '@', '¶', '¼', '½', '¾', '¬', '|', '¯', '¨', '´', '×', // 176 - 191
50+
'à', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', shy, 'ô', 'ö', '¦', 'ó', 'õ', // 192 - 207
51+
'è', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', '¹', 'û', 'ü', '`', 'ú', 'ÿ', // 208 - 223
52+
'ç', '÷', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '²', 'Ô', 'Ö', 'Ò', 'Ó', 'Õ', // 224 - 239
53+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc) // 240 - 255
54+
}
55+
ebcdic2ascii
56+
}
57+
}

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala

+22
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,28 @@ class StringDecodersSpec extends AnyWordSpec {
329329
assert(actual == expected)
330330
}
331331

332+
"decode a CP1144 string example" in {
333+
val expected = "âä{áãå\\ñ°.<(+!&]êë}íîï~ßé$*);^-/ÂÄÀÁÃÅÇÑò,%_>?øÉÊËÈÍÎÏÌù:£§'=\"Øabcdefghi«»ðýþ±[jklmnopqrªºæ¸Æ€µìstuvwxyz¡¿ÐÝÞ®¢#¥·©@¶¼½¾¬|¯¨´×àABCDEFGHI\u00ADôö¦óõèJKLMNOPQR¹ûü`úÿç÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ"
334+
val bytes = Array(
335+
0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
336+
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
337+
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
338+
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
339+
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
340+
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
341+
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
342+
0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
343+
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
344+
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
345+
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
346+
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
347+
).map(_.toByte)
348+
349+
val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1144, improvedNullDetection = false)
350+
351+
assert(actual == expected)
352+
}
353+
332354
"decode a CP1145 string special characters" in {
333355
val expected = " äÑ|üܬ§ñ]ߢ[Ö{}æö¨å@ÆØÅÄÉ€ "
334356
val bytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59,

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageSingleByteSpec.scala

+5
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ class CodePageSingleByteSpec extends AnyFunSuite {
142142
assert(codePage.codePageShortName == "cp1143")
143143
}
144144

145+
test("Ensure codepage 'cp1144' gives the associated CodePage") {
146+
val codePage = CodePage.getCodePageByName("cp1144")
147+
assert(codePage.codePageShortName == "cp1144")
148+
}
149+
145150
test("Ensure codepage 'cp1145' gives the associated CodePage") {
146151
val codePage = CodePage.getCodePageByName("cp1145")
147152
assert(codePage.codePageShortName == "cp1145")

spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/index/IndexBuilder.scala

-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ private[source] object IndexBuilder extends Logging {
5656
buildIndexForVarLenReader(filesList, reader, sqlContext)
5757
case _ =>
5858
buildIndexForFullFiles(filesList, sqlContext)
59-
case _ => null
6059
}
6160
}
6261

0 commit comments

Comments
 (0)