Skip to content

Commit 1074bbc

Browse files
committed
#25 Add metadata for numeric fields exposed as strings.
1 parent 6f27ea7 commit 1074bbc

File tree

4 files changed

+18
-1
lines changed

4 files changed

+18
-1
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@
110110
<!-- Frameworks and libraries -->
111111
<scala.version>2.12.20</scala.version>
112112
<scala.compat.version>2.12</scala.compat.version>
113-
<spark.version>3.4.4</spark.version>
113+
<spark.version>3.5.2</spark.version>
114114
<scalatest.version>3.2.14</scalatest.version>
115115
<specs.version>2.4.16</specs.version>
116116
<guava.version>15.0</guava.version>

spark-cobol/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,12 @@
5757
<artifactId>cobol-parser_${scala.compat.version}</artifactId>
5858
<version>${project.version}</version>
5959
</dependency>
60+
61+
<dependency>
62+
<groupId>org.slf4j</groupId>
63+
<artifactId>slf4j-log4j12</artifactId>
64+
<scope>test</scope>
65+
</dependency>
6066
</dependencies>
6167

6268
<build>

spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,8 @@ class CobolSchema(copybook: Copybook,
192192
case _ => StringType
193193
}
194194
case dt: Integral if isDisplayAlwaysString =>
195+
if (metadataPolicy != MetadataPolicy.NoMetadata)
196+
addIntegralStringMetadata(metadata, dt)
195197
StringType
196198
case dt: Integral if strictIntegralPrecision =>
197199
DecimalType(precision = dt.precision, scale = 0)
@@ -232,6 +234,11 @@ class CobolSchema(copybook: Copybook,
232234
metadataBuilder.putLong(MAX_LENGTH, a.length)
233235
}
234236

237+
private def addIntegralStringMetadata(metadataBuilder: MetadataBuilder, i: Integral): MetadataBuilder = {
238+
val maxLength = if (i.signPosition.isDefined) i.precision + 1 else i.precision
239+
metadataBuilder.putLong(MAX_LENGTH, maxLength)
240+
}
241+
235242
private def addExtendedMetadata(metadataBuilder: MetadataBuilder, s: Statement): MetadataBuilder = {
236243
metadataBuilder.putLong("level", s.level)
237244
if (s.originalName.nonEmpty && s.originalName != s.name)

spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/Test01DisplayPicAsStrings.scala

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ package za.co.absa.cobrix.spark.cobol.source
1818

1919
import org.scalatest.funsuite.AnyFunSuite
2020
import org.slf4j.{Logger, LoggerFactory}
21+
import za.co.absa.cobrix.spark.cobol.parameters.MetadataFields.MAX_LENGTH
2122
import za.co.absa.cobrix.spark.cobol.source.base.{SimpleComparisonBase, SparkTestBase}
2223
import za.co.absa.cobrix.spark.cobol.source.fixtures.BinaryFileFixture
2324
import za.co.absa.cobrix.spark.cobol.utils.SparkUtils
@@ -120,6 +121,9 @@ class Test01DisplayPicAsStrings extends AnyFunSuite with SparkTestBase with Bina
120121
val actualSchema = df.schema.treeString
121122
val actualData = SparkUtils.prettyJSON(df.toJSON.collect().mkString("[", ",", "]"))
122123

124+
assert(df.schema.fields.head.metadata.getLong(MAX_LENGTH) == 4)
125+
assert(df.schema.fields(1).metadata.getLong(MAX_LENGTH) == 5)
126+
123127
assertEqualsMultiline(actualSchema, expectedSchema)
124128
assertEqualsMultiline(actualData, expectedData)
125129
}

0 commit comments

Comments
 (0)