Skip from_json overflow tests for [databricks] 14.3

Fixes #11533.

This commit addresses the test failures reported in #11533 for the following tests:
- `json_matrix_test.py::test_from_json_long_structs()`
- `json_matrix_test.py::test_scan_json_long_structs()`

These failures are a result of #11711. When the JSON parser reads integral struct members from a JSON file and the parsing leads to an overflow, the `STRUCT` column value is deemed null on Databricks 14.3 (i.e. *without* `spark-rapids` active). This behaviour differs from that exhibited by Apache Spark versions later than 3.4.1.

This commit breaks out the problematic JSON test rows into a separate file, whose read is marked `xfail` on Databricks 14.3. The remaining rows are tested on all versions.

The true fix for #11711 will be addressed later.

Signed-off-by: MithunR <[email protected]>
NVIDIA · Nov 12, 2024 · 89fbe4f · 89fbe4f
1 parent 862dab0
commit 89fbe4f
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 3 deletions.
diff --git a/integration_tests/src/main/python/json_matrix_test.py b/integration_tests/src/main/python/json_matrix_test.py
@@ -1257,7 +1257,11 @@ def test_from_json_string_arrays(std_input_path, input_file):
     "single_quoted_strings.json",
     "boolean_formatted.json",
     "int_array_formatted.json",
-    pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')),
+    "int_struct_formatted.json",
+    pytest.param("int_struct_formatted_overflows.json", marks=pytest.mark.xfail(
+        condition=is_before_spark_342() or is_databricks_version_or_later(14, 3),
+        reason='Before Spark 3.4.2? https://github.com/NVIDIA/spark-rapids/issues/10588. '
+               'Databricks 14.3 or later? https://github.com/NVIDIA/spark-rapids/issues/11711.')),
     pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')),
     "bad_whitespace.json",
     "escaped_strings.json",
@@ -1286,7 +1290,11 @@ def test_scan_json_long_structs(std_input_path, read_func, spark_tmp_table_facto
     "single_quoted_strings.json",
     "boolean_formatted.json",
     "int_array_formatted.json",
-    pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')),
+    "int_struct_formatted.json",
+    pytest.param("int_struct_formatted_overflows.json", marks=pytest.mark.xfail(
+        condition=is_before_spark_342() or is_databricks_version_or_later(14, 3),
+        reason='Before Spark 3.4.2? https://github.com/NVIDIA/spark-rapids/issues/10588. '
+               'Databricks 14.3 or later? https://github.com/NVIDIA/spark-rapids/issues/11711.')),
     pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')),
     "bad_whitespace.json",
     "escaped_strings.json",

diff --git a/integration_tests/src/test/resources/int_struct_formatted.json b/integration_tests/src/test/resources/int_struct_formatted.json
@@ -2,4 +2,3 @@
 {"data": {"A": 1}}
 {"data": {"B": 50}}
 {"data": {"B": -128, "A": 127}}
-{"data": {"B": 99999999999999999999, "A": -9999999999999999999}}
diff --git a/integration_tests/src/test/resources/int_struct_formatted_overflows.json b/integration_tests/src/test/resources/int_struct_formatted_overflows.json
@@ -0,0 +1,2 @@
+{"data": {"B": 99999999999999999999, "A": -9999999999999999999}}
+{"data": {"A": 0, "B": "0"}}