Skip to content

Commit 0b79825

Browse files
committed
SNOW-1803811: Allow mixed-case field names for struct type columns
1 parent 7a994fa commit 0b79825

File tree

4 files changed

+20
-14
lines changed

4 files changed

+20
-14
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
- Added `save` method to `DataFrameWriter` to work in conjunction with `format`.
4545
- Added support to read keyword arguments to `options` method for `DataFrameReader` and `DataFrameWriter`.
4646
- Relaxed the cloudpickle dependency for Python 3.11 to simplify build requirements. However, for Python 3.11, `cloudpickle==2.2.1` remains the only supported version.
47+
- Added support for mixed case field names in struct type columns.
4748

4849
#### Bug Fixes
4950

src/snowflake/snowpark/_internal/type_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,7 @@ def convert_sp_to_sf_type(datatype: DataType) -> str:
292292
if isinstance(datatype, StructType):
293293
if datatype.structured:
294294
fields = ", ".join(
295-
f"{field.name} {convert_sp_to_sf_type(field.datatype)}"
295+
f"{field.raw_name} {convert_sp_to_sf_type(field.datatype)}"
296296
for field in datatype.fields
297297
)
298298
return f"OBJECT({fields})"

src/snowflake/snowpark/types.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -482,6 +482,7 @@ class ColumnIdentifier:
482482
"""Represents a column identifier."""
483483

484484
def __init__(self, normalized_name: str) -> None:
485+
self.raw_name = normalized_name
485486
self.normalized_name = quote_name(normalized_name)
486487
self._original_name = normalized_name
487488

@@ -566,6 +567,10 @@ def name(self) -> str:
566567
"""Returns the column name."""
567568
return self.column_identifier.name
568569

570+
@property
571+
def raw_name(self) -> str:
572+
return self.column_identifier.raw_name
573+
569574
@name.setter
570575
def name(self, n: str) -> None:
571576
self.column_identifier = ColumnIdentifier(n)

tests/integ/scala/test_datatype_suite.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@
6060
_STRUCTURE_DATAFRAME_QUERY = """
6161
select
6262
object_construct('k1', 1) :: map(varchar, int) as map,
63-
object_construct('A', 'foo', 'B', 0.05) :: object(A varchar, B float) as obj,
63+
object_construct('A', 'foo', 'b', 0.05) :: object(A varchar, b float) as obj,
6464
[1.0, 3.1, 4.5] :: array(float) as arr
6565
"""
6666

@@ -71,10 +71,10 @@ def _create_test_dataframe(s):
7171
object_construct(lit("k1"), lit(1))
7272
.cast(MapType(StringType(), IntegerType(), structured=True))
7373
.alias("map"),
74-
object_construct(lit("A"), lit("foo"), lit("B"), lit(0.05))
74+
object_construct(lit("A"), lit("foo"), lit("b"), lit(0.05))
7575
.cast(
7676
StructType(
77-
[StructField("A", StringType()), StructField("B", DoubleType())],
77+
[StructField("A", StringType()), StructField("b", DoubleType())],
7878
structured=True,
7979
)
8080
)
@@ -106,7 +106,7 @@ def _create_test_dataframe(s):
106106
StructType(
107107
[
108108
StructField("A", StringType(16777216), nullable=True),
109-
StructField("B", DoubleType(), nullable=True),
109+
StructField('"b"', DoubleType(), nullable=True),
110110
],
111111
structured=True,
112112
),
@@ -386,7 +386,7 @@ def test_structured_dtypes_select(structured_type_session, examples):
386386
flattened_df = df.select(
387387
df.map["k1"].alias("value1"),
388388
df.obj["A"].alias("a"),
389-
col("obj")["B"].alias("b"),
389+
col("obj")["b"].alias("b"),
390390
df.arr[0].alias("value2"),
391391
df.arr[1].alias("value3"),
392392
col("arr")[2].alias("value4"),
@@ -395,7 +395,7 @@ def test_structured_dtypes_select(structured_type_session, examples):
395395
[
396396
StructField("VALUE1", LongType(), nullable=True),
397397
StructField("A", StringType(16777216), nullable=True),
398-
StructField("B", DoubleType(), nullable=True),
398+
StructField("b", DoubleType(), nullable=True),
399399
StructField("VALUE2", DoubleType(), nullable=True),
400400
StructField("VALUE3", DoubleType(), nullable=True),
401401
StructField("VALUE4", DoubleType(), nullable=True),
@@ -424,12 +424,12 @@ def test_structured_dtypes_pandas(structured_type_session, structured_type_suppo
424424
if structured_type_support:
425425
assert (
426426
pdf.to_json()
427-
== '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","B":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}'
427+
== '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","b":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}'
428428
)
429429
else:
430430
assert (
431431
pdf.to_json()
432-
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"B\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}'
432+
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"b\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}'
433433
)
434434

435435

@@ -467,7 +467,7 @@ def test_structured_dtypes_iceberg(
467467
)
468468
assert save_ddl[0][0] == (
469469
f"create or replace ICEBERG TABLE {table_name.upper()} (\n\t"
470-
"MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, B DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n "
470+
"MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, b DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n "
471471
"EXTERNAL_VOLUME = 'PYTHON_CONNECTOR_ICEBERG_EXVOL'\n CATALOG = 'SNOWFLAKE'\n "
472472
"BASE_LOCATION = 'python_connector_merge_gate/';"
473473
)
@@ -733,8 +733,8 @@ def test_structured_dtypes_iceberg_create_from_values(
733733
_, __, expected_schema = STRUCTURED_TYPES_EXAMPLES[True]
734734
table_name = f"snowpark_structured_dtypes_{uuid.uuid4().hex[:5]}"
735735
data = [
736-
({"x": 1}, {"A": "a", "B": 1}, [1, 1, 1]),
737-
({"x": 2}, {"A": "b", "B": 2}, [2, 2, 2]),
736+
({"x": 1}, {"A": "a", "b": 1}, [1, 1, 1]),
737+
({"x": 2}, {"A": "b", "b": 2}, [2, 2, 2]),
738738
]
739739
try:
740740
create_df = structured_type_session.create_dataframe(
@@ -945,8 +945,8 @@ def test_structured_type_print_schema(
945945
" | |-- key: StringType()\n"
946946
" | |-- value: ArrayType\n"
947947
" | | |-- element: StructType\n"
948-
' | | | |-- "FIELD1": StringType() (nullable = True)\n'
949-
' | | | |-- "FIELD2": LongType() (nullable = True)\n'
948+
' | | | |-- "Field1": StringType() (nullable = True)\n'
949+
' | | | |-- "Field2": LongType() (nullable = True)\n'
950950
)
951951

952952
# Test that depth works as expected

0 commit comments

Comments
 (0)