From d2649c3e90b64d207a29bcd9a78244d88c0eb656 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Thu, 4 Dec 2025 15:56:21 -0800 Subject: [PATCH 01/14] add private var in numeric type and test --- src/snowflake/snowpark/_internal/type_utils.py | 2 +- src/snowflake/snowpark/types.py | 4 +++- tests/integ/test_datatypes.py | 13 +++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 926a0d97b0..6adde546d3 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -307,7 +307,7 @@ def convert_sf_to_sp_type( if column_type_name == "REAL": return DoubleType() if (column_type_name == "FIXED" or column_type_name == "NUMBER") and scale == 0: - return LongType() + return LongType(precision=precision, scale=scale) raise NotImplementedError( "Unsupported type: {}, precision: {}, scale: {}".format( column_type_name, precision, scale diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index a989532141..c33a8aef1a 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -169,7 +169,9 @@ def _fill_ast(self, ast: proto.DataType) -> None: class _NumericType(_AtomicType): - pass + def __init__(self, **kwargs) -> None: + self._precision = kwargs.get("precision", None) + self._scale = kwargs.get("scale", None) class TimestampTimeZone(Enum): diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index 4f19c65237..ca3b3d9f8b 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -408,3 +408,16 @@ def test_join_basic(session): ] ) ) + + +def test_numeric_type_store_precision_and_scale(session): + table_name = Utils.random_table_name() + df = session.create_dataframe( + [Decimal("9" * 38)], + StructType([StructField("large_value", DecimalType(38, 0), True)]), + ) + df.write.save_as_table(table_name, 
mode="overwrite", table_type="temp") + result = session.sql(f"select * from {table_name}") + datatype = result.schema.fields[0].datatype + assert isinstance(datatype, LongType) + assert datatype._precision == 38 and datatype._scale == 0 From 6065ebf62fdd23a55313d17b3b9531d3ec7ee3c9 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Thu, 4 Dec 2025 16:10:51 -0800 Subject: [PATCH 02/14] add private var in numeric type and test --- tests/integ/test_datatypes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index ca3b3d9f8b..843a88a23c 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -3,6 +3,8 @@ # from decimal import Decimal +import pytest + from snowflake.snowpark import DataFrame, Row from snowflake.snowpark.functions import lit from snowflake.snowpark.types import ( @@ -410,6 +412,10 @@ def test_join_basic(session): ) +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="session.sql not supported in local testing", +) def test_numeric_type_store_precision_and_scale(session): table_name = Utils.random_table_name() df = session.create_dataframe( From 7b8960ea56542909a7a1da5531da030132b15db7 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 09:47:14 -0800 Subject: [PATCH 03/14] add test --- tests/integ/test_datatypes.py | 58 ++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index 843a88a23c..1bf887883e 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -1,6 +1,9 @@ # # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved. 
# +import csv +import os +import tempfile from decimal import Decimal import pytest @@ -414,7 +417,7 @@ def test_join_basic(session): @pytest.mark.skipif( "config.getoption('local_testing_mode', default=False)", - reason="session.sql not supported in local testing", + reason="session.sql not supported by local testing mode", ) def test_numeric_type_store_precision_and_scale(session): table_name = Utils.random_table_name() @@ -427,3 +430,56 @@ def test_numeric_type_store_precision_and_scale(session): datatype = result.schema.fields[0].datatype assert isinstance(datatype, LongType) assert datatype._precision == 38 and datatype._scale == 0 + + +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="relaxed_types not supported by local testing mode", +) +def test_numeric_type_store_precision_and_scale_read_file(session): + stage_name = Utils.random_stage_name() + header = ("BIG_NUM",) + test_data = [("9" * 38,)] + + def write_csv(data): + with tempfile.NamedTemporaryFile( + mode="w+", + delete=False, + suffix=".csv", + newline="", + ) as file: + writer = csv.writer(file) + writer.writerow(header) + for row in data: + writer.writerow(row) + return file.name + + file_path = write_csv(test_data) + + try: + Utils.create_stage(session, stage_name, is_temporary=True) + result = session.file.put( + file_path, f"@{stage_name}", auto_compress=False, overwrite=True + ) + + # Infer schema from only the short file + constrained_reader = session.read.options( + { + "INFER_SCHEMA": True, + "INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]}, + "PARSE_HEADER": True, + # Only load the short file + "PATTERN": f".*{result[0].target}", + } + ) + + # df1 uses constrained types + df1 = constrained_reader.csv(f"@{stage_name}/") + datatype = df1.schema.fields[0].datatype + assert isinstance(datatype, LongType) + assert datatype._precision == 38 and datatype._scale == 0 + + finally: + Utils.drop_stage(session, stage_name) + if os.path.exists(file_path): + 
os.remove(file_path) From eacb90439e485b4ed1e3e8a3f37d0ec44403fca2 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 11:22:36 -0800 Subject: [PATCH 04/14] add feature flag to control --- .../snowpark/_internal/type_utils.py | 5 +- src/snowflake/snowpark/context.py | 1 + tests/integ/test_datatypes.py | 80 +++++++++++-------- 3 files changed, 51 insertions(+), 35 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 6adde546d3..bea8366f8f 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -307,7 +307,10 @@ def convert_sf_to_sp_type( if column_type_name == "REAL": return DoubleType() if (column_type_name == "FIXED" or column_type_name == "NUMBER") and scale == 0: - return LongType(precision=precision, scale=scale) + if context._store_precision_and_scale_in_numeric_type: + return LongType(precision=precision, scale=scale) + else: + return LongType() raise NotImplementedError( "Unsupported type: {}, precision: {}, scale: {}".format( column_type_name, precision, scale diff --git a/src/snowflake/snowpark/context.py b/src/snowflake/snowpark/context.py index 86e92b6aa4..33e34854f5 100644 --- a/src/snowflake/snowpark/context.py +++ b/src/snowflake/snowpark/context.py @@ -24,6 +24,7 @@ # If _should_continue_registration is not None, i.e. a caller environment has assigned it an alternate callable, then the callback is responsible for determining the rest of the Snowpark workflow. 
_should_continue_registration: Optional[Callable[..., bool]] = None +_store_precision_and_scale_in_numeric_type: bool = False # Internal-only global flag that determines if structured type semantics should be used _use_structured_type_semantics = False diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index 1bf887883e..72a41552ec 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -5,10 +5,11 @@ import os import tempfile from decimal import Decimal +from unittest.mock import patch import pytest -from snowflake.snowpark import DataFrame, Row +from snowflake.snowpark import DataFrame, Row, context from snowflake.snowpark.functions import lit from snowflake.snowpark.types import ( BooleanType, @@ -419,27 +420,37 @@ def test_join_basic(session): "config.getoption('local_testing_mode', default=False)", reason="session.sql not supported by local testing mode", ) -def test_numeric_type_store_precision_and_scale(session): +@pytest.mark.parametrize( + "massive_number, precision", [("9" * 38, 38), ("5" * 20, 20), ("7" * 10, 10)] +) +def test_numeric_type_store_precision_and_scale(session, massive_number, precision): table_name = Utils.random_table_name() - df = session.create_dataframe( - [Decimal("9" * 38)], - StructType([StructField("large_value", DecimalType(38, 0), True)]), - ) - df.write.save_as_table(table_name, mode="overwrite", table_type="temp") - result = session.sql(f"select * from {table_name}") - datatype = result.schema.fields[0].datatype - assert isinstance(datatype, LongType) - assert datatype._precision == 38 and datatype._scale == 0 + try: + with patch.object(context, "_store_precision_and_scale_in_numeric_type", True): + df = session.create_dataframe( + [Decimal(massive_number)], + StructType( + [StructField("large_value", DecimalType(precision, 0), True)] + ), + ) + df.write.save_as_table(table_name, mode="overwrite", table_type="temp") + result = session.sql(f"select * from {table_name}") + datatype 
= result.schema.fields[0].datatype + assert isinstance(datatype, LongType) + assert datatype._precision == 38 and datatype._scale == 0 + finally: + session.sql(f"drop table {table_name}").collect() @pytest.mark.skipif( "config.getoption('local_testing_mode', default=False)", reason="relaxed_types not supported by local testing mode", ) -def test_numeric_type_store_precision_and_scale_read_file(session): +@pytest.mark.parametrize("massive_number", ["9" * 38, "5" * 20, "7" * 10]) +def test_numeric_type_store_precision_and_scale_read_file(session, massive_number): stage_name = Utils.random_stage_name() header = ("BIG_NUM",) - test_data = [("9" * 38,)] + test_data = [(massive_number,)] def write_csv(data): with tempfile.NamedTemporaryFile( @@ -457,27 +468,28 @@ def write_csv(data): file_path = write_csv(test_data) try: - Utils.create_stage(session, stage_name, is_temporary=True) - result = session.file.put( - file_path, f"@{stage_name}", auto_compress=False, overwrite=True - ) - - # Infer schema from only the short file - constrained_reader = session.read.options( - { - "INFER_SCHEMA": True, - "INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]}, - "PARSE_HEADER": True, - # Only load the short file - "PATTERN": f".*{result[0].target}", - } - ) - - # df1 uses constrained types - df1 = constrained_reader.csv(f"@{stage_name}/") - datatype = df1.schema.fields[0].datatype - assert isinstance(datatype, LongType) - assert datatype._precision == 38 and datatype._scale == 0 + with patch.object(context, "_store_precision_and_scale_in_numeric_type", True): + Utils.create_stage(session, stage_name, is_temporary=True) + result = session.file.put( + file_path, f"@{stage_name}", auto_compress=False, overwrite=True + ) + + # Infer schema from only the short file + constrained_reader = session.read.options( + { + "INFER_SCHEMA": True, + "INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]}, + "PARSE_HEADER": True, + # Only load the short file + "PATTERN": f".*{result[0].target}", + } 
+ ) + + # df1 uses constrained types + df1 = constrained_reader.csv(f"@{stage_name}/") + datatype = df1.schema.fields[0].datatype + assert isinstance(datatype, LongType) + assert datatype._precision == 38 and datatype._scale == 0 finally: Utils.drop_stage(session, stage_name) From 0defc75d41db6a998de397abf49b420e8646b67c Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 11:50:56 -0800 Subject: [PATCH 05/14] modify __eq__ instead of feature flag --- src/snowflake/snowpark/context.py | 1 - .../plugin/_internal/snowpark_pandas_types.py | 7 +- src/snowflake/snowpark/types.py | 8 +++ tests/integ/test_datatypes.py | 67 +++++++++---------- 4 files changed, 45 insertions(+), 38 deletions(-) diff --git a/src/snowflake/snowpark/context.py b/src/snowflake/snowpark/context.py index 33e34854f5..86e92b6aa4 100644 --- a/src/snowflake/snowpark/context.py +++ b/src/snowflake/snowpark/context.py @@ -24,7 +24,6 @@ # If _should_continue_registration is not None, i.e. a caller environment has assigned it an alternate callable, then the callback is responsible for determining the rest of the Snowpark workflow. 
_should_continue_registration: Optional[Callable[..., bool]] = None -_store_precision_and_scale_in_numeric_type: bool = False # Internal-only global flag that determines if structured type semantics should be used _use_structured_type_semantics = False diff --git a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py index 8593089062..c136b56305 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py @@ -133,7 +133,12 @@ def __init__(self) -> None: super().__init__() def __eq__(self, other: Any) -> bool: - return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + def filtered(d: dict) -> dict: + return {k: v for k, v in d.items() if k not in ("_precision", "_scale")} + + return isinstance(other, self.__class__) and filtered( + self.__dict__ + ) == filtered(other.__dict__) def __ne__(self, other: Any) -> bool: return not self.__eq__(other) diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index c33a8aef1a..cf3ffb7b9c 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -173,6 +173,14 @@ def __init__(self, **kwargs) -> None: self._precision = kwargs.get("precision", None) self._scale = kwargs.get("scale", None) + def __eq__(self, other): + def filtered(d: dict) -> dict: + return {k: v for k, v in d.items() if k not in ("_precision", "_scale")} + + return isinstance(other, self.__class__) and filtered( + self.__dict__ + ) == filtered(other.__dict__) + class TimestampTimeZone(Enum): """ diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index 72a41552ec..44e974c449 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -5,11 +5,10 @@ import os import tempfile from decimal import Decimal -from unittest.mock import patch import pytest -from 
snowflake.snowpark import DataFrame, Row, context +from snowflake.snowpark import DataFrame, Row from snowflake.snowpark.functions import lit from snowflake.snowpark.types import ( BooleanType, @@ -426,18 +425,15 @@ def test_join_basic(session): def test_numeric_type_store_precision_and_scale(session, massive_number, precision): table_name = Utils.random_table_name() try: - with patch.object(context, "_store_precision_and_scale_in_numeric_type", True): - df = session.create_dataframe( - [Decimal(massive_number)], - StructType( - [StructField("large_value", DecimalType(precision, 0), True)] - ), - ) - df.write.save_as_table(table_name, mode="overwrite", table_type="temp") - result = session.sql(f"select * from {table_name}") - datatype = result.schema.fields[0].datatype - assert isinstance(datatype, LongType) - assert datatype._precision == 38 and datatype._scale == 0 + df = session.create_dataframe( + [Decimal(massive_number)], + StructType([StructField("large_value", DecimalType(precision, 0), True)]), + ) + df.write.save_as_table(table_name, mode="overwrite", table_type="temp") + result = session.sql(f"select * from {table_name}") + datatype = result.schema.fields[0].datatype + assert isinstance(datatype, LongType) + assert datatype._precision == 38 and datatype._scale == 0 finally: session.sql(f"drop table {table_name}").collect() @@ -468,28 +464,27 @@ def write_csv(data): file_path = write_csv(test_data) try: - with patch.object(context, "_store_precision_and_scale_in_numeric_type", True): - Utils.create_stage(session, stage_name, is_temporary=True) - result = session.file.put( - file_path, f"@{stage_name}", auto_compress=False, overwrite=True - ) - - # Infer schema from only the short file - constrained_reader = session.read.options( - { - "INFER_SCHEMA": True, - "INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]}, - "PARSE_HEADER": True, - # Only load the short file - "PATTERN": f".*{result[0].target}", - } - ) - - # df1 uses constrained types - df1 = 
constrained_reader.csv(f"@{stage_name}/") - datatype = df1.schema.fields[0].datatype - assert isinstance(datatype, LongType) - assert datatype._precision == 38 and datatype._scale == 0 + Utils.create_stage(session, stage_name, is_temporary=True) + result = session.file.put( + file_path, f"@{stage_name}", auto_compress=False, overwrite=True + ) + + # Infer schema from only the short file + constrained_reader = session.read.options( + { + "INFER_SCHEMA": True, + "INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]}, + "PARSE_HEADER": True, + # Only load the short file + "PATTERN": f".*{result[0].target}", + } + ) + + # df1 uses constrained types + df1 = constrained_reader.csv(f"@{stage_name}/") + datatype = df1.schema.fields[0].datatype + assert isinstance(datatype, LongType) + assert datatype._precision == 38 and datatype._scale == 0 finally: Utils.drop_stage(session, stage_name) From 5d61d3d25d379b687e9ec14ce6d7aedf81eea3d6 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 11:55:08 -0800 Subject: [PATCH 06/14] remove redundant code --- src/snowflake/snowpark/_internal/type_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index bea8366f8f..6adde546d3 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -307,10 +307,7 @@ def convert_sf_to_sp_type( if column_type_name == "REAL": return DoubleType() if (column_type_name == "FIXED" or column_type_name == "NUMBER") and scale == 0: - if context._store_precision_and_scale_in_numeric_type: - return LongType(precision=precision, scale=scale) - else: - return LongType() + return LongType(precision=precision, scale=scale) raise NotImplementedError( "Unsupported type: {}, precision: {}, scale: {}".format( column_type_name, precision, scale From ac2e712d3e337c58c176a2accabee42c7b3390ed Mon Sep 17 00:00:00 2001 From: Yuyang Wang
Date: Fri, 5 Dec 2025 12:00:34 -0800 Subject: [PATCH 07/14] remove redundant --- .../modin/plugin/_internal/snowpark_pandas_types.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py index c136b56305..72454065d8 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py @@ -132,14 +132,6 @@ class TimedeltaType(SnowparkPandasType, LongType): def __init__(self) -> None: super().__init__() - def __eq__(self, other: Any) -> bool: - def filtered(d: dict) -> dict: - return {k: v for k, v in d.items() if k not in ("_precision", "_scale")} - - return isinstance(other, self.__class__) and filtered( - self.__dict__ - ) == filtered(other.__dict__) - def __ne__(self, other: Any) -> bool: return not self.__eq__(other) From 1be3e9505659f6ddc1610a7e82a33edc3de2dfc9 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 13:50:37 -0800 Subject: [PATCH 08/14] fix tests --- .../snowpark/modin/plugin/_internal/snowpark_pandas_types.py | 4 ++-- src/snowflake/snowpark/types.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py index 72454065d8..2a2e3b1c00 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py @@ -5,7 +5,7 @@ import datetime import inspect from abc import ABCMeta, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Callable, NamedTuple, Optional, Tuple, Type, Union import numpy as np @@ -121,7 +121,7 @@ class TimedeltaType(SnowparkPandasType, LongType): two times.
""" - snowpark_type: DataType = LongType() + snowpark_type: DataType = field(default_factory=LongType) pandas_type: np.dtype = np.dtype("timedelta64[ns]") types_to_convert_with_from_pandas: Tuple[Type] = ( # type: ignore[assignment] native_pd.Timedelta, diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index cf3ffb7b9c..d9a16b1bb4 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -181,6 +181,9 @@ def filtered(d: dict) -> dict: self.__dict__ ) == filtered(other.__dict__) + def __hash__(self): + return hash(repr(self)) + class TimestampTimeZone(Enum): """ From 3ee94c52676a828f87a7e47cbd76ed42d0dcd50b Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 14:02:49 -0800 Subject: [PATCH 09/14] move to _integraltype --- .../snowpark/_internal/type_utils.py | 2 +- src/snowflake/snowpark/types.py | 29 +++++++++---------- tests/integ/test_datatypes.py | 4 +-- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 6adde546d3..0db82843b7 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -307,7 +307,7 @@ def convert_sf_to_sp_type( if column_type_name == "REAL": return DoubleType() if (column_type_name == "FIXED" or column_type_name == "NUMBER") and scale == 0: - return LongType(precision=precision, scale=scale) + return LongType(precision=precision) raise NotImplementedError( "Unsupported type: {}, precision: {}, scale: {}".format( column_type_name, precision, scale diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index d9a16b1bb4..aa14b1864e 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -169,20 +169,7 @@ def _fill_ast(self, ast: proto.DataType) -> None: class _NumericType(_AtomicType): - def __init__(self, **kwargs) -> None: - self._precision = 
kwargs.get("precision", None) - self._scale = kwargs.get("scale", None) - - def __eq__(self, other): - def filtered(d: dict) -> dict: - return {k: v for k, v in d.items() if k not in ("_precision", "_scale")} - - return isinstance(other, self.__class__) and filtered( - self.__dict__ - ) == filtered(other.__dict__) - - def __hash__(self): - return hash(repr(self)) + pass class TimestampTimeZone(Enum): @@ -383,7 +370,19 @@ def _fill_ast(self, ast: proto.DataType) -> None: # Numeric types class _IntegralType(_NumericType): - pass + def __init__(self, **kwargs) -> None: + self._precision = kwargs.get("precision", None) + + def __eq__(self, other): + def filtered(d: dict) -> dict: + return {k: v for k, v in d.items() if k != "_precision"} + + return isinstance(other, self.__class__) and filtered( + self.__dict__ + ) == filtered(other.__dict__) + + def __hash__(self): + return hash(repr(self)) class _FractionalType(_NumericType): diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index 44e974c449..d71c5713eb 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -433,7 +433,7 @@ def test_numeric_type_store_precision_and_scale(session, massive_number, precisi result = session.sql(f"select * from {table_name}") datatype = result.schema.fields[0].datatype assert isinstance(datatype, LongType) - assert datatype._precision == 38 and datatype._scale == 0 + assert datatype._precision == 38 finally: session.sql(f"drop table {table_name}").collect() @@ -484,7 +484,7 @@ def write_csv(data): df1 = constrained_reader.csv(f"@{stage_name}/") datatype = df1.schema.fields[0].datatype assert isinstance(datatype, LongType) - assert datatype._precision == 38 and datatype._scale == 0 + assert datatype._precision == 38 finally: Utils.drop_stage(session, stage_name) From 20735a15fd440cc2e22446a811fa9f20a6d74ecd Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 14:37:36 -0800 Subject: [PATCH 10/14] address comments --- 
src/snowflake/snowpark/_internal/type_utils.py | 2 +- src/snowflake/snowpark/types.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 0db82843b7..7c913a5eb3 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -307,7 +307,7 @@ def convert_sf_to_sp_type( if column_type_name == "REAL": return DoubleType() if (column_type_name == "FIXED" or column_type_name == "NUMBER") and scale == 0: - return LongType(precision=precision) + return LongType(_precision=precision) raise NotImplementedError( "Unsupported type: {}, precision: {}, scale: {}".format( column_type_name, precision, scale diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index aa14b1864e..037e92081a 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -371,15 +371,23 @@ def _fill_ast(self, ast: proto.DataType) -> None: # Numeric types class _IntegralType(_NumericType): def __init__(self, **kwargs) -> None: - self._precision = kwargs.get("precision", None) + self._precision = kwargs.get("_precision", None) + + if kwargs: + raise TypeError( + f"{self.__class__.__name__}() does not accept any arguments, please construct it without parameters." 
+ ) def __eq__(self, other): def filtered(d: dict) -> dict: return {k: v for k, v in d.items() if k != "_precision"} - return isinstance(other, self.__class__) and filtered( - self.__dict__ - ) == filtered(other.__dict__) + if context._is_snowpark_connect_compatible_mode: + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + else: + return isinstance(other, self.__class__) and filtered( + self.__dict__ + ) == filtered(other.__dict__) def __hash__(self): return hash(repr(self)) From 13ebe54b5f56dc3201161cfa7d87160955f1ded2 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 14:55:04 -0800 Subject: [PATCH 11/14] add test --- src/snowflake/snowpark/types.py | 4 ++-- tests/integ/test_datatypes.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 037e92081a..07175d96be 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -371,9 +371,9 @@ def _fill_ast(self, ast: proto.DataType) -> None: # Numeric types class _IntegralType(_NumericType): def __init__(self, **kwargs) -> None: - self._precision = kwargs.get("_precision", None) + self._precision = kwargs.pop("_precision", None) - if kwargs: + if kwargs != {}: raise TypeError( f"{self.__class__.__name__}() does not accept any arguments, please construct it without parameters." 
) diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index d71c5713eb..451ace2734 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -429,13 +429,17 @@ def test_numeric_type_store_precision_and_scale(session, massive_number, precisi [Decimal(massive_number)], StructType([StructField("large_value", DecimalType(precision, 0), True)]), ) + datatype = df.schema.fields[0].datatype + assert isinstance(datatype, LongType) + assert datatype._precision == precision + df.write.save_as_table(table_name, mode="overwrite", table_type="temp") result = session.sql(f"select * from {table_name}") datatype = result.schema.fields[0].datatype assert isinstance(datatype, LongType) assert datatype._precision == 38 finally: - session.sql(f"drop table {table_name}").collect() + session.sql(f"drop table if exists {table_name}").collect() @pytest.mark.skipif( From 8ad136ab2e7096e179fa01ace2c584a1c3f4e3f8 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 5 Dec 2025 15:20:06 -0800 Subject: [PATCH 12/14] add test --- src/snowflake/snowpark/types.py | 2 +- tests/integ/test_datatypes.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 07175d96be..603ba7d414 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -375,7 +375,7 @@ def __init__(self, **kwargs) -> None: if kwargs != {}: raise TypeError( - f"{self.__class__.__name__}() does not accept any arguments, please construct it without parameters." 
+ f"__init__() takes 0 argument but {len(kwargs.keys())} were given" ) def __eq__(self, other): diff --git a/tests/integ/test_datatypes.py b/tests/integ/test_datatypes.py index 451ace2734..4732eecc1d 100644 --- a/tests/integ/test_datatypes.py +++ b/tests/integ/test_datatypes.py @@ -433,8 +433,11 @@ def test_numeric_type_store_precision_and_scale(session, massive_number, precisi assert isinstance(datatype, LongType) assert datatype._precision == precision + # after save as table, the precision information is lost, because it is basically save LongType(), which + # does not have precision information, thus set to default 38. df.write.save_as_table(table_name, mode="overwrite", table_type="temp") result = session.sql(f"select * from {table_name}") + session.sql(f"describe table {table_name}").show() datatype = result.schema.fields[0].datatype assert isinstance(datatype, LongType) assert datatype._precision == 38 @@ -494,3 +497,8 @@ def write_csv(data): Utils.drop_stage(session, stage_name) if os.path.exists(file_path): os.remove(file_path) + + +def test_illegal_argument_intergraltype(): + with pytest.raises(TypeError, match="takes 0 argument but 1 were given"): + LongType(b=10) From e508e837c3d0d0d3a2b76c80969f09c62fbad398 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Sun, 7 Dec 2025 12:21:40 -0800 Subject: [PATCH 13/14] add eq function to timedelta --- .../modin/plugin/_internal/snowpark_pandas_types.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py index 2a2e3b1c00..2a5623a2ed 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py @@ -11,6 +11,7 @@ import numpy as np import pandas as native_pd +from snowflake.snowpark import context from snowflake.snowpark.column import Column from 
snowflake.snowpark.types import DataType, LongType @@ -132,6 +133,17 @@ class TimedeltaType(SnowparkPandasType, LongType): def __init__(self) -> None: super().__init__() + def __eq__(self, other: Any) -> bool: + def filtered(d: dict) -> dict: + return {k: v for k, v in d.items() if k != "_precision"} + + if context._is_snowpark_connect_compatible_mode: + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + else: + return isinstance(other, self.__class__) and filtered( + self.__dict__ + ) == filtered(other.__dict__) + def __ne__(self, other: Any) -> bool: return not self.__eq__(other) From 692ad3bccbb474d2d696676cbda22d4bd4a41b4a Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Sun, 7 Dec 2025 15:16:10 -0800 Subject: [PATCH 14/14] fix test --- .../modin/plugin/_internal/snowpark_pandas_types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py index 2a5623a2ed..5437a8b1ec 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py @@ -5,8 +5,8 @@ import datetime import inspect from abc import ABCMeta, abstractmethod -from dataclasses import dataclass, field -from typing import Any, Callable, NamedTuple, Optional, Tuple, Type, Union +from dataclasses import dataclass +from typing import Any, Callable, NamedTuple, Optional, Tuple, Type, Union, ClassVar import numpy as np import pandas as native_pd @@ -122,7 +122,7 @@ class TimedeltaType(SnowparkPandasType, LongType): two times. """ - snowpark_type: DataType = field(default_factory=LongType) + snowpark_type: ClassVar[DataType] = LongType() pandas_type: np.dtype = np.dtype("timedelta64[ns]") types_to_convert_with_from_pandas: Tuple[Type] = ( # type: ignore[assignment] native_pd.Timedelta,