Skip to content

Commit bcba153

Browse files
SNOW-2866776: add private var in numeric type (#4022)
1 parent 2824029 commit bcba153

File tree

4 files changed

+128
-5
lines changed

4 files changed

+128
-5
lines changed

src/snowflake/snowpark/_internal/type_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def convert_sf_to_sp_type(
307307
if column_type_name == "REAL":
308308
return DoubleType()
309309
if (column_type_name == "FIXED" or column_type_name == "NUMBER") and scale == 0:
310-
return LongType()
310+
return LongType(_precision=precision)
311311
raise NotImplementedError(
312312
"Unsupported type: {}, precision: {}, scale: {}".format(
313313
column_type_name, precision, scale

src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
import inspect
77
from abc import ABCMeta, abstractmethod
88
from dataclasses import dataclass
9-
from typing import Any, Callable, NamedTuple, Optional, Tuple, Type, Union
9+
from typing import Any, Callable, NamedTuple, Optional, Tuple, Type, Union, ClassVar
1010

1111
import numpy as np
1212
import pandas as native_pd
1313

14+
from snowflake.snowpark import context
1415
from snowflake.snowpark.column import Column
1516
from snowflake.snowpark.types import DataType, LongType
1617

@@ -121,7 +122,7 @@ class TimedeltaType(SnowparkPandasType, LongType):
121122
two times.
122123
"""
123124

124-
snowpark_type: DataType = LongType()
125+
snowpark_type: ClassVar[DataType] = LongType()
125126
pandas_type: np.dtype = np.dtype("timedelta64[ns]")
126127
types_to_convert_with_from_pandas: Tuple[Type] = ( # type: ignore[assignment]
127128
native_pd.Timedelta,
@@ -133,7 +134,15 @@ def __init__(self) -> None:
133134
super().__init__()
134135

135136
def __eq__(self, other: Any) -> bool:
136-
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
137+
def filtered(d: dict) -> dict:
138+
return {k: v for k, v in d.items() if k != "_precision"}
139+
140+
if context._is_snowpark_connect_compatible_mode:
141+
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
142+
else:
143+
return isinstance(other, self.__class__) and filtered(
144+
self.__dict__
145+
) == filtered(other.__dict__)
137146

138147
def __ne__(self, other: Any) -> bool:
139148
return not self.__eq__(other)

src/snowflake/snowpark/types.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,27 @@ def _fill_ast(self, ast: proto.DataType) -> None:
370370

371371
# Numeric types
372372
class _IntegralType(_NumericType):
373-
pass
373+
def __init__(self, **kwargs) -> None:
374+
self._precision = kwargs.pop("_precision", None)
375+
376+
if kwargs != {}:
377+
raise TypeError(
378+
f"__init__() takes 0 argument but {len(kwargs.keys())} were given"
379+
)
380+
381+
def __eq__(self, other):
382+
def filtered(d: dict) -> dict:
383+
return {k: v for k, v in d.items() if k != "_precision"}
384+
385+
if context._is_snowpark_connect_compatible_mode:
386+
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
387+
else:
388+
return isinstance(other, self.__class__) and filtered(
389+
self.__dict__
390+
) == filtered(other.__dict__)
391+
392+
def __hash__(self):
393+
return hash(repr(self))
374394

375395

376396
class _FractionalType(_NumericType):

tests/integ/test_datatypes.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
#
22
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
33
#
4+
import csv
5+
import os
6+
import tempfile
47
from decimal import Decimal
58

9+
import pytest
10+
611
from snowflake.snowpark import DataFrame, Row
712
from snowflake.snowpark.functions import lit
813
from snowflake.snowpark.types import (
@@ -408,3 +413,92 @@ def test_join_basic(session):
408413
]
409414
)
410415
)
416+
417+
418+
@pytest.mark.skipif(
419+
"config.getoption('local_testing_mode', default=False)",
420+
reason="session.sql not supported by local testing mode",
421+
)
422+
@pytest.mark.parametrize(
423+
"massive_number, precision", [("9" * 38, 38), ("5" * 20, 20), ("7" * 10, 10)]
424+
)
425+
def test_numeric_type_store_precision_and_scale(session, massive_number, precision):
426+
table_name = Utils.random_table_name()
427+
try:
428+
df = session.create_dataframe(
429+
[Decimal(massive_number)],
430+
StructType([StructField("large_value", DecimalType(precision, 0), True)]),
431+
)
432+
datatype = df.schema.fields[0].datatype
433+
assert isinstance(datatype, LongType)
434+
assert datatype._precision == precision
435+
436+
# after saving as a table, the precision information is lost, because this effectively saves a plain LongType(), which
437+
does not have precision information, so the precision falls back to the default of 38.
438+
df.write.save_as_table(table_name, mode="overwrite", table_type="temp")
439+
result = session.sql(f"select * from {table_name}")
440+
session.sql(f"describe table {table_name}").show()
441+
datatype = result.schema.fields[0].datatype
442+
assert isinstance(datatype, LongType)
443+
assert datatype._precision == 38
444+
finally:
445+
session.sql(f"drop table if exists {table_name}").collect()
446+
447+
448+
@pytest.mark.skipif(
449+
"config.getoption('local_testing_mode', default=False)",
450+
reason="relaxed_types not supported by local testing mode",
451+
)
452+
@pytest.mark.parametrize("massive_number", ["9" * 38, "5" * 20, "7" * 10])
453+
def test_numeric_type_store_precision_and_scale_read_file(session, massive_number):
454+
stage_name = Utils.random_stage_name()
455+
header = ("BIG_NUM",)
456+
test_data = [(massive_number,)]
457+
458+
def write_csv(data):
459+
with tempfile.NamedTemporaryFile(
460+
mode="w+",
461+
delete=False,
462+
suffix=".csv",
463+
newline="",
464+
) as file:
465+
writer = csv.writer(file)
466+
writer.writerow(header)
467+
for row in data:
468+
writer.writerow(row)
469+
return file.name
470+
471+
file_path = write_csv(test_data)
472+
473+
try:
474+
Utils.create_stage(session, stage_name, is_temporary=True)
475+
result = session.file.put(
476+
file_path, f"@{stage_name}", auto_compress=False, overwrite=True
477+
)
478+
479+
# Infer the schema from only the uploaded file
480+
constrained_reader = session.read.options(
481+
{
482+
"INFER_SCHEMA": True,
483+
"INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]},
484+
"PARSE_HEADER": True,
485+
# Only load the uploaded file
486+
"PATTERN": f".*{result[0].target}",
487+
}
488+
)
489+
490+
# df1 uses constrained types
491+
df1 = constrained_reader.csv(f"@{stage_name}/")
492+
datatype = df1.schema.fields[0].datatype
493+
assert isinstance(datatype, LongType)
494+
assert datatype._precision == 38
495+
496+
finally:
497+
Utils.drop_stage(session, stage_name)
498+
if os.path.exists(file_path):
499+
os.remove(file_path)
500+
501+
502+
def test_illegal_argument_integraltype():
503+
with pytest.raises(TypeError, match="takes 0 argument but 1 were given"):
504+
LongType(b=10)

0 commit comments

Comments
 (0)