Skip to content
Merged
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/_internal/type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def convert_sf_to_sp_type(
if column_type_name == "REAL":
return DoubleType()
if (column_type_name == "FIXED" or column_type_name == "NUMBER") and scale == 0:
return LongType()
return LongType(precision=precision, scale=scale)
raise NotImplementedError(
"Unsupported type: {}, precision: {}, scale: {}".format(
column_type_name, precision, scale
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,12 @@ def __init__(self) -> None:
super().__init__()

def __eq__(self, other: Any) -> bool:
    """Structural equality for DataType instances.

    Two instances are equal when they are of the same class and their
    attribute dicts match, *excluding* the internal-only ``_precision`` /
    ``_scale`` metadata — so e.g. a bare ``LongType()`` still compares equal
    to a ``LongType`` carrying server-reported precision info.
    """

    def filtered(d: dict) -> dict:
        # Drop internal metadata so pre-existing equality semantics
        # (e.g. schema comparisons in tests) are unchanged.
        return {k: v for k, v in d.items() if k not in ("_precision", "_scale")}

    return isinstance(other, self.__class__) and filtered(
        self.__dict__
    ) == filtered(other.__dict__)

def __ne__(self, other: Any) -> bool:
    """Inequality defined as the exact negation of :meth:`__eq__`."""
    equal = self.__eq__(other)
    return not equal
Expand Down
12 changes: 11 additions & 1 deletion src/snowflake/snowpark/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,17 @@ def _fill_ast(self, ast: proto.DataType) -> None:


class _NumericType(_AtomicType):
    """Base class for numeric data types.

    Internally records the Snowflake column precision/scale (when known,
    e.g. for a scale-0 NUMBER column surfaced as ``LongType``) so callers
    can recover the server-side NUMBER(precision, scale) information.
    These attributes are internal-only: they are accepted via ``**kwargs``
    rather than named parameters so they stay out of the public constructor
    signature, and they are excluded from equality so that e.g.
    ``LongType() == LongType(precision=38, scale=0)`` holds.
    """

    def __init__(self, **kwargs) -> None:
        # Internal-only metadata; None when the server did not report it.
        self._precision = kwargs.get("precision", None)
        self._scale = kwargs.get("scale", None)

    def __eq__(self, other):
        def filtered(d: dict) -> dict:
            # Drop internal metadata so pre-existing equality semantics
            # (e.g. schema comparisons in existing tests) are unchanged.
            return {k: v for k, v in d.items() if k not in ("_precision", "_scale")}

        return isinstance(other, self.__class__) and filtered(
            self.__dict__
        ) == filtered(other.__dict__)

    def __hash__(self) -> int:
        # Defining __eq__ implicitly sets __hash__ to None, which would make
        # every numeric type instance unhashable. Restore hashability with a
        # hash that is consistent with the filtered __eq__ above: any two
        # equal instances (same class, or subclass matched by isinstance)
        # share this constant hash.
        return hash(_NumericType)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why we overwrite the logic to exclude the new prop?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is an example test:
expected_fields = [
StructField("COL1", LongType(), nullable=True),
StructField("COL2", LongType(), nullable=True),
]
assert df.schema.fields == expected_fields
We exclude the private variables so that these existing tests do not fail.



class TimestampTimeZone(Enum):
Expand Down
82 changes: 82 additions & 0 deletions tests/integ/test_datatypes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
#
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
#
import csv
import os
import tempfile
from decimal import Decimal

import pytest

from snowflake.snowpark import DataFrame, Row
from snowflake.snowpark.functions import lit
from snowflake.snowpark.types import (
Expand Down Expand Up @@ -408,3 +413,80 @@ def test_join_basic(session):
]
)
)


@pytest.mark.skipif(
    "config.getoption('local_testing_mode', default=False)",
    reason="session.sql not supported by local testing mode",
)
@pytest.mark.parametrize(
    "massive_number, precision", [("9" * 38, 38), ("5" * 20, 20), ("7" * 10, 10)]
)
def test_numeric_type_store_precision_and_scale(session, massive_number, precision):
    """Writing a DecimalType(precision, 0) column and reading the table back
    should surface a LongType that carries the server-reported precision and
    scale metadata."""
    table_name = Utils.random_table_name()
    try:
        df = session.create_dataframe(
            [Decimal(massive_number)],
            StructType([StructField("large_value", DecimalType(precision, 0), True)]),
        )
        df.write.save_as_table(table_name, mode="overwrite", table_type="temp")
        result = session.sql(f"select * from {table_name}")
        datatype = result.schema.fields[0].datatype
        assert isinstance(datatype, LongType)
        # Snowflake reports scale-0 NUMBER columns at full precision (38),
        # regardless of the declared precision — TODO confirm for all params.
        assert datatype._precision == 38 and datatype._scale == 0
    finally:
        # "if exists" so a failure before the table is created does not raise
        # here and mask the original test error.
        session.sql(f"drop table if exists {table_name}").collect()


@pytest.mark.skipif(
    "config.getoption('local_testing_mode', default=False)",
    reason="relaxed_types not supported by local testing mode",
)
@pytest.mark.parametrize("massive_number", ["9" * 38, "5" * 20, "7" * 10])
def test_numeric_type_store_precision_and_scale_read_file(session, massive_number):
    """Schema inference over a staged CSV should surface a LongType carrying
    the inferred precision/scale metadata."""
    stage_name = Utils.random_stage_name()
    header = ("BIG_NUM",)
    test_data = [(massive_number,)]

    def write_csv(data):
        # Write header + rows to a temp CSV; delete=False so the file
        # survives the context manager and can be uploaded below.
        with tempfile.NamedTemporaryFile(
            mode="w+",
            delete=False,
            suffix=".csv",
            newline="",
        ) as file:
            writer = csv.writer(file)
            writer.writerow(header)
            writer.writerows(data)
        return file.name

    file_path = write_csv(test_data)

    try:
        Utils.create_stage(session, stage_name, is_temporary=True)
        result = session.file.put(
            file_path, f"@{stage_name}", auto_compress=False, overwrite=True
        )

        # Infer the schema from — and load — only the file just uploaded.
        constrained_reader = session.read.options(
            {
                "INFER_SCHEMA": True,
                "INFER_SCHEMA_OPTIONS": {"FILES": [result[0].target]},
                "PARSE_HEADER": True,
                "PATTERN": f".*{result[0].target}",
            }
        )

        df1 = constrained_reader.csv(f"@{stage_name}/")
        datatype = df1.schema.fields[0].datatype
        assert isinstance(datatype, LongType)
        # Inference reports scale-0 integers at Snowflake's maximum numeric
        # precision (38) — TODO confirm this holds for all parametrizations.
        assert datatype._precision == 38 and datatype._scale == 0

    finally:
        Utils.drop_stage(session, stage_name)
        if os.path.exists(file_path):
            os.remove(file_path)
Loading