Skip to content

Commit 7ffc5c3

Browse files
SNOW-1557539 count describe queries for test (#2249)
<!--- Please answer these questions before creating your pull request. Thanks! --->

1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR.

   Fixes SNOW-1557539 count describe queries for test

2. Fill out the following pre-review checklist:

   - [ ] I am adding a new automated test(s) to verify correctness of my new code
   - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
   - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes.

3. Please describe how your code solves the related issue.

   Add describe queries to the query listener history (opt-in).
1 parent 6b8df76 commit 7ffc5c3

File tree

7 files changed, with 82 additions and 13 deletions.

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
- Added support for `TimedeltaIndex.mean` method.
1414
- Added support for some cases of aggregating `Timedelta` columns on `axis=0` with `agg` or `aggregate`.
1515
- Added support for `by`, `left_by`, `right_by`, `left_index`, and `right_index` for `pd.merge_asof`.
16+
- Added support for passing parameter `include_describe` to `Session.query_history`.
1617

1718
#### Bug Fixes
1819

src/snowflake/snowpark/_internal/server_connection.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -244,9 +244,23 @@ def _get_string_datum(self, query: str) -> Optional[str]:
244244
@SnowflakePlan.Decorator.wrap_exception
245245
def get_result_attributes(self, query: str) -> List[Attribute]:
246246
return convert_result_meta_to_attribute(
247-
run_new_describe(self._cursor, query), self.max_string_size
247+
self._run_new_describe(self._cursor, query), self.max_string_size
248248
)
249249

250+
def _run_new_describe(
251+
self, cursor: SnowflakeCursor, query: str
252+
) -> Union[List[ResultMetadata], List["ResultMetadataV2"]]:
253+
result_metadata = run_new_describe(cursor, query)
254+
255+
for listener in filter(
256+
lambda listener: hasattr(listener, "include_describe")
257+
and listener.include_describe,
258+
self._query_listener,
259+
):
260+
listener._add_query(QueryRecord(cursor.sfqid, query, True))
261+
262+
return result_metadata
263+
250264
@_Decorator.log_msg_and_perf_telemetry("Uploading file to stage")
251265
def upload_file(
252266
self,

src/snowflake/snowpark/query_history.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ class QueryRecord(NamedTuple):
1212

1313
query_id: str
1414
sql_text: str
15+
is_describe: bool = False
1516

1617

1718
class QueryHistory:
@@ -21,9 +22,14 @@ class QueryHistory:
2122
:meth:`snowflake.snowpark.Session.query_history`.
2223
"""
2324

24-
def __init__(self, session: "snowflake.snowpark.session.Session") -> None:
25+
def __init__(
26+
self,
27+
session: "snowflake.snowpark.session.Session",
28+
include_describe: bool = False,
29+
) -> None:
2530
self.session = session
2631
self._queries: List[QueryRecord] = []
32+
self._include_describe = include_describe
2733

2834
def __enter__(self):
2935
return self
@@ -37,3 +43,7 @@ def _add_query(self, query_record: QueryRecord):
3743
@property
3844
def queries(self) -> List[QueryRecord]:
3945
return self._queries
46+
47+
@property
48+
def include_describe(self) -> bool:
49+
return self._include_describe

src/snowflake/snowpark/session.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3456,16 +3456,21 @@ def flatten(
34563456
set_api_call_source(df, "Session.flatten")
34573457
return df
34583458

3459-
def query_history(self) -> QueryHistory:
3459+
def query_history(self, include_describe: bool = False) -> QueryHistory:
34603460
"""Create an instance of :class:`QueryHistory` as a context manager to record queries that are pushed down to the Snowflake database.
34613461
3462-
>>> with session.query_history() as query_history:
3462+
Args:
3463+
include_describe: Include query notifications for describe queries
3464+
3465+
>>> with session.query_history(True) as query_history:
34633466
... df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
34643467
... df = df.filter(df.a == 1)
34653468
... res = df.collect()
3466-
>>> assert len(query_history.queries) == 1
3469+
>>> assert len(query_history.queries) == 2
3470+
>>> assert query_history.queries[0].is_describe
3471+
>>> assert not query_history.queries[1].is_describe
34673472
"""
3468-
query_listener = QueryHistory(self)
3473+
query_listener = QueryHistory(self, include_describe)
34693474
self._conn.add_query_listener(query_listener)
34703475
return query_listener
34713476

tests/integ/modin/sql_counter.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
from decorator import decorator
1515
from pandas._typing import Scalar
1616

17-
from snowflake.snowpark import QueryRecord
17+
from snowflake.snowpark.query_history import QueryRecord
1818
from snowflake.snowpark.session import Session
1919
from tests.utils import IS_IN_STORED_PROC
2020

@@ -43,6 +43,7 @@
4343
UDTF_COUNT_PARAMETER = "udtf_count"
4444
SELECT_COUNT_PARAMETER = "select_count"
4545
UNION_COUNT_PARAMETER = "union_count"
46+
DESCRIBE_COUNT_PARAMETER = "describe_count"
4647
EXPECT_HIGH_COUNT = "expect_high_count"
4748
HIGH_COUNT_REASON = "high_count_reason"
4849

@@ -54,6 +55,7 @@
5455
UDTF_COUNT_PARAMETER,
5556
SELECT_COUNT_PARAMETER,
5657
UNION_COUNT_PARAMETER,
58+
DESCRIBE_COUNT_PARAMETER,
5759
]
5860
BOOL_PARAMETERS = [EXPECT_HIGH_COUNT]
5961

@@ -156,6 +158,11 @@ def __init__(
156158
# Add SqlCounter as a snowpark query listener.
157159
self.session._conn.add_query_listener(self)
158160

161+
# The query history listener will include describe queries if this is true.
162+
@property
163+
def include_describe(self) -> bool:
164+
return True
165+
159166
@staticmethod
160167
def set_record_mode(record_mode):
161168
"""Record mode means the SqlCounter does not assert any results, but rather collects them so they can
@@ -251,7 +258,7 @@ def expects(self, **kwargs):
251258

252259
# If there are any failures, print out all the captured queries so clear which are being counted.
253260
if failed:
254-
title = f"{'='*20} SqlCounter Captured Queries {'='*20}"
261+
title = f"\n{'='*20} SqlCounter Captured Queries {'='*20}\n"
255262
print(title, file=sys.stderr)
256263
for query in self._get_actual_queries():
257264
print(query, file=sys.stderr)
@@ -268,7 +275,12 @@ def _get_actual_queries(self):
268275
for fw in FILTER_OUT_QUERIES
269276
]
270277
),
271-
list(map(lambda q: q.sql_text, self._queries)),
278+
list(
279+
map(
280+
lambda q: q.sql_text,
281+
[q for q in self._queries if not q.is_describe],
282+
)
283+
),
272284
)
273285
)
274286

@@ -329,6 +341,9 @@ def actual_select_count(self):
329341
def actual_union_count(self):
330342
return self._count_instances_by_query_substr(contains=[UNION])
331343

344+
def actual_describe_count(self):
345+
return len([q for q in self._queries if q.is_describe])
346+
332347
def get_actual_counts(self):
333348
"""Retrieve all actual counts so far."""
334349
actual_counts = {}

tests/integ/modin/test_sql_counter.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,22 +80,22 @@ def test_sql_counter_with_context_manager_inside_loop():
8080

8181
@sql_count_checker(no_check=True)
8282
def test_sql_counter_with_multiple_checks():
83-
with SqlCounter(query_count=1):
83+
with SqlCounter(query_count=1, describe_count=1):
8484
df = pd.DataFrame({"a": [1, 2, 3]})
8585
assert len(df) == 3
8686

87-
with SqlCounter(query_count=1):
87+
with SqlCounter(query_count=1, describe_count=1):
8888
df = pd.DataFrame({"b": [4, 5, 6]})
8989
assert len(df) == 3
9090

91-
with SqlCounter(query_count=1):
91+
with SqlCounter(query_count=1, describe_count=1):
9292
df = pd.DataFrame({"c": [7, 8, 9]})
9393
assert len(df) == 3
9494

9595

9696
@sql_count_checker(no_check=True)
9797
def test_sql_counter_with_context_manager_outside_loop():
98-
sc = SqlCounter(query_count=3)
98+
sc = SqlCounter(query_count=3, describe_count=3)
9999
sc.__enter__()
100100
for _ in range(3):
101101
df = pd.DataFrame({"a": [1, 2, 3]})

tests/integ/test_query_history.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,20 @@ def test_query_history(session):
2222
assert len(query_listener.queries) == 1
2323
assert query_listener.queries[0].query_id is not None
2424
assert query_listener.queries[0].sql_text == "select 0"
25+
assert not query_listener.queries[0].is_describe
26+
27+
28+
def test_query_history_with_describe(session):
29+
with session.query_history(True) as query_listener:
30+
df = session.sql("select 0")
31+
df.columns
32+
df.collect()
33+
assert len(query_listener.queries) == 2
34+
for query in query_listener.queries:
35+
assert query.query_id is not None
36+
assert query.sql_text == "select 0"
37+
assert query_listener.queries[0].is_describe
38+
assert not query_listener.queries[1].is_describe
2539

2640

2741
def test_query_history_stop_listening(session):
@@ -57,6 +71,16 @@ def test_query_history_two_listeners(session):
5771

5872

5973
def test_query_history_multiple_actions(session):
74+
with session.query_history(True) as query_history:
75+
df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
76+
df = df.filter(df.a == 1)
77+
df.collect()
78+
79+
assert len(query_history.queries) == 3
80+
assert query_history.queries[0].is_describe
81+
assert query_history.queries[1].is_describe
82+
assert not query_history.queries[2].is_describe
83+
6084
with session.query_history() as query_listener:
6185
session.sql("select 0").collect()
6286
session.sql("select 1").collect()

0 commit comments

Comments
 (0)