
Commit 61f2760

Merge pull request #111 from databricks-industry-solutions/pandas-date_range-fix
update for new pandas date_range frequency
2 parents: 5778027 + 12f9409

22 files changed: +541 / -556 lines

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -137,3 +137,5 @@ dmypy.json
 # Lightning Logs
 examples/lightning_logs
 examples/m5-examples/lightning_logs
+
+.databricks

constraints.txt

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 torch==2.3.1+cu121
 torchvision>=0.18.0
 numpy==1.26.4
-pandas==2.1.4
+pandas==2.2
 pyarrow==14.0.1
 pyarrow-hotfix==0.6
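
For context on why the pin moves to pandas 2.2: pandas 2.2 deprecated several uppercase offset aliases that the old notebooks relied on, notably "M" (now "ME", month end) and "H" (now "h"). A minimal sketch of the new aliases, assuming pandas >= 2.2 is installed; the variable names here are illustrative and not part of this repository:

import pandas as pd

# Daily alias is unchanged.
daily = pd.date_range("2020-01-01", periods=3, freq="D")

# "H" is deprecated in pandas 2.2; use lowercase "h" for hourly.
hourly = pd.date_range("2025-01-01 00:00", periods=3, freq="h")

# "M" is deprecated in pandas 2.2; "ME" stamps each period at its month end.
month_end = pd.date_range("2018-01-01", periods=3, freq="ME")

print(daily[-1], hourly[-1], month_end[-1])
# 2020-01-03 00:00:00 2025-01-01 02:00:00 2018-03-31 00:00:00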

examples/daily/foundation_daily.ipynb

Lines changed: 13 additions & 14 deletions
@@ -174,27 +174,26 @@
 "\n",
 "def create_m4_daily():\n",
 "    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group=\"Daily\")\n",
-"    _ids = [f\"D{i}\" for i in range(1, n+1)]\n",
+"    target_ids = {f\"D{i}\" for i in range(1, n)}\n",
+"    y_df = y_df[y_df[\"unique_id\"].isin(target_ids)]\n",
 "    y_df = (\n",
-"        y_df.groupby(\"unique_id\")\n",
-"        .filter(lambda x: x.unique_id.iloc[0] in _ids)\n",
-"        .groupby(\"unique_id\")\n",
-"        .apply(transform_group)\n",
-"        .reset_index(drop=True)\n",
+"        y_df.groupby(\"unique_id\", group_keys=False)\n",
+"        .apply(lambda g: transform_group(g, g.name))\n",
+"        .reset_index(drop=True)\n",
 "    )\n",
 "    return y_df\n",
 "\n",
 "\n",
-"def transform_group(df):\n",
-"    unique_id = df.unique_id.iloc[0]\n",
+"def transform_group(df, unique_id):\n",
 "    if len(df) > 1020:\n",
 "        df = df.iloc[-1020:]\n",
-"    _start = pd.Timestamp(\"2020-01-01\")\n",
-"    _end = _start + pd.DateOffset(days=int(df.count()[0]) - 1)\n",
-"    date_idx = pd.date_range(start=_start, end=_end, freq=\"D\", name=\"ds\")\n",
-"    res_df = pd.DataFrame(data=[], index=date_idx).reset_index()\n",
-"    res_df[\"unique_id\"] = unique_id\n",
-"    res_df[\"y\"] = df.y.values\n",
+"    start = pd.Timestamp(\"2020-01-01\")\n",
+"    date_idx = pd.date_range(start=start, periods=len(df), freq=\"D\", name=\"ds\")\n",
+"    res_df = pd.DataFrame({\n",
+"        \"ds\": date_idx,\n",
+"        \"unique_id\": unique_id,\n",
+"        \"y\": df[\"y\"].to_numpy()\n",
+"    })\n",
 "    return res_df"
 ]
 },
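
The same refactor pattern repeats across the notebooks below: the per-series id now arrives through the group key (g.name) instead of being re-read from the frame, a single isin mask replaces the double groupby/filter, and the calendar is sized with periods= rather than a computed end date. A self-contained sketch of that pattern on toy data, assuming only pandas is installed; the M4 loader and the notebook's n variable are not reproduced here:

import pandas as pd

def transform_group(df, unique_id):
    # Rebuild a regular daily calendar sized by periods= instead of a computed end date.
    date_idx = pd.date_range("2020-01-01", periods=len(df), freq="D", name="ds")
    return pd.DataFrame({"ds": date_idx, "unique_id": unique_id, "y": df["y"].to_numpy()})

toy = pd.DataFrame({
    "unique_id": ["D1", "D1", "D2", "D2", "D2"],
    "y": [1.0, 2.0, 3.0, 4.0, 5.0],
})

out = (
    toy.groupby("unique_id", group_keys=False)
    .apply(lambda g: transform_group(g, g.name))  # g.name is the group key, e.g. "D1"
    .reset_index(drop=True)
)
print(out)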

examples/daily/global_daily.ipynb

Lines changed: 14 additions & 15 deletions
@@ -174,28 +174,27 @@
 "\n",
 "def create_m4_daily():\n",
 "    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group=\"Daily\")\n",
-"    _ids = [f\"D{i}\" for i in range(1, n+1)]\n",
+"    target_ids = {f\"D{i}\" for i in range(1, n)}\n",
+"    y_df = y_df[y_df[\"unique_id\"].isin(target_ids)]\n",
 "    y_df = (\n",
-"        y_df.groupby(\"unique_id\")\n",
-"        .filter(lambda x: x.unique_id.iloc[0] in _ids)\n",
-"        .groupby(\"unique_id\")\n",
-"        .apply(transform_group)\n",
-"        .reset_index(drop=True)\n",
+"        y_df.groupby(\"unique_id\", group_keys=False)\n",
+"        .apply(lambda g: transform_group(g, g.name))\n",
+"        .reset_index(drop=True)\n",
 "    )\n",
 "    return y_df\n",
 "\n",
 "\n",
-"def transform_group(df):\n",
-"    unique_id = df.unique_id.iloc[0]\n",
+"def transform_group(df, unique_id):\n",
 "    if len(df) > 1020:\n",
 "        df = df.iloc[-1020:]\n",
-"    _start = pd.Timestamp(\"2020-01-01\")\n",
-"    _end = _start + pd.DateOffset(days=int(df.count()[0]) - 1)\n",
-"    date_idx = pd.date_range(start=_start, end=_end, freq=\"D\", name=\"ds\")\n",
-"    res_df = pd.DataFrame(data=[], index=date_idx).reset_index()\n",
-"    res_df[\"unique_id\"] = unique_id\n",
-"    res_df[\"y\"] = df.y.values\n",
-"    return res_df\n"
+"    start = pd.Timestamp(\"2020-01-01\")\n",
+"    date_idx = pd.date_range(start=start, periods=len(df), freq=\"D\", name=\"ds\")\n",
+"    res_df = pd.DataFrame({\n",
+"        \"ds\": date_idx,\n",
+"        \"unique_id\": unique_id,\n",
+"        \"y\": df[\"y\"].to_numpy()\n",
+"    })\n",
+"    return res_df"
 ]
 },
 {

examples/daily/local_univariate_daily.ipynb

Lines changed: 14 additions & 15 deletions
@@ -38,7 +38,7 @@
 "source": [
 "### Cluster setup\n",
 "\n",
-"We recommend using a cluster with [Databricks Runtime 16.4 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/16.4lts-ml.html). The cluster can be either a single-node or multi-node CPU cluster. MMF leverages [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) under the hood and utilizes all the available resource. Make sure to set the following Spark configurations before you start your cluster: [`spark.sql.execution.arrow.enabled true`](https://spark.apache.org/docs/3.0.1/sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas) and [`spark.sql.adaptive.enabled false`](https://spark.apache.org/docs/latest/sql-performance-tuning.html#adaptive-query-execution). You can do this by specifying [Spark configuration](https://docs.databricks.com/en/compute/configure.html#spark-configuration) in the advanced options on the cluster creation page."
+"We recommend using a cluster with [Databricks Runtime 17.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/17.3lts-ml.html). The cluster can be either a single-node or multi-node CPU cluster. MMF leverages [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) under the hood and utilizes all the available resource. Make sure to set the following Spark configurations before you start your cluster: [`spark.sql.execution.arrow.enabled true`](https://spark.apache.org/docs/3.0.1/sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas) and [`spark.sql.adaptive.enabled false`](https://spark.apache.org/docs/latest/sql-performance-tuning.html#adaptive-query-execution). You can do this by specifying [Spark configuration](https://docs.databricks.com/en/compute/configure.html#spark-configuration) in the advanced options on the cluster creation page."
 ]
 },
 {
@@ -174,27 +174,26 @@
 "\n",
 "def create_m4_daily():\n",
 "    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group=\"Daily\")\n",
-"    _ids = [f\"D{i}\" for i in range(1, n)]\n",
+"    target_ids = {f\"D{i}\" for i in range(1, n)}\n",
+"    y_df = y_df[y_df[\"unique_id\"].isin(target_ids)]\n",
 "    y_df = (\n",
-"        y_df.groupby(\"unique_id\")\n",
-"        .filter(lambda x: x.unique_id.iloc[0] in _ids)\n",
-"        .groupby(\"unique_id\")\n",
-"        .apply(transform_group)\n",
-"        .reset_index(drop=True)\n",
+"        y_df.groupby(\"unique_id\", group_keys=False)\n",
+"        .apply(lambda g: transform_group(g, g.name))\n",
+"        .reset_index(drop=True)\n",
 "    )\n",
 "    return y_df\n",
 "\n",
 "\n",
-"def transform_group(df):\n",
-"    unique_id = df.unique_id.iloc[0]\n",
+"def transform_group(df, unique_id):\n",
 "    if len(df) > 1020:\n",
 "        df = df.iloc[-1020:]\n",
-"    _start = pd.Timestamp(\"2020-01-01\")\n",
-"    _end = _start + pd.DateOffset(days=int(df.count()[0]) - 1)\n",
-"    date_idx = pd.date_range(start=_start, end=_end, freq=\"D\", name=\"ds\")\n",
-"    res_df = pd.DataFrame(data=[], index=date_idx).reset_index()\n",
-"    res_df[\"unique_id\"] = unique_id\n",
-"    res_df[\"y\"] = df.y.values\n",
+"    start = pd.Timestamp(\"2020-01-01\")\n",
+"    date_idx = pd.date_range(start=start, periods=len(df), freq=\"D\", name=\"ds\")\n",
+"    res_df = pd.DataFrame({\n",
+"        \"ds\": date_idx,\n",
+"        \"unique_id\": unique_id,\n",
+"        \"y\": df[\"y\"].to_numpy()\n",
+"    })\n",
 "    return res_df"
 ]
 },

examples/external_regressors/local_univariate_external_regressors_daily.ipynb

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@
 "source": [
 "### Cluster setup\n",
 "\n",
-"We recommend using a cluster with [Databricks Runtime 16.4 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/16.4lts-ml.html). The cluster can be either a single-node or multi-node CPU cluster. Make sure to set the following Spark configurations before you start your cluster: [`spark.sql.execution.arrow.enabled true`](https://spark.apache.org/docs/3.0.1/sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas) and [`spark.sql.adaptive.enabled false`](https://spark.apache.org/docs/latest/sql-performance-tuning.html#adaptive-query-execution). You can do this by specifying [Spark configuration](https://docs.databricks.com/en/compute/configure.html#spark-configuration) in the advanced options on the cluster creation page."
+"We recommend using a cluster with [Databricks Runtime 17.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/17.3lts-ml.html). The cluster can be either a single-node or multi-node CPU cluster. Make sure to set the following Spark configurations before you start your cluster: [`spark.sql.execution.arrow.enabled true`](https://spark.apache.org/docs/3.0.1/sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas) and [`spark.sql.adaptive.enabled false`](https://spark.apache.org/docs/latest/sql-performance-tuning.html#adaptive-query-execution). You can do this by specifying [Spark configuration](https://docs.databricks.com/en/compute/configure.html#spark-configuration) in the advanced options on the cluster creation page."
 ]
 },
 {

examples/hourly/foundation_hourly.ipynb

Lines changed: 13 additions & 14 deletions
@@ -174,27 +174,26 @@
 "\n",
 "def create_m4_hourly():\n",
 "    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group=\"Hourly\")\n",
-"    _ids = [f\"H{i}\" for i in range(1, n)]\n",
+"    target_ids = {f\"H{i}\" for i in range(1, n)}\n",
+"    y_df = y_df[y_df[\"unique_id\"].isin(target_ids)]\n",
 "    y_df = (\n",
-"        y_df.groupby(\"unique_id\")\n",
-"        .filter(lambda x: x.unique_id.iloc[0] in _ids)\n",
-"        .groupby(\"unique_id\")\n",
-"        .apply(transform_group)\n",
-"        .reset_index(drop=True)\n",
+"        y_df.groupby(\"unique_id\", group_keys=False)\n",
+"        .apply(lambda g: transform_group(g, g.name))\n",
+"        .reset_index(drop=True)\n",
 "    )\n",
 "    return y_df\n",
 "\n",
 "\n",
-"def transform_group(df):\n",
-"    unique_id = df.unique_id.iloc[0]\n",
+"def transform_group(df, unique_id):\n",
 "    if len(df) > 720:\n",
 "        df = df.iloc[-720:]\n",
-"    _start = pd.Timestamp(\"2025-01-01 00:00\")\n",
-"    _end = _start + pd.DateOffset(hours=len(df)-1)\n",
-"    date_idx = pd.date_range(start=_start, end=_end, freq=\"H\", name=\"ds\")\n",
-"    res_df = pd.DataFrame(data=[], index=date_idx).reset_index()\n",
-"    res_df[\"unique_id\"] = unique_id\n",
-"    res_df[\"y\"] = df.y.values\n",
+"    start = pd.Timestamp(\"2025-01-01 00:00\")\n",
+"    date_idx = pd.date_range(start=start, periods=len(df), freq=\"h\", name=\"ds\")\n",
+"    res_df = pd.DataFrame({\n",
+"        \"ds\": date_idx,\n",
+"        \"unique_id\": unique_id,\n",
+"        \"y\": df[\"y\"].to_numpy()\n",
+"    })\n",
 "    return res_df"
 ]
 },
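
For the hourly notebooks the calendar change is the same idea plus the alias rename: the old code computed an explicit end with DateOffset(hours=...) and freq="H", while the updated code sizes the range with periods= and the lowercase "h" alias. A quick equivalence check, assuming pandas >= 2.2; n_obs is an illustrative name, not from the repository:

import pandas as pd

n_obs = 720  # the hourly notebooks keep at most 720 observations per series
start = pd.Timestamp("2025-01-01 00:00")

# End-based construction, as the old code did (it used freq="H", which now warns).
end = start + pd.DateOffset(hours=n_obs - 1)
old_idx = pd.date_range(start=start, end=end, freq="h", name="ds")

# Periods-based construction, as the updated code does.
new_idx = pd.date_range(start=start, periods=n_obs, freq="h", name="ds")

assert old_idx.equals(new_idx)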

examples/hourly/global_hourly.ipynb

Lines changed: 13 additions & 14 deletions
@@ -174,27 +174,26 @@
 "\n",
 "def create_m4_hourly():\n",
 "    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group=\"Hourly\")\n",
-"    _ids = [f\"H{i}\" for i in range(1, n)]\n",
+"    target_ids = {f\"H{i}\" for i in range(1, n)}\n",
+"    y_df = y_df[y_df[\"unique_id\"].isin(target_ids)]\n",
 "    y_df = (\n",
-"        y_df.groupby(\"unique_id\")\n",
-"        .filter(lambda x: x.unique_id.iloc[0] in _ids)\n",
-"        .groupby(\"unique_id\")\n",
-"        .apply(transform_group)\n",
-"        .reset_index(drop=True)\n",
+"        y_df.groupby(\"unique_id\", group_keys=False)\n",
+"        .apply(lambda g: transform_group(g, g.name))\n",
+"        .reset_index(drop=True)\n",
 "    )\n",
 "    return y_df\n",
 "\n",
 "\n",
-"def transform_group(df):\n",
-"    unique_id = df.unique_id.iloc[0]\n",
+"def transform_group(df, unique_id):\n",
 "    if len(df) > 720:\n",
 "        df = df.iloc[-720:]\n",
-"    _start = pd.Timestamp(\"2025-01-01 00:00\")\n",
-"    _end = _start + pd.DateOffset(hours=len(df)-1)\n",
-"    date_idx = pd.date_range(start=_start, end=_end, freq=\"H\", name=\"ds\")\n",
-"    res_df = pd.DataFrame(data=[], index=date_idx).reset_index()\n",
-"    res_df[\"unique_id\"] = unique_id\n",
-"    res_df[\"y\"] = df.y.values\n",
+"    start = pd.Timestamp(\"2025-01-01 00:00\")\n",
+"    date_idx = pd.date_range(start=start, periods=len(df), freq=\"h\", name=\"ds\")\n",
+"    res_df = pd.DataFrame({\n",
+"        \"ds\": date_idx,\n",
+"        \"unique_id\": unique_id,\n",
+"        \"y\": df[\"y\"].to_numpy()\n",
+"    })\n",
 "    return res_df"
 ]
 },

examples/hourly/local_univariate_hourly.ipynb

Lines changed: 14 additions & 15 deletions
@@ -38,7 +38,7 @@
 "source": [
 "### Cluster setup\n",
 "\n",
-"We recommend using a cluster with [Databricks Runtime 16.4 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/16.4lts-ml.html). The cluster can be either a single-node or multi-node CPU cluster. MMF leverages [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) under the hood and utilizes all the available resource. Make sure to set the following Spark configurations before you start your cluster: [`spark.sql.execution.arrow.enabled true`](https://spark.apache.org/docs/3.0.1/sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas) and [`spark.sql.adaptive.enabled false`](https://spark.apache.org/docs/latest/sql-performance-tuning.html#adaptive-query-execution). You can do this by specifying [Spark configuration](https://docs.databricks.com/en/compute/configure.html#spark-configuration) in the advanced options on the cluster creation page."
+"We recommend using a cluster with [Databricks Runtime 17.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/17.3lts-ml.html). The cluster can be either a single-node or multi-node CPU cluster. MMF leverages [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) under the hood and utilizes all the available resource. Make sure to set the following Spark configurations before you start your cluster: [`spark.sql.execution.arrow.enabled true`](https://spark.apache.org/docs/3.0.1/sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas) and [`spark.sql.adaptive.enabled false`](https://spark.apache.org/docs/latest/sql-performance-tuning.html#adaptive-query-execution). You can do this by specifying [Spark configuration](https://docs.databricks.com/en/compute/configure.html#spark-configuration) in the advanced options on the cluster creation page."
 ]
 },
 {
@@ -174,27 +174,26 @@
 "\n",
 "def create_m4_hourly():\n",
 "    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group=\"Hourly\")\n",
-"    _ids = [f\"H{i}\" for i in range(1, n)]\n",
+"    target_ids = {f\"H{i}\" for i in range(1, n)}\n",
+"    y_df = y_df[y_df[\"unique_id\"].isin(target_ids)]\n",
 "    y_df = (\n",
-"        y_df.groupby(\"unique_id\")\n",
-"        .filter(lambda x: x.unique_id.iloc[0] in _ids)\n",
-"        .groupby(\"unique_id\")\n",
-"        .apply(transform_group)\n",
-"        .reset_index(drop=True)\n",
+"        y_df.groupby(\"unique_id\", group_keys=False)\n",
+"        .apply(lambda g: transform_group(g, g.name))\n",
+"        .reset_index(drop=True)\n",
 "    )\n",
 "    return y_df\n",
 "\n",
 "\n",
-"def transform_group(df):\n",
-"    unique_id = df.unique_id.iloc[0]\n",
+"def transform_group(df, unique_id):\n",
 "    if len(df) > 720:\n",
 "        df = df.iloc[-720:]\n",
-"    _start = pd.Timestamp(\"2025-01-01 00:00\")\n",
-"    _end = _start + pd.DateOffset(hours=len(df)-1)\n",
-"    date_idx = pd.date_range(start=_start, end=_end, freq=\"H\", name=\"ds\")\n",
-"    res_df = pd.DataFrame(data=[], index=date_idx).reset_index()\n",
-"    res_df[\"unique_id\"] = unique_id\n",
-"    res_df[\"y\"] = df.y.values\n",
+"    start = pd.Timestamp(\"2025-01-01 00:00\")\n",
+"    date_idx = pd.date_range(start=start, periods=len(df), freq=\"h\", name=\"ds\")\n",
+"    res_df = pd.DataFrame({\n",
+"        \"ds\": date_idx,\n",
+"        \"unique_id\": unique_id,\n",
+"        \"y\": df[\"y\"].to_numpy()\n",
+"    })\n",
 "    return res_df"
 ]
 },

examples/monthly/foundation_monthly.ipynb

Lines changed: 20 additions & 24 deletions
@@ -79,7 +79,7 @@
 },
 "outputs": [],
 "source": [
-"%pip install datasetsforecast==0.0.8 --quiet\n",
+"%pip install datasetsforecast==0.0.8 pandas==2.2.3 --quiet\n",
 "dbutils.library.restartPython()"
 ]
 },
@@ -174,31 +174,27 @@
 "\n",
 "def create_m4_monthly():\n",
 "    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group=\"Monthly\")\n",
-"    _ids = [f\"M{i}\" for i in range(1, n + 1)]\n",
+"    target_ids = {f\"M{i}\" for i in range(1, n)}\n",
+"    y_df = y_df[y_df[\"unique_id\"].isin(target_ids)]\n",
 "    y_df = (\n",
-"        y_df.groupby(\"unique_id\")\n",
-"        .filter(lambda x: x.unique_id.iloc[0] in _ids)\n",
-"        .groupby(\"unique_id\")\n",
-"        .apply(transform_group)\n",
-"        .reset_index(drop=True)\n",
+"        y_df.groupby(\"unique_id\", group_keys=False)\n",
+"        .apply(lambda g: transform_group(g, g.name))\n",
+"        .reset_index(drop=True)\n",
 "    )\n",
 "    return y_df\n",
 "\n",
 "\n",
-"def transform_group(df):\n",
-"    unique_id = df.unique_id.iloc[0]\n",
-"    _cnt = 60 # df.count()[0]\n",
-"    _start = pd.Timestamp(\"2018-01-01\")\n",
-"    _end = _start + pd.DateOffset(months=_cnt)\n",
-"    date_idx = pd.date_range(start=_start, end=_end, freq=\"M\", name=\"date\")\n",
-"    _df = (\n",
-"        pd.DataFrame(data=[], index=date_idx)\n",
-"        .reset_index()\n",
-"        .rename(columns={\"index\": \"date\"})\n",
-"    )\n",
-"    _df[\"unique_id\"] = unique_id\n",
-"    _df[\"y\"] = df[:60].y.values\n",
-"    return _df\n"
+"def transform_group(df, unique_id):\n",
+"    if len(df) > 60:\n",
+"        df = df.iloc[-60:]\n",
+"    start = pd.Timestamp(\"2018-01-01\")\n",
+"    date_idx = pd.date_range(start=start, periods=len(df), freq=\"ME\", name=\"ds\")\n",
+"    res_df = pd.DataFrame({\n",
+"        \"ds\": date_idx,\n",
+"        \"unique_id\": unique_id,\n",
+"        \"y\": df[\"y\"].to_numpy()\n",
+"    })\n",
+"    return res_df"
 ]
 },
 {
@@ -309,7 +305,7 @@
 },
 "outputs": [],
 "source": [
-"display(spark.sql(f\"select unique_id, count(date) as count from {catalog}.{db}.m4_monthly_train group by unique_id order by unique_id\"))"
+"display(spark.sql(f\"select unique_id, count(ds) as count from {catalog}.{db}.m4_monthly_train group by unique_id order by unique_id\"))"
 ]
 },
 {
@@ -331,7 +327,7 @@
 "outputs": [],
 "source": [
 "display(\n",
-"    spark.sql(f\"select * from {catalog}.{db}.m4_monthly_train where unique_id in ('M1', 'M2', 'M3', 'M4', 'M5') order by unique_id, date\")\n",
+"    spark.sql(f\"select * from {catalog}.{db}.m4_monthly_train where unique_id in ('M1', 'M2', 'M3', 'M4', 'M5') order by unique_id, ds\")\n",
 "    )"
 ]
 },
@@ -547,7 +543,7 @@
 "display(spark.sql(f\"\"\"\n",
 "    select * from {catalog}.{db}.monthly_scoring_output \n",
 "    where unique_id = 'M1'\n",
-"    order by unique_id, model, date\n",
+"    order by unique_id, model, ds\n",
 "    \"\"\"))"
 ]
 },
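
The monthly notebook's calendar moves from the deprecated freq="M" to freq="ME", from an end-offset to periods=, and the timestamp column is renamed from date to ds (hence the updated SQL cells). Beyond that, the new transform_group keeps the last 60 observations rather than the first 60 and sizes the calendar to the series length. The old end-based construction happened to yield the same 60 month-end stamps as the new periods-based one; a small check, assuming pandas >= 2.2:

import pandas as pd

start = pd.Timestamp("2018-01-01")

# Old construction: explicit end 60 months out (the old code spelled the alias "M", now "ME").
old_idx = pd.date_range(start=start, end=start + pd.DateOffset(months=60), freq="ME", name="ds")

# New construction: state the number of month-end stamps directly.
new_idx = pd.date_range(start=start, periods=60, freq="ME", name="ds")

assert old_idx.equals(new_idx)
print(new_idx[0], new_idx[-1])  # 2018-01-31 00:00:00 2022-12-31 00:00:00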
