Skip to content

Commit 813d341

Browse files
authored
Create spark-sql-delta-2-zorder-table.py
1 parent b938c9e commit 813d341

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

spark-sql-delta-2-zorder-table.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from pyspark.sql import SparkSession
2+
import uuid
3+
import sys
4+
5+
# init output_path variable
# NOTE: not referenced anywhere in this script; retained only so the
# module's globals stay the same as before.
output_path = None

# Use a SparkSession (rather than a bare SparkContext) as the entry point.
# The session is configured with:
#   * the AWS Glue Data Catalog client factory as the Hive metastore client
#   * the Delta Lake SQL extension and Delta catalog, which the Delta-specific
#     OPTIMIZE and GENERATE statements below rely on
spark = (
    SparkSession.builder.appName("SparkSQL")
    .config(
        "hive.metastore.client.factory.class",
        "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
    )
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
    .enableHiveSupport()
    .getOrCreate()
)

# Compact the Delta table and Z-order its data by the `loc` column.
# (This statement rewrites existing data; it does not insert any rows.)
optimize_zorder_sql = "OPTIMIZE default.deltatb ZORDER BY (loc)"

# Generate the symlink_format_manifest for Athena.
generate_mode_sql = "GENERATE symlink_format_manifest FOR TABLE default.deltatb"

# Order matters: run OPTIMIZE first, then regenerate the manifest, so the
# manifest is produced from the table state left behind by the optimization.
spark.sql(optimize_zorder_sql)
spark.sql(generate_mode_sql)

# Report what was actually done (the script never inserts data).
print(
    "Optimized table deltatb with ZORDER BY (loc) and generated its "
    "symlink_format_manifest."
)

0 commit comments

Comments
 (0)