Skip to content

Commit

Permalink
use static alias for unnesting a struct
Browse files Browse the repository at this point in the history
update hive select transformer to support INLINE by default
update tests
  • Loading branch information
Gaurav Sagar committed Oct 30, 2024
1 parent b4ea602 commit 9d443cd
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 8 deletions.
2 changes: 1 addition & 1 deletion sqlglot/dialects/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ class Generator(generator.Generator):
[
transforms.eliminate_qualify,
transforms.eliminate_distinct_on,
partial(transforms.unnest_to_explode, unnest_using_arrays_zip=False),
partial(transforms.unnest_to_explode),
transforms.any_to_exists,
]
),
Expand Down
26 changes: 21 additions & 5 deletions sqlglot/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,17 +303,14 @@ def unqualify_unnest(expression: exp.Expression) -> exp.Expression:


def unnest_to_explode(
expression: exp.Expression,
unnest_using_arrays_zip: bool = True,
expression: exp.Expression
) -> exp.Expression:
"""Convert cross join unnest into lateral view explode."""

def _unnest_zip_exprs(
u: exp.Unnest, unnest_exprs: t.List[exp.Expression], has_multi_expr: bool
) -> t.List[exp.Expression]:
if has_multi_expr:
if not unnest_using_arrays_zip:
raise UnsupportedError("Cannot transpile UNNEST with multiple input arrays")

# Use INLINE(ARRAYS_ZIP(...)) for multiple expressions
zip_exprs: t.List[exp.Expression] = [
Expand Down Expand Up @@ -368,6 +365,25 @@ def _udtf_type(u: exp.Unnest, has_multi_expr: bool) -> t.Type[exp.Func]:
expression.args["joins"].remove(join)

alias_cols = alias.columns if alias else []


"""
Handle converting Presto CROSS JOIN UNNEST to LATERAL VIEW EXPLODE when the unnested table has multiple or no column aliases.
Spark/Hive LATERAL VIEW EXPLODE accepts only a single alias for the exploded array/struct, unlike Trino/Presto UNNEST, which can take multiple.
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+LateralView
https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-lateral-view.html
"""
# Replace multiple (or missing) aliases for a single EXPLODE column with a single static alias name: `t_struct`
if not has_multi_expr and (len(alias_cols) != 1):
alias_cols = ["t_struct"]

# [Optional] Update column references in the AST that are qualified with the current table alias
for column in expression.find_all(exp.Column):
if alias and column.table == alias.name:
column.set("table", "t_struct")

for e, column in zip(exprs, alias_cols):
expression.append(
"laterals",
Expand All @@ -376,7 +392,7 @@ def _udtf_type(u: exp.Unnest, has_multi_expr: bool) -> t.Type[exp.Func]:
view=True,
alias=exp.TableAlias(
this=alias.this, # type: ignore
columns=alias_cols if unnest_using_arrays_zip else [column], # type: ignore
columns=alias_cols
),
),
)
Expand Down
28 changes: 28 additions & 0 deletions tests/dialects/test_dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,34 @@ def test_cross_join(self):
},
)

# UNNEST without column alias
self.validate_all(
"SELECT * FROM x CROSS JOIN UNNEST(y) AS t",
write={
"presto": "SELECT * FROM x CROSS JOIN UNNEST(y) AS t",
"spark": "SELECT * FROM x LATERAL VIEW EXPLODE(y) t AS t_struct",
"databricks": "SELECT * FROM x LATERAL VIEW EXPLODE(y) t AS t_struct",
},
)

# UNNEST STRUCT Object into multiple columns, using single alias
self.validate_all(
"SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t (a, b)",
write={
"presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a, b)",
"spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS t_struct",
},
)

# Unnest multiple expressions into their respective mapped aliases
self.validate_all(
"SELECT numbers, animals, n, a FROM (SELECT ARRAY(2, 5) AS numbers, ARRAY('dog', 'cat', 'bird') AS animals UNION ALL SELECT ARRAY(7, 8, 9), ARRAY('cow', 'pig')) AS x CROSS JOIN UNNEST(numbers, animals) AS t(n, a)",
write={
"presto": "SELECT numbers, animals, n, a FROM (SELECT ARRAY[2, 5] AS numbers, ARRAY['dog', 'cat', 'bird'] AS animals UNION ALL SELECT ARRAY[7, 8, 9], ARRAY['cow', 'pig']) AS x CROSS JOIN UNNEST(numbers, animals) AS t(n, a)",
"spark": "SELECT numbers, animals, n, a FROM (SELECT ARRAY(2, 5) AS numbers, ARRAY('dog', 'cat', 'bird') AS animals UNION ALL SELECT ARRAY(7, 8, 9), ARRAY('cow', 'pig')) AS x LATERAL VIEW INLINE(ARRAYS_ZIP(numbers, animals)) t AS n, a",
},
)

def test_lateral_subquery(self):
self.validate_identity(
"SELECT art FROM tbl1 INNER JOIN LATERAL (SELECT art FROM tbl2) AS tbl2 ON tbl1.art = tbl2.art"
Expand Down
4 changes: 2 additions & 2 deletions tests/dialects/test_starrocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_unnest(self):
"spark": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
"databricks": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
"starrocks": r"""SELECT id, t.type, t.scores FROM example_table, UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""",
"hive": UnsupportedError,
"hive": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
},
)

Expand All @@ -100,7 +100,7 @@ def test_unnest(self):
write={
"spark": r"""SELECT id, t.type, t.scores FROM example_table_2 LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
"starrocks": r"""SELECT id, t.type, t.scores FROM example_table_2 CROSS JOIN LATERAL UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""",
"hive": UnsupportedError,
"hive": r"""SELECT id, t.type, t.scores FROM example_table_2 LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
},
)

Expand Down

0 comments on commit 9d443cd

Please sign in to comment.