diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index db4c997286..3208e810f8 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -555,7 +555,7 @@ class Generator(generator.Generator): [ transforms.eliminate_qualify, transforms.eliminate_distinct_on, - partial(transforms.unnest_to_explode, unnest_using_arrays_zip=False), + partial(transforms.unnest_to_explode), transforms.any_to_exists, ] ), diff --git a/sqlglot/transforms.py b/sqlglot/transforms.py index 46c859b028..fd6b8746ee 100644 --- a/sqlglot/transforms.py +++ b/sqlglot/transforms.py @@ -302,19 +302,13 @@ def unqualify_unnest(expression: exp.Expression) -> exp.Expression: return expression -def unnest_to_explode( - expression: exp.Expression, - unnest_using_arrays_zip: bool = True, -) -> exp.Expression: +def unnest_to_explode(expression: exp.Expression) -> exp.Expression: """Convert cross join unnest into lateral view explode.""" def _unnest_zip_exprs( u: exp.Unnest, unnest_exprs: t.List[exp.Expression], has_multi_expr: bool ) -> t.List[exp.Expression]: if has_multi_expr: - if not unnest_using_arrays_zip: - raise UnsupportedError("Cannot transpile UNNEST with multiple input arrays") - # Use INLINE(ARRAYS_ZIP(...)) for multiple expressions zip_exprs: t.List[exp.Expression] = [ exp.Anonymous(this="ARRAYS_ZIP", expressions=unnest_exprs) @@ -368,6 +362,25 @@ def _udtf_type(u: exp.Unnest, has_multi_expr: bool) -> t.Type[exp.Func]: expression.args["joins"].remove(join) alias_cols = alias.columns if alias else [] + + """ + Handle Presto CROSS JOIN UNNEST to LATERAL VIEW EXPLODE for Multiple or No Exploded table column alias. + + Spark/Hive LATERAL VIEW EXPLODE requires only a single alias for respective exploded array/struct to be given for unnest unlike trino/presto which can take multiple.
+ https://cwiki.apache.org/confluence/display/Hive/LanguageManual+LateralView + https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-lateral-view.html + + """ + # Replace multiple alias for single EXPLODE column with single static alias name: `t_struct` + if not has_multi_expr and (len(alias_cols) != 1): + alias_cols = ["t_struct"] + + # [Optional] Do Update the Column reference in AST for table with current alias + if alias: + for column in expression.find_all(exp.Column): + if column.table == alias.name: + column.set("table", "t_struct") + for e, column in zip(exprs, alias_cols): expression.append( "laterals", @@ -376,7 +389,7 @@ def _udtf_type(u: exp.Unnest, has_multi_expr: bool) -> t.Type[exp.Func]: view=True, alias=exp.TableAlias( this=alias.this, # type: ignore - columns=alias_cols if unnest_using_arrays_zip else [column], # type: ignore + columns=alias_cols, ), ), ) diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index e7533773f8..444e9f2958 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -1444,6 +1444,34 @@ def test_cross_join(self): }, ) + # UNNEST without column alias + self.validate_all( + "SELECT * FROM x CROSS JOIN UNNEST(y) AS t", + write={ + "presto": "SELECT * FROM x CROSS JOIN UNNEST(y) AS t", + "spark": "SELECT * FROM x LATERAL VIEW EXPLODE(y) t AS t_struct", + "databricks": "SELECT * FROM x LATERAL VIEW EXPLODE(y) t AS t_struct", + }, + ) + + # UNNEST STRUCT Object into multiple columns, using single alias + self.validate_all( + "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t (a, b)", + write={ + "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a, b)", + "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS t_struct", + }, + ) + + # Unnest multiple Expression into respective mapped alias + self.validate_all( + "SELECT numbers, animals, n, a FROM (SELECT ARRAY(2, 5) AS numbers, ARRAY('dog', 'cat', 'bird') AS animals UNION ALL SELECT ARRAY(7, 8, 9), ARRAY('cow', 
'pig')) AS x CROSS JOIN UNNEST(numbers, animals) AS t(n, a)", + write={ + "presto": "SELECT numbers, animals, n, a FROM (SELECT ARRAY[2, 5] AS numbers, ARRAY['dog', 'cat', 'bird'] AS animals UNION ALL SELECT ARRAY[7, 8, 9], ARRAY['cow', 'pig']) AS x CROSS JOIN UNNEST(numbers, animals) AS t(n, a)", + "spark": "SELECT numbers, animals, n, a FROM (SELECT ARRAY(2, 5) AS numbers, ARRAY('dog', 'cat', 'bird') AS animals UNION ALL SELECT ARRAY(7, 8, 9), ARRAY('cow', 'pig')) AS x LATERAL VIEW INLINE(ARRAYS_ZIP(numbers, animals)) t AS n, a", + }, + ) + def test_lateral_subquery(self): self.validate_identity( "SELECT art FROM tbl1 INNER JOIN LATERAL (SELECT art FROM tbl2) AS tbl2 ON tbl1.art = tbl2.art" diff --git a/tests/dialects/test_starrocks.py b/tests/dialects/test_starrocks.py index 1edd7c6fd3..1ec319a643 100644 --- a/tests/dialects/test_starrocks.py +++ b/tests/dialects/test_starrocks.py @@ -1,4 +1,3 @@ -from sqlglot.errors import UnsupportedError from tests.dialects.test_dialect import Validator @@ -91,7 +90,7 @@ def test_unnest(self): "spark": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""", "databricks": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""", "starrocks": r"""SELECT id, t.type, t.scores FROM example_table, UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""", - "hive": UnsupportedError, + "hive": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""", }, ) @@ -100,7 +99,7 @@ def test_unnest(self): write={ "spark": r"""SELECT id, t.type, t.scores FROM example_table_2 LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""", "starrocks": r"""SELECT id, t.type, t.scores FROM example_table_2 CROSS JOIN LATERAL 
UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""", - "hive": UnsupportedError, + "hive": r"""SELECT id, t.type, t.scores FROM example_table_2 LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""", }, )