|
154 | 154 | │ └─LogicalScan { table: t1, columns: [t1.y] }
|
155 | 155 | └─LogicalProject { exprs: [t2.y, 1:Int32] }
|
156 | 156 | └─LogicalScan { table: t2, columns: [t2.y], predicate: IsNotNull(t2.y) }
|
| 157 | +- name: 'Like `count(*)`, SimpleAgg also need to rewrite `array_agg` for the extra null row due to outer join #14735' |
| 158 | + sql: | |
| 159 | + create table t1(a int, b int); |
| 160 | + select a, (select array_agg(t1.a) filter (where t1.a is distinct from 1) from t1 where t1.a <> t.b) from t1 as t order by 1; |
| 161 | + logical_plan: |- |
| 162 | + LogicalProject { exprs: [t1.a, array_agg(t1.a) filter(IsDistinctFrom(t1.a, 1:Int32))] } |
| 163 | + └─LogicalApply { type: LeftOuter, on: true, correlated_id: 1, max_one_row: true } |
| 164 | + ├─LogicalScan { table: t1, columns: [t1.a, t1.b, t1._row_id] } |
| 165 | + └─LogicalProject { exprs: [array_agg(t1.a) filter(IsDistinctFrom(t1.a, 1:Int32))] } |
| 166 | + └─LogicalAgg { aggs: [array_agg(t1.a) filter(IsDistinctFrom(t1.a, 1:Int32))] } |
| 167 | + └─LogicalProject { exprs: [t1.a] } |
| 168 | + └─LogicalFilter { predicate: (t1.a <> CorrelatedInputRef { index: 1, correlated_id: 1 }) } |
| 169 | + └─LogicalScan { table: t1, columns: [t1.a, t1.b, t1._row_id] } |
| 170 | + optimized_logical_plan_for_batch: |- |
| 171 | + LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t1.b, t1.b), output: [t1.a, array_agg(t1.a) filter(IsDistinctFrom(t1.a, 1:Int32) AND IsNotNull(1:Int32))] } |
| 172 | + ├─LogicalScan { table: t1, columns: [t1.a, t1.b] } |
| 173 | + └─LogicalAgg { group_key: [t1.b], aggs: [array_agg(t1.a) filter(IsDistinctFrom(t1.a, 1:Int32) AND IsNotNull(1:Int32))] } |
| 174 | + └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t1.b, t1.b), output: [t1.b, t1.a, 1:Int32] } |
| 175 | + ├─LogicalAgg { group_key: [t1.b], aggs: [] } |
| 176 | + │ └─LogicalScan { table: t1, columns: [t1.b] } |
| 177 | + └─LogicalProject { exprs: [t1.b, t1.a, 1:Int32] } |
| 178 | + └─LogicalJoin { type: Inner, on: (t1.a <> t1.b), output: all } |
| 179 | + ├─LogicalAgg { group_key: [t1.b], aggs: [] } |
| 180 | + │ └─LogicalScan { table: t1, columns: [t1.b] } |
| 181 | + └─LogicalScan { table: t1, columns: [t1.a] } |
157 | 182 | - sql: |
|
158 | 183 | create table t1(x int, y int);
|
159 | 184 | create table t2(x int, y int);
|
|
981 | 1006 | └─BatchHashJoin { type: LeftOuter, predicate: t1.b IS NOT DISTINCT FROM t1.b, output: [$expr1] }
|
982 | 1007 | ├─BatchExchange { order: [], dist: HashShard(t1.b) }
|
983 | 1008 | │ └─BatchScan { table: t1, columns: [t1.b], distribution: SomeShard }
|
984 |
| - └─BatchProject { exprs: [t1.b, Coalesce(array_agg(t2.c), ARRAY[]:List(Int32)) as $expr1] } |
985 |
| - └─BatchHashAgg { group_key: [t1.b], aggs: [array_agg(t2.c)] } |
986 |
| - └─BatchHashJoin { type: LeftOuter, predicate: t1.b IS NOT DISTINCT FROM t2.d, output: [t1.b, t2.c] } |
| 1009 | + └─BatchProject { exprs: [t1.b, Coalesce(array_agg(t2.c) filter(IsNotNull(1:Int32)), ARRAY[]:List(Int32)) as $expr1] } |
| 1010 | + └─BatchHashAgg { group_key: [t1.b], aggs: [array_agg(t2.c) filter(IsNotNull(1:Int32))] } |
| 1011 | + └─BatchHashJoin { type: LeftOuter, predicate: t1.b IS NOT DISTINCT FROM t2.d, output: [t1.b, t2.c, 1:Int32] } |
987 | 1012 | ├─BatchHashAgg { group_key: [t1.b], aggs: [] }
|
988 | 1013 | │ └─BatchExchange { order: [], dist: HashShard(t1.b) }
|
989 | 1014 | │ └─BatchScan { table: t1, columns: [t1.b], distribution: SomeShard }
|
990 | 1015 | └─BatchExchange { order: [], dist: HashShard(t2.d) }
|
991 |
| - └─BatchProject { exprs: [t2.d, t2.c] } |
| 1016 | + └─BatchProject { exprs: [t2.d, t2.c, 1:Int32] } |
992 | 1017 | └─BatchFilter { predicate: IsNotNull(t2.d) }
|
993 | 1018 | └─BatchScan { table: t2, columns: [t2.c, t2.d], distribution: SomeShard }
|
994 | 1019 | stream_plan: |-
|
|
997 | 1022 | └─StreamHashJoin { type: LeftOuter, predicate: t1.b IS NOT DISTINCT FROM t1.b, output: [$expr1, t1._row_id, t1.b, t1.b] }
|
998 | 1023 | ├─StreamExchange { dist: HashShard(t1.b) }
|
999 | 1024 | │ └─StreamTableScan { table: t1, columns: [t1.b, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
|
1000 |
| - └─StreamProject { exprs: [t1.b, Coalesce(array_agg(t2.c), ARRAY[]:List(Int32)) as $expr1] } |
1001 |
| - └─StreamHashAgg { group_key: [t1.b], aggs: [array_agg(t2.c), count] } |
1002 |
| - └─StreamHashJoin { type: LeftOuter, predicate: t1.b IS NOT DISTINCT FROM t2.d, output: [t1.b, t2.c, t2._row_id] } |
| 1025 | + └─StreamProject { exprs: [t1.b, Coalesce(array_agg(t2.c) filter(IsNotNull(1:Int32)), ARRAY[]:List(Int32)) as $expr1] } |
| 1026 | + └─StreamHashAgg { group_key: [t1.b], aggs: [array_agg(t2.c) filter(IsNotNull(1:Int32)), count] } |
| 1027 | + └─StreamHashJoin { type: LeftOuter, predicate: t1.b IS NOT DISTINCT FROM t2.d, output: [t1.b, t2.c, 1:Int32, t2._row_id] } |
1003 | 1028 | ├─StreamProject { exprs: [t1.b] }
|
1004 | 1029 | │ └─StreamHashAgg { group_key: [t1.b], aggs: [count] }
|
1005 | 1030 | │ └─StreamExchange { dist: HashShard(t1.b) }
|
1006 | 1031 | │ └─StreamTableScan { table: t1, columns: [t1.b, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
|
1007 | 1032 | └─StreamExchange { dist: HashShard(t2.d) }
|
1008 |
| - └─StreamProject { exprs: [t2.d, t2.c, t2._row_id] } |
| 1033 | + └─StreamProject { exprs: [t2.d, t2.c, 1:Int32, t2._row_id] } |
1009 | 1034 | └─StreamFilter { predicate: IsNotNull(t2.d) }
|
1010 | 1035 | └─StreamTableScan { table: t2, columns: [t2.c, t2.d, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
|
1011 | 1036 | - name: correlated array subquery \du
|
|
1030 | 1055 | ├─BatchExchange { order: [], dist: HashShard(rw_users.id) }
|
1031 | 1056 | │ └─BatchFilter { predicate: Not(RegexpEq(rw_users.name, '^pg_':Varchar)) }
|
1032 | 1057 | │ └─BatchScan { table: rw_users, columns: [rw_users.id, rw_users.name, rw_users.is_super, rw_users.create_db, rw_users.create_user, rw_users.can_login], distribution: Single }
|
1033 |
| - └─BatchProject { exprs: [rw_users.id, Coalesce(array_agg(rw_users.name), ARRAY[]:List(Varchar)) as $expr1] } |
1034 |
| - └─BatchHashAgg { group_key: [rw_users.id], aggs: [array_agg(rw_users.name)] } |
1035 |
| - └─BatchHashJoin { type: LeftOuter, predicate: rw_users.id IS NOT DISTINCT FROM rw_users.id, output: [rw_users.id, rw_users.name] } |
| 1058 | + └─BatchProject { exprs: [rw_users.id, Coalesce(array_agg(rw_users.name) filter(IsNotNull(1:Int32)), ARRAY[]:List(Varchar)) as $expr1] } |
| 1059 | + └─BatchHashAgg { group_key: [rw_users.id], aggs: [array_agg(rw_users.name) filter(IsNotNull(1:Int32))] } |
| 1060 | + └─BatchHashJoin { type: LeftOuter, predicate: rw_users.id IS NOT DISTINCT FROM rw_users.id, output: [rw_users.id, rw_users.name, 1:Int32] } |
1036 | 1061 | ├─BatchHashAgg { group_key: [rw_users.id], aggs: [] }
|
1037 | 1062 | │ └─BatchExchange { order: [], dist: HashShard(rw_users.id) }
|
1038 | 1063 | │ └─BatchProject { exprs: [rw_users.id] }
|
1039 | 1064 | │ └─BatchFilter { predicate: Not(RegexpEq(rw_users.name, '^pg_':Varchar)) }
|
1040 | 1065 | │ └─BatchScan { table: rw_users, columns: [rw_users.id, rw_users.name], distribution: Single }
|
1041 | 1066 | └─BatchExchange { order: [], dist: HashShard(rw_users.id) }
|
1042 |
| - └─BatchHashJoin { type: Inner, predicate: null:Int32 = rw_users.id, output: [rw_users.id, rw_users.name] } |
1043 |
| - ├─BatchExchange { order: [], dist: HashShard(null:Int32) } |
1044 |
| - │ └─BatchProject { exprs: [rw_users.id, null:Int32] } |
1045 |
| - │ └─BatchNestedLoopJoin { type: Inner, predicate: true, output: all } |
1046 |
| - │ ├─BatchExchange { order: [], dist: Single } |
1047 |
| - │ │ └─BatchHashAgg { group_key: [rw_users.id], aggs: [] } |
1048 |
| - │ │ └─BatchExchange { order: [], dist: HashShard(rw_users.id) } |
1049 |
| - │ │ └─BatchProject { exprs: [rw_users.id] } |
1050 |
| - │ │ └─BatchFilter { predicate: (null:Int32 = rw_users.id) AND Not(RegexpEq(rw_users.name, '^pg_':Varchar)) } |
1051 |
| - │ │ └─BatchScan { table: rw_users, columns: [rw_users.id, rw_users.name], distribution: Single } |
1052 |
| - │ └─BatchValues { rows: [] } |
1053 |
| - └─BatchExchange { order: [], dist: HashShard(rw_users.id) } |
1054 |
| - └─BatchScan { table: rw_users, columns: [rw_users.id, rw_users.name], distribution: Single } |
| 1067 | + └─BatchProject { exprs: [rw_users.id, rw_users.name, 1:Int32] } |
| 1068 | + └─BatchHashJoin { type: Inner, predicate: null:Int32 = rw_users.id, output: [rw_users.id, rw_users.name] } |
| 1069 | + ├─BatchExchange { order: [], dist: HashShard(null:Int32) } |
| 1070 | + │ └─BatchProject { exprs: [rw_users.id, null:Int32] } |
| 1071 | + │ └─BatchNestedLoopJoin { type: Inner, predicate: true, output: all } |
| 1072 | + │ ├─BatchExchange { order: [], dist: Single } |
| 1073 | + │ │ └─BatchHashAgg { group_key: [rw_users.id], aggs: [] } |
| 1074 | + │ │ └─BatchExchange { order: [], dist: HashShard(rw_users.id) } |
| 1075 | + │ │ └─BatchProject { exprs: [rw_users.id] } |
| 1076 | + │ │ └─BatchFilter { predicate: (null:Int32 = rw_users.id) AND Not(RegexpEq(rw_users.name, '^pg_':Varchar)) } |
| 1077 | + │ │ └─BatchScan { table: rw_users, columns: [rw_users.id, rw_users.name], distribution: Single } |
| 1078 | + │ └─BatchValues { rows: [] } |
| 1079 | + └─BatchExchange { order: [], dist: HashShard(rw_users.id) } |
| 1080 | + └─BatchScan { table: rw_users, columns: [rw_users.id, rw_users.name], distribution: Single } |
1055 | 1081 | - name: correlated array subquery (issue 14423)
|
1056 | 1082 | sql: |
|
1057 | 1083 | CREATE TABLE array_types ( x BIGINT[] );
|
|
1066 | 1092 | └─BatchHashJoin { type: LeftOuter, predicate: array_types.x IS NOT DISTINCT FROM array_types.x, output: [$expr1] }
|
1067 | 1093 | ├─BatchExchange { order: [], dist: HashShard(array_types.x) }
|
1068 | 1094 | │ └─BatchScan { table: array_types, columns: [array_types.x], distribution: SomeShard }
|
1069 |
| - └─BatchProject { exprs: [array_types.x, Coalesce(array_agg(array_types.x), ARRAY[]:List(List(Int64))) as $expr1] } |
1070 |
| - └─BatchHashAgg { group_key: [array_types.x], aggs: [array_agg(array_types.x)] } |
1071 |
| - └─BatchHashJoin { type: LeftOuter, predicate: array_types.x IS NOT DISTINCT FROM array_types.x, output: [array_types.x, array_types.x] } |
| 1095 | + └─BatchProject { exprs: [array_types.x, Coalesce(array_agg(array_types.x) filter(IsNotNull(1:Int32)), ARRAY[]:List(List(Int64))) as $expr1] } |
| 1096 | + └─BatchHashAgg { group_key: [array_types.x], aggs: [array_agg(array_types.x) filter(IsNotNull(1:Int32))] } |
| 1097 | + └─BatchHashJoin { type: LeftOuter, predicate: array_types.x IS NOT DISTINCT FROM array_types.x, output: [array_types.x, array_types.x, 1:Int32] } |
1072 | 1098 | ├─BatchHashAgg { group_key: [array_types.x], aggs: [] }
|
1073 | 1099 | │ └─BatchExchange { order: [], dist: HashShard(array_types.x) }
|
1074 | 1100 | │ └─BatchScan { table: array_types, columns: [array_types.x], distribution: SomeShard }
|
1075 | 1101 | └─BatchExchange { order: [], dist: HashShard(array_types.x) }
|
1076 |
| - └─BatchProject { exprs: [array_types.x, array_types.x] } |
| 1102 | + └─BatchProject { exprs: [array_types.x, array_types.x, 1:Int32] } |
1077 | 1103 | └─BatchHashAgg { group_key: [array_types.x], aggs: [] }
|
1078 | 1104 | └─BatchExchange { order: [], dist: HashShard(array_types.x) }
|
1079 | 1105 | └─BatchScan { table: array_types, columns: [array_types.x], distribution: SomeShard }
|
0 commit comments