|
9 | 9 |
|
10 | 10 | import org.apache.calcite.sql.SqlCall; |
11 | 11 | import org.apache.calcite.sql.SqlNode; |
| 12 | +import org.apache.calcite.sql.fun.SqlStdOperatorTable; |
12 | 13 | import org.apache.calcite.sql.util.SqlShuttle; |
13 | 14 |
|
14 | 15 | import com.linkedin.coral.common.transformers.OperatorRenameSqlCallTransformer; |
15 | 16 | import com.linkedin.coral.common.transformers.SqlCallTransformers; |
16 | 17 | import com.linkedin.coral.spark.containers.SparkUDFInfo; |
17 | | -import com.linkedin.coral.spark.transformers.FallBackToHiveUDFTransformer; |
18 | | -import com.linkedin.coral.spark.transformers.TransportableUDFTransformer; |
| 18 | +import com.linkedin.coral.spark.transformers.FallBackToLinkedInHiveUDFTransformer; |
| 19 | +import com.linkedin.coral.spark.transformers.TransportUDFTransformer; |
19 | 20 |
|
20 | | -import static com.linkedin.coral.spark.transformers.TransportableUDFTransformer.*; |
| 21 | +import static com.linkedin.coral.spark.transformers.TransportUDFTransformer.*; |
21 | 22 |
|
22 | 23 |
|
23 | 24 | /** |
24 | 25 | * This class extends the class of {@link org.apache.calcite.sql.util.SqlShuttle} and initialize a {@link com.linkedin.coral.common.transformers.SqlCallTransformers} |
25 | 26 | * which containing a list of {@link com.linkedin.coral.common.transformers.SqlCallTransformer} to traverse the hierarchy of a {@link org.apache.calcite.sql.SqlCall} |
26 | 27 | * and converts the functions from Coral operator to Spark operator if it is required |
| 28 | + * |
| 29 | + * In this converter, we need to apply {@link TransportUDFTransformer} before {@link FallBackToLinkedInHiveUDFTransformer} |
| 30 | + * because we should try to transform a UDF to an equivalent Transport UDF before falling back to LinkedIn Hive UDF. |
27 | 31 | */ |
28 | 32 | public class CoralToSparkSqlCallConverter extends SqlShuttle { |
29 | 33 | private final SqlCallTransformers sqlCallTransformers; |
30 | 34 |
|
31 | 35 | public CoralToSparkSqlCallConverter(Set<SparkUDFInfo> sparkUDFInfos) { |
32 | 36 | this.sqlCallTransformers = SqlCallTransformers.of( |
33 | | - // Transportable UDFs |
34 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.date.hive.DateFormatToEpoch", |
| 37 | + // Transport UDFs |
| 38 | + new TransportUDFTransformer("com.linkedin.dali.udf.date.hive.DateFormatToEpoch", |
35 | 39 | "com.linkedin.stdudfs.daliudfs.spark.DateFormatToEpoch", DALI_UDFS_IVY_URL_SPARK_2_11, |
36 | 40 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
37 | 41 |
|
38 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.date.hive.EpochToDateFormat", |
| 42 | + new TransportUDFTransformer("com.linkedin.dali.udf.date.hive.EpochToDateFormat", |
39 | 43 | "com.linkedin.stdudfs.daliudfs.spark.EpochToDateFormat", DALI_UDFS_IVY_URL_SPARK_2_11, |
40 | 44 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
41 | 45 |
|
42 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.date.hive.EpochToEpochMilliseconds", |
| 46 | + new TransportUDFTransformer("com.linkedin.dali.udf.date.hive.EpochToEpochMilliseconds", |
43 | 47 | "com.linkedin.stdudfs.daliudfs.spark.EpochToEpochMilliseconds", DALI_UDFS_IVY_URL_SPARK_2_11, |
44 | 48 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
45 | 49 |
|
46 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.isguestmemberid.hive.IsGuestMemberId", |
| 50 | + new TransportUDFTransformer("com.linkedin.dali.udf.isguestmemberid.hive.IsGuestMemberId", |
47 | 51 | "com.linkedin.stdudfs.daliudfs.spark.IsGuestMemberId", DALI_UDFS_IVY_URL_SPARK_2_11, |
48 | 52 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
49 | 53 |
|
50 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.istestmemberid.hive.IsTestMemberId", |
| 54 | + new TransportUDFTransformer("com.linkedin.dali.udf.istestmemberid.hive.IsTestMemberId", |
51 | 55 | "com.linkedin.stdudfs.daliudfs.spark.IsTestMemberId", DALI_UDFS_IVY_URL_SPARK_2_11, |
52 | 56 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
53 | 57 |
|
54 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.maplookup.hive.MapLookup", |
| 58 | + new TransportUDFTransformer("com.linkedin.dali.udf.maplookup.hive.MapLookup", |
55 | 59 | "com.linkedin.stdudfs.daliudfs.spark.MapLookup", DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12, |
56 | 60 | sparkUDFInfos), |
57 | 61 |
|
58 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.sanitize.hive.Sanitize", |
| 62 | + new TransportUDFTransformer("com.linkedin.dali.udf.sanitize.hive.Sanitize", |
59 | 63 | "com.linkedin.stdudfs.daliudfs.spark.Sanitize", DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12, |
60 | 64 | sparkUDFInfos), |
61 | 65 |
|
62 | | - new TransportableUDFTransformer("com.linkedin.dali.udf.watbotcrawlerlookup.hive.WATBotCrawlerLookup", |
| 66 | + new TransportUDFTransformer("com.linkedin.dali.udf.watbotcrawlerlookup.hive.WATBotCrawlerLookup", |
63 | 67 | "com.linkedin.stdudfs.daliudfs.spark.WatBotCrawlerLookup", DALI_UDFS_IVY_URL_SPARK_2_11, |
64 | 68 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
65 | 69 |
|
66 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.DateFormatToEpoch", |
| 70 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.DateFormatToEpoch", |
67 | 71 | "com.linkedin.stdudfs.daliudfs.spark.DateFormatToEpoch", DALI_UDFS_IVY_URL_SPARK_2_11, |
68 | 72 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
69 | 73 |
|
70 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.EpochToDateFormat", |
| 74 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.EpochToDateFormat", |
71 | 75 | "com.linkedin.stdudfs.daliudfs.spark.EpochToDateFormat", DALI_UDFS_IVY_URL_SPARK_2_11, |
72 | 76 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
73 | 77 |
|
74 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.EpochToEpochMilliseconds", |
| 78 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.EpochToEpochMilliseconds", |
75 | 79 | "com.linkedin.stdudfs.daliudfs.spark.EpochToEpochMilliseconds", DALI_UDFS_IVY_URL_SPARK_2_11, |
76 | 80 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
77 | 81 |
|
78 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.GetProfileSections", |
| 82 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.GetProfileSections", |
79 | 83 | "com.linkedin.stdudfs.daliudfs.spark.GetProfileSections", DALI_UDFS_IVY_URL_SPARK_2_11, |
80 | 84 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
81 | 85 |
|
82 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.stringudfs.hive.InitCap", |
| 86 | + new TransportUDFTransformer("com.linkedin.stdudfs.stringudfs.hive.InitCap", |
83 | 87 | "com.linkedin.stdudfs.stringudfs.spark.InitCap", |
84 | 88 | "ivy://com.linkedin.standard-udfs-common-sql-udfs:standard-udfs-string-udfs:1.0.1?classifier=spark_2.11", |
85 | 89 | "ivy://com.linkedin.standard-udfs-common-sql-udfs:standard-udfs-string-udfs:1.0.1?classifier=spark_2.12", |
86 | 90 | sparkUDFInfos), |
87 | 91 |
|
88 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.IsGuestMemberId", |
| 92 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.IsGuestMemberId", |
89 | 93 | "com.linkedin.stdudfs.daliudfs.spark.IsGuestMemberId", DALI_UDFS_IVY_URL_SPARK_2_11, |
90 | 94 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
91 | 95 |
|
92 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.IsTestMemberId", |
| 96 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.IsTestMemberId", |
93 | 97 | "com.linkedin.stdudfs.daliudfs.spark.IsTestMemberId", DALI_UDFS_IVY_URL_SPARK_2_11, |
94 | 98 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
95 | 99 |
|
96 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.MapLookup", |
| 100 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.MapLookup", |
97 | 101 | "com.linkedin.stdudfs.daliudfs.spark.MapLookup", DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12, |
98 | 102 | sparkUDFInfos), |
99 | 103 |
|
100 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.PortalLookup", |
| 104 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.PortalLookup", |
101 | 105 | "com.linkedin.stdudfs.daliudfs.spark.PortalLookup", DALI_UDFS_IVY_URL_SPARK_2_11, |
102 | 106 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
103 | 107 |
|
104 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.Sanitize", |
| 108 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.Sanitize", |
105 | 109 | "com.linkedin.stdudfs.daliudfs.spark.Sanitize", DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12, |
106 | 110 | sparkUDFInfos), |
107 | 111 |
|
108 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.userinterfacelookup.hive.UserInterfaceLookup", |
| 112 | + new TransportUDFTransformer("com.linkedin.stdudfs.userinterfacelookup.hive.UserInterfaceLookup", |
109 | 113 | "com.linkedin.stdudfs.userinterfacelookup.spark.UserInterfaceLookup", |
110 | 114 | "ivy://com.linkedin.standard-udf-userinterfacelookup:userinterfacelookup-std-udf:0.0.27?classifier=spark_2.11", |
111 | 115 | "ivy://com.linkedin.standard-udf-userinterfacelookup:userinterfacelookup-std-udf:0.0.27?classifier=spark_2.12", |
112 | 116 | sparkUDFInfos), |
113 | 117 |
|
114 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.WatBotCrawlerLookup", |
| 118 | + new TransportUDFTransformer("com.linkedin.stdudfs.daliudfs.hive.WatBotCrawlerLookup", |
115 | 119 | "com.linkedin.stdudfs.daliudfs.spark.WatBotCrawlerLookup", DALI_UDFS_IVY_URL_SPARK_2_11, |
116 | 120 | DALI_UDFS_IVY_URL_SPARK_2_12, sparkUDFInfos), |
117 | 121 |
|
118 | | - new TransportableUDFTransformer("com.linkedin.jemslookup.udf.hive.JemsLookup", |
| 122 | + new TransportUDFTransformer("com.linkedin.jemslookup.udf.hive.JemsLookup", |
119 | 123 | "com.linkedin.jemslookup.udf.spark.JemsLookup", |
120 | 124 | "ivy://com.linkedin.jobs-udf:jems-udfs:2.1.7?classifier=spark_2.11", |
121 | 125 | "ivy://com.linkedin.jobs-udf:jems-udfs:2.1.7?classifier=spark_2.12", sparkUDFInfos), |
122 | 126 |
|
123 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.parsing.hive.UserAgentParser", |
| 127 | + new TransportUDFTransformer("com.linkedin.stdudfs.parsing.hive.UserAgentParser", |
124 | 128 | "com.linkedin.stdudfs.parsing.spark.UserAgentParser", |
125 | 129 | "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:3.0.3?classifier=spark_2.11", |
126 | 130 | "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:3.0.3?classifier=spark_2.12", sparkUDFInfos), |
127 | 131 |
|
128 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.parsing.hive.Ip2Str", |
| 132 | + new TransportUDFTransformer("com.linkedin.stdudfs.parsing.hive.Ip2Str", |
129 | 133 | "com.linkedin.stdudfs.parsing.spark.Ip2Str", |
130 | 134 | "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:3.0.3?classifier=spark_2.11", |
131 | 135 | "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:3.0.3?classifier=spark_2.12", sparkUDFInfos), |
132 | 136 |
|
133 | | - new TransportableUDFTransformer("com.linkedin.stdudfs.lookup.hive.BrowserLookup", |
| 137 | + new TransportUDFTransformer("com.linkedin.stdudfs.lookup.hive.BrowserLookup", |
134 | 138 | "com.linkedin.stdudfs.lookup.spark.BrowserLookup", |
135 | 139 | "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:3.0.3?classifier=spark_2.11", |
136 | 140 | "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:3.0.3?classifier=spark_2.12", sparkUDFInfos), |
137 | 141 |
|
138 | | - new TransportableUDFTransformer("com.linkedin.jobs.udf.hive.ConvertIndustryCode", |
| 142 | + new TransportUDFTransformer("com.linkedin.jobs.udf.hive.ConvertIndustryCode", |
139 | 143 | "com.linkedin.jobs.udf.spark.ConvertIndustryCode", |
140 | 144 | "ivy://com.linkedin.jobs-udf:jobs-udfs:2.1.6?classifier=spark_2.11", |
141 | 145 | "ivy://com.linkedin.jobs-udf:jobs-udfs:2.1.6?classifier=spark_2.12", sparkUDFInfos), |
142 | 146 |
|
143 | | - // Transportable UDF for unit test |
144 | | - new TransportableUDFTransformer("com.linkedin.coral.hive.hive2rel.CoralTestUDF", |
| 147 | + // Transport UDF for unit test |
| 148 | + new TransportUDFTransformer("com.linkedin.coral.hive.hive2rel.CoralTestUDF", |
145 | 149 | "com.linkedin.coral.spark.CoralTestUDF", |
146 | 150 | "ivy://com.linkedin.coral.spark.CoralTestUDF?classifier=spark_2.11", null, sparkUDFInfos), |
147 | 151 |
|
148 | 152 | // Built-in operator |
149 | | - new OperatorRenameSqlCallTransformer("CARDINALITY", 1, "size"), |
| 153 | + new OperatorRenameSqlCallTransformer(SqlStdOperatorTable.CARDINALITY, 1, "size"), |
150 | 154 |
|
151 | 155 | // Fall back to the original Hive UDF defined in StaticHiveFunctionRegistry after failing to apply transformers above |
152 | | - new FallBackToHiveUDFTransformer(sparkUDFInfos)); |
| 156 | + new FallBackToLinkedInHiveUDFTransformer(sparkUDFInfos)); |
153 | 157 | } |
154 | 158 |
|
155 | 159 | @Override |
|
0 commit comments