Skip to content

Commit 6f886b2

Browse files
committed
fixed
1 parent 6d98656 commit 6f886b2

File tree

6 files changed

+31
-27
lines changed

6 files changed

+31
-27
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package io.github.interestinglab.waterdrop.core
2+
3+
/**
 * Shared sentinel field names used by filter plugins.
 *
 * ROOT marks "write parsed fields at the top level of the row" (used as the
 * default `target_field` / `source_field` value), and TMP names a scratch
 * column that filters create and drop while exploding structured data.
 */
object RowConstant {
  val ROOT = "__root__"
  val TMP = "__tmp__"
}

waterdrop-core/src/main/scala/io/github/interestinglab/waterdrop/filter/Date.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package io.github.interestinglab.waterdrop.filter
22

33
import com.typesafe.config.{Config, ConfigFactory}
44
import io.github.interestinglab.waterdrop.apis.BaseFilter
5+
import io.github.interestinglab.waterdrop.core.RowConstant
56
import io.github.interestinglab.waterdrop.utils.{FormatParser, StringTemplate, UnixMSParser, UnixParser}
67
import org.apache.spark.sql.functions._
78
import org.apache.spark.sql.{DataFrame, SparkSession}
@@ -26,7 +27,7 @@ class Date(var config: Config) extends BaseFilter(config) {
2627
super.prepare(spark, ssc)
2728
val defaultConfig = ConfigFactory.parseMap(
2829
Map(
29-
"source_field" -> Json.ROOT,
30+
"source_field" -> RowConstant.ROOT,
3031
"target_field" -> "datetime",
3132
"source_time_format" -> "UNIX_MS",
3233
"target_time_format" -> "yyyy/MM/dd HH:mm:ss",
@@ -59,7 +60,7 @@ class Date(var config: Config) extends BaseFilter(config) {
5960
})
6061

6162
config.getString("source_field") match {
62-
case Json.ROOT => df.withColumn(targetField, func(lit(System.currentTimeMillis().toString)))
63+
case RowConstant.ROOT => df.withColumn(targetField, func(lit(System.currentTimeMillis().toString)))
6364
case srcField: String => df.withColumn(targetField, func(col(srcField)))
6465
}
6566
}

waterdrop-core/src/main/scala/io/github/interestinglab/waterdrop/filter/Grok.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import java.util
77
import com.typesafe.config.{Config, ConfigFactory}
88
import io.github.interestinglab.waterdrop.apis.BaseFilter
99
import io.github.interestinglab.waterdrop.config.Common
10+
import io.github.interestinglab.waterdrop.core.RowConstant
1011
import io.thekraken.grok.api.{Grok => GrokLib}
1112
import org.apache.spark.sql.{DataFrame, SparkSession}
1213
import org.apache.spark.streaming.StreamingContext
@@ -18,7 +19,6 @@ class Grok(var conf: Config) extends BaseFilter(conf) {
1819

1920
val grok = GrokLib.EMPTY
2021

21-
2222
def this() = {
2323
this(ConfigFactory.empty())
2424
}
@@ -47,7 +47,7 @@ class Grok(var conf: Config) extends BaseFilter(conf) {
4747
.toString,
4848
"named_captures_only" -> true,
4949
"source_field" -> "raw_message",
50-
"target_field" -> Json.ROOT
50+
"target_field" -> RowConstant.ROOT
5151
).asJava
5252
)
5353
conf = conf.withFallback(defaultConfig)
@@ -65,13 +65,13 @@ class Grok(var conf: Config) extends BaseFilter(conf) {
6565
val grokUDF = udf((str: String) => grokMatch(str))
6666
val keys = getKeysOfPattern(conf.getString("pattern"))
6767
conf.getString("target_field") match {
68-
case Json.ROOT => {
69-
var tmpDf = df.withColumn(Json.TMP, grokUDF(col(conf.getString("source_field"))))
68+
case RowConstant.ROOT => {
69+
var tmpDf = df.withColumn(RowConstant.TMP, grokUDF(col(conf.getString("source_field"))))
7070
while (keys.hasNext) {
7171
val field = keys.next()
72-
tmpDf = tmpDf.withColumn(field, col(Json.TMP)(field))
72+
tmpDf = tmpDf.withColumn(field, col(RowConstant.TMP)(field))
7373
}
74-
tmpDf.drop(Json.TMP)
74+
tmpDf.drop(RowConstant.TMP)
7575
}
7676
case targetField => {
7777
df.withColumn(targetField, grokUDF(col(conf.getString("source_field"))))

waterdrop-core/src/main/scala/io/github/interestinglab/waterdrop/filter/Json.scala

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package io.github.interestinglab.waterdrop.filter
33
import scala.collection.JavaConversions._
44
import com.typesafe.config.{Config, ConfigFactory}
55
import io.github.interestinglab.waterdrop.apis.BaseFilter
6+
import io.github.interestinglab.waterdrop.core.RowConstant
67
import org.apache.spark.sql.{DataFrame, SparkSession}
78
import org.apache.spark.streaming.StreamingContext
89
import org.apache.spark.sql.functions._
@@ -26,7 +27,7 @@ class Json(var conf: Config) extends BaseFilter(conf) {
2627
val defaultConfig = ConfigFactory.parseMap(
2728
Map(
2829
"source_field" -> "raw_message",
29-
"target_field" -> Json.ROOT
30+
"target_field" -> RowConstant.ROOT
3031
)
3132
)
3233
conf = conf.withFallback(defaultConfig)
@@ -38,19 +39,19 @@ class Json(var conf: Config) extends BaseFilter(conf) {
3839
import spark.implicits._
3940

4041
conf.getString("target_field") match {
41-
case Json.ROOT => {
42+
case RowConstant.ROOT => {
4243

4344
val stringDataSet = df.select(srcField).as[String]
4445

4546
val newDF = srcField match {
4647
case "raw_message" => spark.read.json(stringDataSet)
4748
case s: String => {
4849
val schema = spark.read.json(stringDataSet).schema
49-
var tmpDf = df.withColumn(Json.TMP, from_json(col(s), schema))
50+
var tmpDf = df.withColumn(RowConstant.TMP, from_json(col(s), schema))
5051
schema.map { field =>
51-
tmpDf = tmpDf.withColumn(field.name, col(Json.TMP)(field.name))
52+
tmpDf = tmpDf.withColumn(field.name, col(RowConstant.TMP)(field.name))
5253
}
53-
tmpDf.drop(Json.TMP)
54+
tmpDf.drop(RowConstant.TMP)
5455
}
5556
}
5657

@@ -64,8 +65,3 @@ class Json(var conf: Config) extends BaseFilter(conf) {
6465
}
6566
}
6667
}
67-
68-
object Json {
69-
val ROOT = "__root__"
70-
val TMP = "__tmp__"
71-
}

waterdrop-core/src/main/scala/io/github/interestinglab/waterdrop/filter/Kv.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import java.util
44

55
import com.typesafe.config.{Config, ConfigFactory}
66
import io.github.interestinglab.waterdrop.apis.BaseFilter
7+
import io.github.interestinglab.waterdrop.core.RowConstant
78
import org.apache.spark.sql.{DataFrame, SparkSession}
89
import org.apache.spark.streaming.StreamingContext
910
import org.apache.spark.sql.functions.{col, udf}
@@ -33,15 +34,15 @@ class Kv(var conf: Config) extends BaseFilter(conf) {
3334
"include_fields" -> util.Arrays.asList(),
3435
"exclude_fields" -> util.Arrays.asList(),
3536
"source_field" -> "raw_message",
36-
"target_field" -> Json.ROOT
37+
"target_field" -> RowConstant.ROOT
3738
)
3839
)
3940
conf = conf.withFallback(defaultConfig)
4041
}
4142

4243
override def process(spark: SparkSession, df: DataFrame): DataFrame = {
4344
conf.getString("target_field") match {
44-
case Json.ROOT => df // TODO: implement
45+
case RowConstant.ROOT => df // TODO: implement
4546
case targetField: String => {
4647
val kvUDF = udf((s: String) => kv(s))
4748
df.withColumn(targetField, kvUDF(col(conf.getString("source_field"))))
@@ -64,8 +65,7 @@ class Kv(var conf: Config) extends BaseFilter(conf) {
6465

6566
if (includeFields.length == 0 && excludeFields.length == 0) {
6667
map += (conf.getString("field_prefix") + key -> value)
67-
}
68-
else if (includeFields.length > 0 && includeFields.contains(key)) {
68+
} else if (includeFields.length > 0 && includeFields.contains(key)) {
6969
map += (conf.getString("field_prefix") + key -> value)
7070
} else if (excludeFields.length > 0 && !excludeFields.contains(key)) {
7171
map += (conf.getString("field_prefix") + key -> value)

waterdrop-core/src/main/scala/io/github/interestinglab/waterdrop/filter/Split.scala

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package io.github.interestinglab.waterdrop.filter
22

33
import com.typesafe.config.{Config, ConfigFactory}
44
import io.github.interestinglab.waterdrop.apis.BaseFilter
5+
import io.github.interestinglab.waterdrop.core.RowConstant
56
import org.apache.spark.sql.functions._
67
import org.apache.spark.sql.{DataFrame, SparkSession}
78
import org.apache.spark.streaming.StreamingContext
@@ -28,7 +29,7 @@ class Split(var conf: Config) extends BaseFilter(conf) {
2829
Map(
2930
"delimiter" -> " ",
3031
"source_field" -> "raw_message",
31-
"target_field" -> Json.ROOT
32+
"target_field" -> RowConstant.ROOT
3233
)
3334
)
3435

@@ -42,15 +43,15 @@ class Split(var conf: Config) extends BaseFilter(conf) {
4243

4344
// https://stackoverflow.com/a/33345698/1145750
4445
conf.getString("target_field") match {
45-
case Json.ROOT => {
46+
case RowConstant.ROOT => {
4647
val func = udf((s: String) => {
4748
split(s, conf.getString("delimiter"), keys.size())
4849
})
49-
var filterDf = df.withColumn(Json.TMP, func(col(srcField)))
50+
var filterDf = df.withColumn(RowConstant.TMP, func(col(srcField)))
5051
for (i <- 0 until keys.size()) {
51-
filterDf = filterDf.withColumn(keys.get(i), col(Json.TMP)(i))
52+
filterDf = filterDf.withColumn(keys.get(i), col(RowConstant.TMP)(i))
5253
}
53-
filterDf.drop(Json.TMP)
54+
filterDf.drop(RowConstant.TMP)
5455
}
5556
case targetField: String => {
5657
val func = udf((s: String) => {

0 commit comments

Comments
 (0)