1 file changed: +4 -0
edx/analytics/tasks/common

@@ -255,9 +255,11 @@ def get_input_source(self, *args):
             # Reading the manifest as an RDD with Spark is a lot faster compared to Hadoop.
             # Currently we get only one manifest file per request, so we create a single RDD from it.
             # If there are multiple manifest files, each can be read as an RDD and then unioned with the other manifest RDDs.
+            self.log.warn("PYSPARK LOGGER: Reading manifest file :: {}".format(targets[0].path))
             source_rdd = self._spark.sparkContext.textFile(targets[0].path)
             broadcast_value = self._spark.sparkContext.broadcast(source_rdd.collect())
         else:
+            self.log.warn("PYSPARK LOGGER: Reading normal targets")
             broadcast_value = self._spark.sparkContext.broadcast([target.path for target in targets])
         return broadcast_value
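The comment in the hunk above notes that, when more than one manifest file is involved, each file could be read as its own RDD and the results unioned before broadcasting. A minimal sketch of that approach, assuming a SparkSession named spark and an illustrative list manifest_paths (neither name comes from this patch):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Illustrative manifest paths; the patched code currently handles only a single
# manifest file via targets[0].path.
manifest_paths = ["hdfs:///tmp/manifest-0.manifest", "hdfs:///tmp/manifest-1.manifest"]

# Read each manifest file as its own RDD of input paths, then union them.
manifest_rdds = [spark.sparkContext.textFile(path) for path in manifest_paths]
combined_rdd = spark.sparkContext.union(manifest_rdds)

# Collect the (small) list of input paths on the driver and broadcast it to the
# executors, mirroring what get_input_source() does for the single-manifest case.
broadcast_value = spark.sparkContext.broadcast(combined_rdd.collect())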
@@ -298,6 +300,8 @@ def init_spark(self, sc):
         self._spark_context = sc
         self._spark = SparkSession.builder.getOrCreate()
         self._hive_context = HiveContext(sc)
+        log4jLogger = sc._jvm.org.apache.log4j  # using spark logger
+        self.log = log4jLogger.LogManager.getLogger(__name__)

     @property
     def conf(self):
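The logger wired up in init_spark() uses the common PySpark pattern of reaching the JVM-side log4j classes through the py4j gateway exposed as sc._jvm, so the warn() calls added above land in the same driver logs as Spark's own output. A standalone sketch of that pattern, assuming a Spark build that ships log4j 1.x as the patched code does (the logger name is illustrative, and sc._jvm is an internal attribute rather than public API):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# Reach the JVM-side log4j classes through the py4j gateway.
log4j = sc._jvm.org.apache.log4j
logger = log4j.LogManager.getLogger("edx.analytics.example")  # illustrative name

# Messages are written by the JVM logger, alongside Spark's own log output.
logger.warn("PYSPARK LOGGER: example warning emitted through the JVM log4j logger")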