@@ -195,32 +195,65 @@ def get_event_log_dataframe(self, spark, *args, **kwargs):
         return dataframe
 
 
-class SparkJobTask(OverwriteOutputMixin, PySparkTask):
-    """
-    Wrapper for spark task
-    """
-
-    _spark = None
-    _spark_context = None
-    _sql_context = None
-    _hive_context = None
-    _tmp_dir = None
-
+class SparkConfigurationMixin(object):
+    """Configuration parameters for spark task."""
     driver_memory = luigi.Parameter(
         config_path={'section': 'spark', 'name': 'driver-memory'},
         description='Memory for spark driver',
         significant=False,
     )
+    driver_cores = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'driver-cores'},
+        description='Number of cores for driver',
+        significant=False,
+    )
     executor_memory = luigi.Parameter(
         config_path={'section': 'spark', 'name': 'executor-memory'},
         description='Memory for each executor',
         significant=False,
     )
     executor_cores = luigi.Parameter(
         config_path={'section': 'spark', 'name': 'executor-cores'},
-        description='No. of cores for each executor',
+        description='Number of cores for each executor',
+        significant=False,
+    )
+    num_executors = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'num-executors'},
+        description='Number of executors to launch',
+        significant=False,
+    )
+    master = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'master'},
+        description='Master url for spark job',
         significant=False,
     )
+    deploy_mode = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'deploy-mode'},
+        description='Deploy mode for driver program',
+        significant=False,
+    )
+    spark_config = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'conf'},
+        description='Spark configuration',
+        default=[]
+    )
+
+    @property
+    def conf(self):
+        return self._dict_config(self.spark_config)
+
+
+class SparkJobTask(SparkConfigurationMixin, OverwriteOutputMixin, PySparkTask):
+    """
+    Wrapper for spark task
+    """
+
+    _spark = None
+    _spark_context = None
+    _sql_context = None
+    _hive_context = None
+    _tmp_dir = None
+
     always_log_stderr = False  # log stderr if spark fails, True for verbose log
 
     def init_spark(self, sc):
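
Each parameter above declares a config_path, so luigi falls back to the matching option in the [spark] section of the luigi configuration file when no value is supplied on the command line. A minimal sketch of such a section, with purely illustrative values (none of them come from this change):

[spark]
driver-memory = 4g
driver-cores = 2
executor-memory = 4g
executor-cores = 2
num-executors = 4
master = yarn
deploy-mode = cluster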
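
The conf property hands spark_config to self._dict_config, a helper that is not part of this hunk. Assuming spark_config holds an iterable of 'key=value' strings (an assumption; neither the helper nor the expected format of the 'conf' option is shown here), a minimal sketch of that helper, written as a free function for brevity, could be:

def _dict_config(config):
    """Hypothetical sketch: map 'key=value' entries to a plain dict."""
    if not config:
        return {}
    # Split on the first '=' only, so values may themselves contain '='.
    return dict(entry.split('=', 1) for entry in config)

# Example: _dict_config(['spark.ui.port=20000', 'spark.speculation=true'])
# returns {'spark.ui.port': '20000', 'spark.speculation': 'true'}.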