forked from MeteorVE/Flask-Spark-KNN-Example
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordcount.py
32 lines (29 loc) · 1.09 KB
/
wordcount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Make the PySpark sources that ship with the Spark installation importable.
# The environment variables and the sys.path entry below are set up before
# `import pyspark` runs, so these statements must stay above that import.
import sys
import os
# Root of the Spark installation; change this to wherever Spark lives.
SPARK_HOME = "/opt/bitnami/spark"
# Export the installation root to the environment.
os.environ["SPARK_HOME"] = SPARK_HOME
# Sets SPARK_LOCAL_IP to the loopback address (presumably so Spark binds
# locally; confirm against the deployment).
os.environ["SPARK_LOCAL_IP"] = "127.0.0.1"
# Requests /tmp/.ivy as the Ivy directory via spark.jars.ivy.
# NOTE(review): PySpark's launcher reads PYSPARK_SUBMIT_ARGS; confirm that
# something in this environment actually consumes SPARK_SUBMIT_OPTIONS.
os.environ["SPARK_SUBMIT_OPTIONS"] = "--conf spark.jars.ivy=/tmp/.ivy"
# Append <SPARK_HOME>/python to the module search path.
sys.path.append( SPARK_HOME + "/python")
import pyspark
# NOTE(review): LogisticRegressionWithSGD is not used in this file, and the
# Spark 3.0 migration guide lists it as removed; confirm the installed Spark
# version still provides it, otherwise this import fails at start-up.
from pyspark.mllib.classification import LogisticRegressionWithSGD
from pyspark import SparkConf, SparkContext
def getSparkContext():
    """Return a SparkContext configured for a local run.

    The configuration built here uses the ``local`` master, the application
    name ``"Logistic Regression"`` and ``spark.executor.memory`` set to
    ``1g``.

    ``SparkContext.getOrCreate`` is used rather than the constructor because
    this module also creates a context at import time, and constructing a
    second context in the same process raises ``ValueError``.  When a
    context is already active it is returned as-is and the configuration
    built here is not applied to it.

    Returns:
        SparkContext: the already-active context, or a newly created one.
    """
    conf = (
        SparkConf()
        .setMaster("local")  # run on the local machine
        .setAppName("Logistic Regression")  # application name
        .set("spark.executor.memory", "1g")  # executor memory setting
    )
    return SparkContext.getOrCreate(conf)
# Word count: read a text file, count occurrences of every
# whitespace-separated token, and write the (word, count) pairs out.
sc = pyspark.SparkContext()  # context with default configuration
url = './tester.txt'  # input text file
opt_file = 'output'  # path handed to saveAsTextFile
text_file = sc.textFile(url)
counts = (
    text_file
    # split() with no argument breaks on any run of whitespace and never
    # yields empty strings, so repeated spaces, tabs and blank lines do not
    # add a bogus "" entry to the counts (split(" ") did).
    .flatMap(lambda line: line.split())
    .map(lambda word: (word, 1))
    .reduceByKey(lambda a, b: a + b)
)
# NOTE(review): Spark normally refuses to write to an output path that
# already exists; confirm reruns clear 'output' first.
counts.saveAsTextFile(opt_file)