Skip to content

Commit 208081e

Browse files
committed
Added all the in-class exercises and solutions
1 parent f7e00a3 commit 208081e

File tree

14 files changed

+6347
-0
lines changed

14 files changed

+6347
-0
lines changed
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 2,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"import csv\n",
"\n",
"from pyspark import SparkContext\n",
12+
"sc = SparkContext.getOrCreate()"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 3,
18+
"metadata": {
19+
"collapsed": false
20+
},
21+
"outputs": [],
22+
"source": [
23+
"def parse_csv_line(line):\n",
"    \"\"\"Split one CSV line into a list of fields, honouring double-quoted fields.\n",
"\n",
"    A plain line.split(\",\") cuts quoted business names that contain a comma\n",
"    in half, leaving fragments such as '\"Wti' in the results.\n",
"    Returns an empty list for an empty line.\n",
"    \"\"\"\n",
"    if str is bytes:\n",
"        # Python 2: the csv module only handles byte strings, so round-trip via UTF-8\n",
"        # and hand unicode back, matching what sc.textFile produces.\n",
"        row = next(csv.reader([line.encode(\"utf-8\")]), [])\n",
"        return [field.decode(\"utf-8\") for field in row]\n",
"    return next(csv.reader([line]), [])\n",
"\n",
"\n",
"# Unique (first column, second column) pairs; named (zip, business name) by the variable.\n",
"business = sc.textFile(\"../Data/filtered_registered_business_sf.csv\")\n",
24+
"business_zip_name_pair = business.map(parse_csv_line)\\\n",
25+
"                                 .map(lambda x : (x[0],x[1])).distinct()"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": 7,
31+
"metadata": {
32+
"collapsed": false
33+
},
34+
"outputs": [
35+
{
36+
"data": {
37+
"text/plain": [
38+
"[(u'94103', u'Razaghi A Kiakojouri A'),\n",
39+
" (u'94590', u'Julio Cesar De Moraes'),\n",
40+
" (u'94103', u'Bernies Pet Shoppe Inc'),\n",
41+
" (u'94122', u'Chan Cindy L'),\n",
42+
" (u'94124', u'Ho Kwok M')]"
43+
]
44+
},
45+
"execution_count": 7,
46+
"metadata": {},
47+
"output_type": "execute_result"
48+
}
49+
],
50+
"source": [
51+
"business_zip_name_pair.take(5)"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": 8,
57+
"metadata": {
58+
"collapsed": false
59+
},
60+
"outputs": [],
61+
"source": [
62+
"# Load the supervisor CSV and keep the unique (first column, second column) pairs;\n",
"# the variable name labels them (zip, supervisor id).\n",
"supervisor = sc.textFile(\"../Data/supervisor_sf.csv\")\n",
63+
"supervisor_rows = supervisor.map(lambda line : line.split(\",\"))\n",
64+
"supervisor_zip_id_pair = supervisor_rows.map(lambda fields : (fields[0], fields[1])).distinct()"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": 9,
70+
"metadata": {
71+
"collapsed": false
72+
},
73+
"outputs": [
74+
{
75+
"data": {
76+
"text/plain": [
77+
"[(u'94118', u'1'),\n",
78+
" (u'94134', u'9'),\n",
79+
" (u'94118', u'5'),\n",
80+
" (u'94134', u'11'),\n",
81+
" (u'94111', u'6')]"
82+
]
83+
},
84+
"execution_count": 9,
85+
"metadata": {},
86+
"output_type": "execute_result"
87+
}
88+
],
89+
"source": [
90+
"supervisor_zip_id_pair.take(5)"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": 10,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"business_without_supervisor = business_zip_name_pair.subtractByKey(supervisor_zip_id_pair)\\\n",
102+
" .values()\\\n",
103+
" .distinct()"
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": 9,
109+
"metadata": {
110+
"collapsed": false
111+
},
112+
"outputs": [
113+
{
114+
"data": {
115+
"text/plain": [
116+
"39422"
117+
]
118+
},
119+
"execution_count": 9,
120+
"metadata": {},
121+
"output_type": "execute_result"
122+
}
123+
],
124+
"source": [
125+
"business_without_supervisor.count()"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": 11,
131+
"metadata": {
132+
"collapsed": false
133+
},
134+
"outputs": [
135+
{
136+
"data": {
137+
"text/plain": [
138+
"[u'Precision Communication Serv',\n",
139+
" u'Schefer Thomas R',\n",
140+
" u'Lucid Systems',\n",
141+
" u'Jacob Abraham',\n",
142+
" u'Daniel Dela Rosa',\n",
143+
" u'Sudhir Marahatta',\n",
144+
" u'Batista Luis S',\n",
145+
" u'\"Wti',\n",
146+
" u'Boutin Jacqueline M',\n",
147+
" u'Avinesh P Singh']"
148+
]
149+
},
150+
"execution_count": 11,
151+
"metadata": {},
152+
"output_type": "execute_result"
153+
}
154+
],
155+
"source": [
156+
"business_without_supervisor.take(10)"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"metadata": {
163+
"collapsed": true
164+
},
165+
"outputs": [],
166+
"source": []
167+
}
168+
],
169+
"metadata": {
170+
"anaconda-cloud": {},
171+
"kernelspec": {
172+
"display_name": "Python 2",
173+
"language": "python",
174+
"name": "python2"
175+
},
176+
"language_info": {
177+
"codemirror_mode": {
178+
"name": "ipython",
179+
"version": 2
180+
},
181+
"file_extension": ".py",
182+
"mimetype": "text/x-python",
183+
"name": "python",
184+
"nbconvert_exporter": "python",
185+
"pygments_lexer": "ipython2",
186+
"version": "2.7.12"
187+
}
188+
},
189+
"nbformat": 4,
190+
"nbformat_minor": 2
191+
}

0 commit comments

Comments
 (0)