Skip to content

Commit 501a608

Browse files
Add files via upload
1 parent 15cea44 commit 501a608

File tree

1 file changed

+307
-0
lines changed

1 file changed

+307
-0
lines changed

A_B Test Analytics.ipynb

+307
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# A/B Testing With Pandas"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 2,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import pandas as pd"
17+
]
18+
},
19+
{
20+
"cell_type": "markdown",
21+
"metadata": {},
22+
"source": [
23+
"## Analyzing Ad Sources"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": 6,
29+
"metadata": {},
30+
"outputs": [
31+
{
32+
"name": "stdout",
33+
"output_type": "stream",
34+
"text": [
35+
" user_id utm_source day \\\n",
36+
"0 008b7c6c-7272-471e-b90e-930d548bd8d7 google 6 - Saturday \n",
37+
"1 009abb94-5e14-4b6c-bb1c-4f4df7aa7557 facebook 7 - Sunday \n",
38+
"2 00f5d532-ed58-4570-b6d2-768df5f41aed twitter 2 - Tuesday \n",
39+
"3 011adc64-0f44-4fd9-a0bb-f1506d2ad439 google 2 - Tuesday \n",
40+
"4 012137e6-7ae7-4649-af68-205b4702169c facebook 7 - Sunday \n",
41+
"\n",
42+
" ad_click_timestamp experimental_group \n",
43+
"0 7:18 A \n",
44+
"1 NaN B \n",
45+
"2 NaN A \n",
46+
"3 NaN B \n",
47+
"4 NaN B \n"
48+
]
49+
}
50+
],
51+
"source": [
52+
"ad_clicks = pd.read_csv('ad_clicks.csv')\n",
53+
"\n",
54+
"print(ad_clicks.head())"
55+
]
56+
},
57+
{
58+
"cell_type": "markdown",
59+
"metadata": {},
60+
"source": [
61+
"## Which ad platform is getting the most views?"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": 7,
67+
"metadata": {},
68+
"outputs": [
69+
{
70+
"name": "stdout",
71+
"output_type": "stream",
72+
"text": [
73+
" utm_source user_id\n",
74+
"0 email 255\n",
75+
"1 facebook 504\n",
76+
"2 google 680\n",
77+
"3 twitter 215\n"
78+
]
79+
}
80+
],
81+
"source": [
82+
"# Number of ad views per platform (utm_source)\n",
83+
"views_per_platform = ad_clicks.groupby('utm_source').user_id.count().reset_index()\n",
84+
"\n",
85+
"print(views_per_platform)"
86+
]
87+
},
88+
{
89+
"cell_type": "markdown",
90+
"metadata": {},
91+
"source": [
92+
"## What is the click rate for each source?"
93+
]
94+
},
95+
{
96+
"cell_type": "code",
97+
"execution_count": 9,
98+
"metadata": {},
99+
"outputs": [
100+
{
101+
"name": "stdout",
102+
"output_type": "stream",
103+
"text": [
104+
"is_click utm_source not_clicked clicked percent_clicked\n",
105+
"0 email 175 80 31.372549\n",
106+
"1 facebook 324 180 35.714286\n",
107+
"2 google 441 239 35.147059\n",
108+
"3 twitter 149 66 30.697674\n"
109+
]
110+
}
111+
],
112+
"source": [
113+
"# Percentage of people who clicked, for each UTM source\n",
114+
"ad_clicks['is_click'] = ~ad_clicks.ad_click_timestamp.isnull()\n",
115+
"\n",
116+
"\n",
117+
"clicks_by_source = ad_clicks.groupby(['utm_source', 'is_click']).user_id.count().reset_index()\n",
118+
"\n",
119+
"clicks_pivot = clicks_by_source.pivot(columns = 'is_click', index = 'utm_source', values = 'user_id').reset_index()\n",
120+
"\n",
121+
"clicks_pivot = clicks_pivot.rename(columns = {False: 'not_clicked', True: 'clicked'})\n",
122+
"\n",
123+
"clicks_pivot['percent_clicked'] = (clicks_pivot.clicked / (clicks_pivot.not_clicked + clicks_pivot.clicked)) * 100\n",
124+
"\n",
125+
"print(clicks_pivot)"
126+
]
127+
},
128+
{
129+
"cell_type": "markdown",
130+
"metadata": {},
131+
"source": [
132+
"## Analyzing an A/B Test\n",
133+
" \n",
134+
" Were approximately the same number of people shown both ads? "
135+
]
136+
},
137+
{
138+
"cell_type": "code",
139+
"execution_count": 10,
140+
"metadata": {},
141+
"outputs": [
142+
{
143+
"name": "stdout",
144+
"output_type": "stream",
145+
"text": [
146+
" experimental_group user_id\n",
147+
"0 A 827\n",
148+
"1 B 827\n"
149+
]
150+
}
151+
],
152+
"source": [
153+
"# A/B Analytics \n",
154+
"\n",
155+
"AB_test_shown = ad_clicks.groupby('experimental_group').user_id.count().reset_index()\n",
156+
"\n",
157+
"print(AB_test_shown)"
158+
]
159+
},
160+
{
161+
"cell_type": "markdown",
162+
"metadata": {},
163+
"source": [
164+
"## Total A/B Test Clicks"
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": 11,
170+
"metadata": {},
171+
"outputs": [
172+
{
173+
"name": "stdout",
174+
"output_type": "stream",
175+
"text": [
176+
"is_click Not Clicked Clicked\n",
177+
"experimental_group \n",
178+
"A 517 310\n",
179+
"B 572 255\n"
180+
]
181+
}
182+
],
183+
"source": [
184+
"click_percentage = ad_clicks.groupby(['experimental_group', 'is_click']).user_id.count().reset_index()\n",
185+
"\n",
186+
"click_percentage_pivot = click_percentage.pivot(columns = 'is_click', index = 'experimental_group', values = 'user_id')\n",
187+
"\n",
188+
"click_percentage_pivot = click_percentage_pivot.rename(columns = {False: 'Not Clicked', True: 'Clicked'})\n",
189+
"\n",
190+
"print(click_percentage_pivot)"
191+
]
192+
},
193+
{
194+
"cell_type": "markdown",
195+
"metadata": {},
196+
"source": [
197+
"## Clicks Over Time Across A/B"
198+
]
199+
},
200+
{
201+
"cell_type": "markdown",
202+
"metadata": {},
203+
"source": [
204+
"### For A Test:"
205+
]
206+
},
207+
{
208+
"cell_type": "code",
209+
"execution_count": 12,
210+
"metadata": {},
211+
"outputs": [
212+
{
213+
"name": "stdout",
214+
"output_type": "stream",
215+
"text": [
216+
"is_click day click_percentage %\n",
217+
"0 1 - Monday 38.053097\n",
218+
"1 2 - Tuesday 36.134454\n",
219+
"2 3 - Wednesday 30.645161\n",
220+
"3 4 - Thursday 40.517241\n",
221+
"4 5 - Friday 39.843750\n",
222+
"5 6 - Saturday 38.135593\n",
223+
"6 7 - Sunday 39.449541\n"
224+
]
225+
}
226+
],
227+
"source": [
228+
"# Click percentage by day of the week, per experimental group\n",
229+
"a_click = ad_clicks[ad_clicks.experimental_group == 'A']\n",
230+
"\n",
231+
"b_click = ad_clicks[ad_clicks.experimental_group == 'B']\n",
232+
"\n",
233+
"a_click_by_day = a_click.groupby(['day','is_click']).user_id.count().reset_index()\n",
234+
"\n",
235+
"a_click_by_day_pivot = a_click_by_day.pivot(columns = 'is_click', index = 'day', values = 'user_id').reset_index()\n",
236+
"\n",
237+
"\n",
238+
"a_click_by_day_pivot[\"click_percentage %\"] = a_click_by_day_pivot[True] * 100 / (a_click_by_day_pivot[False] + a_click_by_day_pivot[True])\n",
239+
"\n",
240+
"a_percentage_per_day = a_click_by_day_pivot[['day', 'click_percentage %']]\n",
241+
"\n",
242+
"print(a_percentage_per_day)"
243+
]
244+
},
245+
{
246+
"cell_type": "markdown",
247+
"metadata": {},
248+
"source": [
249+
"### For B Test:"
250+
]
251+
},
252+
{
253+
"cell_type": "code",
254+
"execution_count": 13,
255+
"metadata": {},
256+
"outputs": [
257+
{
258+
"name": "stdout",
259+
"output_type": "stream",
260+
"text": [
261+
"is_click day click_percentage %\n",
262+
"0 1 - Monday 28.318584\n",
263+
"1 2 - Tuesday 37.815126\n",
264+
"2 3 - Wednesday 28.225806\n",
265+
"3 4 - Thursday 25.000000\n",
266+
"4 5 - Friday 29.687500\n",
267+
"5 6 - Saturday 35.593220\n",
268+
"6 7 - Sunday 31.192661\n"
269+
]
270+
}
271+
],
272+
"source": [
273+
"b_click_by_day = b_click.groupby(['day','is_click']).user_id.count().reset_index()\n",
274+
"\n",
275+
"b_click_by_day_pivot = b_click_by_day.pivot(columns = 'is_click', index = 'day', values = 'user_id').reset_index()\n",
276+
"\n",
277+
"b_click_by_day_pivot[\"click_percentage %\"] = b_click_by_day_pivot[True] * 100 / (b_click_by_day_pivot[False] + b_click_by_day_pivot[True])\n",
278+
"\n",
279+
"b_percentage_per_day = b_click_by_day_pivot[['day', 'click_percentage %']]\n",
280+
"\n",
281+
"\n",
282+
"print(b_percentage_per_day)"
283+
]
284+
}
285+
],
286+
"metadata": {
287+
"kernelspec": {
288+
"display_name": "Python 3",
289+
"language": "python",
290+
"name": "python3"
291+
},
292+
"language_info": {
293+
"codemirror_mode": {
294+
"name": "ipython",
295+
"version": 3
296+
},
297+
"file_extension": ".py",
298+
"mimetype": "text/x-python",
299+
"name": "python",
300+
"nbconvert_exporter": "python",
301+
"pygments_lexer": "ipython3",
302+
"version": "3.7.6"
303+
}
304+
},
305+
"nbformat": 4,
306+
"nbformat_minor": 4
307+
}

0 commit comments

Comments
 (0)