Skip to content

Commit 0becbfc

Browse files
committed
add Nina's improved plotting functions
1 parent 1689e1e commit 0becbfc

File tree

12 files changed

+92
-13
lines changed

12 files changed

+92
-13
lines changed

README.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Win Vector LLC extras for teaching data science in Python 3
22

33
pip install wvpy
4-
# or: pip install https://github.com/WinVector/wvpy/raw/master/dist/wvpy-0.1.2.tar.gz
4+
# or: pip install https://github.com/WinVector/wvpy/raw/master/dist/wvpy-0.1.3.tar.gz
55

66
~/anaconda3/bin/python3
77

clean.bash

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
pushd pkg
3+
rm -rf dist build wvpy.egg-info wvpy/__pycache__ tests/__pycache__
4+
popd
5+
pip uninstall -y wvpy
6+
7+
8+

pkg/build/lib/wvpy/util.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def plot_roc(prediction, istrue, title="Receiver operating characteristic plot")
9393
return auc
9494

9595

96-
def dual_density_plot(probs, istrue):
96+
def dual_density_plot(probs, istrue, title="Double density plot"):
9797
"""plot a dual density plot of numeric prediction probs against boolean istrue"""
9898
matplotlib.pyplot.gcf().clear()
9999
preds_on_positive = [probs[i] for i in range(len(probs)) if istrue[i]]
@@ -102,6 +102,7 @@ def dual_density_plot(probs, istrue):
102102
seaborn.kdeplot(preds_on_negative, label="negative examples", shade=True)
103103
matplotlib.pyplot.ylabel("density of examples")
104104
matplotlib.pyplot.xlabel("model score")
105+
matplotlib.pyplot.title(title)
105106
matplotlib.pyplot.show()
106107

107108

@@ -129,9 +130,11 @@ def dual_hist_plot_proba1(probs, istrue):
129130
matplotlib.pyplot.show()
130131

131132

132-
def gain_curve_plot(prediction, outcome):
133+
def gain_curve_plot(prediction, outcome, title="Gain curve plot"):
133134
"""plot cumulative outcome as a function of prediction order (descending)"""
134135
df = pandas.DataFrame({"prediction": prediction, "outcome": outcome})
136+
137+
# compute the gain curve
135138
df.sort_values(["prediction"], ascending=[False], inplace=True)
136139
df["fraction_of_observations_by_prediction"] = [
137140
(1 + i) / df.shape[0] for i in range(df.shape[0])
@@ -140,15 +143,37 @@ def gain_curve_plot(prediction, outcome):
140143
df["cumulative_outcome_fraction"] = df["cumulative_outcome"] / numpy.max(
141144
df["cumulative_outcome"]
142145
)
143-
seaborn.scatterplot(
146+
147+
# compute the wizard curve
148+
df.sort_values(["outcome"], ascending=[False], inplace=True)
149+
df["fraction_of_observations_by_wizard"] = [
150+
(1 + i) / df.shape[0] for i in range(df.shape[0])
151+
]
152+
df["cumulative_outcome_by_wizard"] = df["outcome"].cumsum()
153+
df["cumulative_outcome_fraction_wizard"] = df["cumulative_outcome_by_wizard"] / numpy.max(
154+
df["cumulative_outcome_by_wizard"]
155+
)
156+
157+
seaborn.lineplot(
144158
x="fraction_of_observations_by_prediction",
145159
y="cumulative_outcome_fraction",
146160
data=df,
147161
)
162+
163+
seaborn.lineplot(
164+
x="fraction_of_observations_by_wizard",
165+
y="cumulative_outcome_fraction_wizard",
166+
color="gray",
167+
linestyle="--",
168+
data=df,
169+
)
170+
148171
seaborn.lineplot(x=[0, 1], y=[0, 1], color="red")
172+
matplotlib.pyplot.title(title)
173+
matplotlib.pyplot.show()
149174

150175

151-
def lift_curve_plot(prediction, outcome):
176+
def lift_curve_plot(prediction, outcome, title="Lift curve plot"):
152177
"""plot lift as a function of prediction order (descending)"""
153178
df = pandas.DataFrame({"prediction": prediction, "outcome": outcome})
154179
df.sort_values(["prediction"], ascending=[False], inplace=True)
@@ -162,8 +187,10 @@ def lift_curve_plot(prediction, outcome):
162187
df["lift"] = (
163188
df["cumulative_outcome_fraction"] / df["fraction_of_observations_by_prediction"]
164189
)
165-
seaborn.scatterplot(x="fraction_of_observations_by_prediction", y="lift", data=df)
190+
seaborn.lineplot(x="fraction_of_observations_by_prediction", y="lift", data=df)
166191
matplotlib.pyplot.axhline(y=1, color="red")
192+
matplotlib.pyplot.title(title)
193+
matplotlib.pyplot.show()
167194

168195

169196
def dual_hist_plot(probs, istrue):
-4.63 KB
Binary file not shown.

pkg/dist/wvpy-0.1.2.tar.gz

-3.75 KB
Binary file not shown.
4.72 KB
Binary file not shown.

pkg/dist/wvpy-0.1.3.tar.gz

3.84 KB
Binary file not shown.

pkg/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"""
88

99
setuptools.setup(name='wvpy',
10-
version='0.1.2',
10+
version='0.1.3',
1111
author='John Mount',
1212
author_email='[email protected]',
1313
url='https://github.com/WinVector/wvpy',

pkg/wvpy.egg-info/PKG-INFO

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Metadata-Version: 2.1
22
Name: wvpy
3-
Version: 0.1.2
3+
Version: 0.1.3
44
Summary: Simple utilities for teaching Pandas and scikit learn.
55
Home-page: https://github.com/WinVector/wvpy
66
Author: John Mount

pkg/wvpy/util.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def plot_roc(prediction, istrue, title="Receiver operating characteristic plot")
9393
return auc
9494

9595

96-
def dual_density_plot(probs, istrue):
96+
def dual_density_plot(probs, istrue, title="Double density plot"):
9797
"""plot a dual density plot of numeric prediction probs against boolean istrue"""
9898
matplotlib.pyplot.gcf().clear()
9999
preds_on_positive = [probs[i] for i in range(len(probs)) if istrue[i]]
@@ -102,6 +102,7 @@ def dual_density_plot(probs, istrue):
102102
seaborn.kdeplot(preds_on_negative, label="negative examples", shade=True)
103103
matplotlib.pyplot.ylabel("density of examples")
104104
matplotlib.pyplot.xlabel("model score")
105+
matplotlib.pyplot.title(title)
105106
matplotlib.pyplot.show()
106107

107108

@@ -129,9 +130,11 @@ def dual_hist_plot_proba1(probs, istrue):
129130
matplotlib.pyplot.show()
130131

131132

132-
def gain_curve_plot(prediction, outcome):
133+
def gain_curve_plot(prediction, outcome, title="Gain curve plot"):
133134
"""plot cumulative outcome as a function of prediction order (descending)"""
134135
df = pandas.DataFrame({"prediction": prediction, "outcome": outcome})
136+
137+
# compute the gain curve
135138
df.sort_values(["prediction"], ascending=[False], inplace=True)
136139
df["fraction_of_observations_by_prediction"] = [
137140
(1 + i) / df.shape[0] for i in range(df.shape[0])
@@ -140,15 +143,37 @@ def gain_curve_plot(prediction, outcome):
140143
df["cumulative_outcome_fraction"] = df["cumulative_outcome"] / numpy.max(
141144
df["cumulative_outcome"]
142145
)
143-
seaborn.scatterplot(
146+
147+
# compute the wizard curve
148+
df.sort_values(["outcome"], ascending=[False], inplace=True)
149+
df["fraction_of_observations_by_wizard"] = [
150+
(1 + i) / df.shape[0] for i in range(df.shape[0])
151+
]
152+
df["cumulative_outcome_by_wizard"] = df["outcome"].cumsum()
153+
df["cumulative_outcome_fraction_wizard"] = df["cumulative_outcome_by_wizard"] / numpy.max(
154+
df["cumulative_outcome_by_wizard"]
155+
)
156+
157+
seaborn.lineplot(
144158
x="fraction_of_observations_by_prediction",
145159
y="cumulative_outcome_fraction",
146160
data=df,
147161
)
162+
163+
seaborn.lineplot(
164+
x="fraction_of_observations_by_wizard",
165+
y="cumulative_outcome_fraction_wizard",
166+
color="gray",
167+
linestyle="--",
168+
data=df,
169+
)
170+
148171
seaborn.lineplot(x=[0, 1], y=[0, 1], color="red")
172+
matplotlib.pyplot.title(title)
173+
matplotlib.pyplot.show()
149174

150175

151-
def lift_curve_plot(prediction, outcome):
176+
def lift_curve_plot(prediction, outcome, title="Lift curve plot"):
152177
"""plot lift as a function of prediction order (descending)"""
153178
df = pandas.DataFrame({"prediction": prediction, "outcome": outcome})
154179
df.sort_values(["prediction"], ascending=[False], inplace=True)
@@ -162,8 +187,10 @@ def lift_curve_plot(prediction, outcome):
162187
df["lift"] = (
163188
df["cumulative_outcome_fraction"] / df["fraction_of_observations_by_prediction"]
164189
)
165-
seaborn.scatterplot(x="fraction_of_observations_by_prediction", y="lift", data=df)
190+
seaborn.lineplot(x="fraction_of_observations_by_prediction", y="lift", data=df)
166191
matplotlib.pyplot.axhline(y=1, color="red")
192+
matplotlib.pyplot.title(title)
193+
matplotlib.pyplot.show()
167194

168195

169196
def dual_hist_plot(probs, istrue):

0 commit comments

Comments
 (0)