Skip to content

Commit 827d02d

Browse files
committed
better example in README
1 parent da329f9 commit 827d02d

File tree

10 files changed

+128
-55
lines changed

10 files changed

+128
-55
lines changed

README.ipynb

Lines changed: 72 additions & 33 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ They are not replacements for the obvious methods in sklearn.
66

77

88
```python
9+
import numpy.random
10+
import pandas
911
import wvpy.util
1012

1113
wvpy.__version__
@@ -32,27 +34,14 @@ wvpy.util.mk_cross_plan(10,2)
3234

3335

3436

35-
[{'train': [0, 1, 2, 4, 7], 'test': [3, 5, 6, 8, 9]},
36-
{'train': [3, 5, 6, 8, 9], 'test': [0, 1, 2, 4, 7]}]
37+
[{'train': [2, 5, 6, 7, 8], 'test': [0, 1, 3, 4, 9]},
38+
{'train': [0, 1, 3, 4, 9], 'test': [2, 5, 6, 7, 8]}]
3739

3840

3941

4042
Plotting example
4143

4244

43-
```python
44-
import pandas
45-
```
46-
47-
48-
```python
49-
d = pandas.DataFrame({
50-
'x': [1, 2, 3, 4, 5],
51-
'y': [False, False, True, True, False]
52-
})
53-
```
54-
55-
5645
```python
5746
help(wvpy.util.plot_roc)
5847
```
@@ -86,6 +75,23 @@ help(wvpy.util.plot_roc)
8675

8776

8877

78+
```python
79+
d = pandas.concat([
80+
pandas.DataFrame({
81+
'x': numpy.random.normal(size=1000),
82+
'y': numpy.random.choice([True, False],
83+
p=(0.02, 0.98),
84+
size=1000,
85+
replace=True)}),
86+
pandas.DataFrame({
87+
'x': numpy.random.normal(size=200) + 5,
88+
'y': numpy.random.choice([True, False],
89+
size=200,
90+
replace=True)}),
91+
])
92+
```
93+
94+
8995
```python
9096
wvpy.util.plot_roc(
9197
prediction=d.x,
@@ -94,13 +100,13 @@ wvpy.util.plot_roc(
94100
```
95101

96102

97-
![png](output_8_0.png)
103+
![png](output_7_0.png)
98104

99105

100106

101107

102108

103-
0.6666666666666667
109+
0.8826929012345679
104110

105111

106112

@@ -122,7 +128,7 @@ help(wvpy.util.threshold_plot)
122128
:param threshold_range: x-axis range to plot
123129
:param plotvars: list of metrics to plot, must come from ['threshold', 'count', 'fraction', 'precision',
124130
'true_positive_rate', 'false_positive_rate', 'true_negative_rate', 'false_negative_rate',
125-
'enrichment', 'gain', 'lift', 'recall', 'sensitivity', 'specificity']
131+
'recall', 'sensitivity', 'specificity']
126132
:param title: title for plot
127133
:return: None, plot produced as a side effect
128134
@@ -140,7 +146,7 @@ help(wvpy.util.threshold_plot)
140146
d,
141147
pred_var='x',
142148
truth_var='y',
143-
plotvars=("sensitivity", "specificity", "fraction"),
149+
plotvars=("sensitivity", "specificity"),
144150
)
145151

146152

@@ -151,7 +157,24 @@ wvpy.util.threshold_plot(
151157
d,
152158
pred_var='x',
153159
truth_var='y',
154-
plotvars=("sensitivity", "specificity", "fraction"),
160+
plotvars=("sensitivity", "specificity"),
161+
title = "example plot"
162+
)
163+
```
164+
165+
166+
![png](output_9_0.png)
167+
168+
169+
170+
```python
171+
172+
wvpy.util.threshold_plot(
173+
d,
174+
pred_var='x',
175+
truth_var='y',
176+
plotvars=("precision", "recall"),
177+
title = "example plot"
155178
)
156179
```
157180

@@ -160,6 +183,11 @@ wvpy.util.threshold_plot(
160183

161184

162185

186+
```python
187+
188+
```
189+
190+
163191
```python
164192

165193
```

output_10_0.png

-5.92 KB
Loading

output_7_0.png

16.4 KB
Loading

output_8_0.png

-15.9 KB
Binary file not shown.

output_9_0.png

17.6 KB
Loading

pkg/build/lib/wvpy/util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,9 @@ def threshold_statistics(
444444
sorted_frame["truth"].sum() - sorted_frame["truth"].cumsum()
445445
) / max(1, sorted_frame["truth"].sum())
446446

447+
# approximate cdf work
448+
sorted_frame['cdf'] = 1 - sorted_frame['fraction']
449+
447450
# derived facts and synonyms
448451
sorted_frame["recall"] = sorted_frame["true_positive_rate"]
449452
sorted_frame["sensitivity"] = sorted_frame["recall"]
@@ -500,7 +503,7 @@ def threshold_plot(
500503
d,
501504
pred_var='x',
502505
truth_var='y',
503-
plotvars=("sensitivity", "specificity", "fraction"),
506+
plotvars=("sensitivity", "specificity"),
504507
)
505508
"""
506509
frame = d[[pred_var, truth_var]].copy()
25 Bytes
Binary file not shown.

pkg/dist/wvpy-0.2.2.tar.gz

27 Bytes
Binary file not shown.

pkg/wvpy/util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,9 @@ def threshold_statistics(
444444
sorted_frame["truth"].sum() - sorted_frame["truth"].cumsum()
445445
) / max(1, sorted_frame["truth"].sum())
446446

447+
# approximate cdf work
448+
sorted_frame['cdf'] = 1 - sorted_frame['fraction']
449+
447450
# derived facts and synonyms
448451
sorted_frame["recall"] = sorted_frame["true_positive_rate"]
449452
sorted_frame["sensitivity"] = sorted_frame["recall"]
@@ -500,7 +503,7 @@ def threshold_plot(
500503
d,
501504
pred_var='x',
502505
truth_var='y',
503-
plotvars=("sensitivity", "specificity", "fraction"),
506+
plotvars=("sensitivity", "specificity"),
504507
)
505508
"""
506509
frame = d[[pred_var, truth_var]].copy()

0 commit comments

Comments
 (0)