Skip to content

Commit 6a2f0e9

Browse files
committed
de-dup threshold plot rows
1 parent 10e102e commit 6a2f0e9

File tree

10 files changed

+591
-615
lines changed

10 files changed

+591
-615
lines changed

README.ipynb

Lines changed: 27 additions & 37 deletions
Large diffs are not rendered by default.

coverage.txt

Lines changed: 5 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -18,47 +18,13 @@ tests/test_stats1.py . [ 87%]
1818
tests/test_threshold_stats.py . [ 93%]
1919
tests/test_types_in_frame.py . [100%]
2020

21-
=============================== warnings summary ===============================
22-
tests/test_plots.py::test_graphs
23-
/Users/johnmount/Documents/work/wvu/pkg/wvu/util.py:333: FutureWarning:
24-
25-
`shade` is now deprecated in favor of `fill`; setting `fill=True`.
26-
This will become an error in seaborn v0.14.0; please update your code.
27-
28-
seaborn.kdeplot(preds_on_positive, label=positive_label, shade=True)
29-
30-
tests/test_plots.py::test_graphs
31-
/Users/johnmount/Documents/work/wvu/pkg/wvu/util.py:334: FutureWarning:
32-
33-
`shade` is now deprecated in favor of `fill`; setting `fill=True`.
34-
This will become an error in seaborn v0.14.0; please update your code.
35-
36-
seaborn.kdeplot(preds_on_negative, label=negative_label, shade=True)
37-
38-
tests/test_plots.py::test_graphs
39-
/Users/johnmount/Documents/work/wvu/pkg/wvu/util.py:430: FutureWarning:
40-
41-
`shade` is now deprecated in favor of `fill`; setting `fill=True`.
42-
This will become an error in seaborn v0.14.0; please update your code.
43-
44-
seaborn.kdeplot(preds_on_positive, label=positive_label, shade=True)
45-
46-
tests/test_plots.py::test_graphs
47-
/Users/johnmount/Documents/work/wvu/pkg/wvu/util.py:431: FutureWarning:
48-
49-
`shade` is now deprecated in favor of `fill`; setting `fill=True`.
50-
This will become an error in seaborn v0.14.0; please update your code.
51-
52-
seaborn.kdeplot(preds_on_negative, label=negative_label, shade=True)
53-
54-
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
55-
5621
---------- coverage: platform darwin, python 3.9.15-final-0 ----------
5722
Name Stmts Miss Cover
5823
-------------------------------------
59-
wvu/__init__.py 3 0 100%
60-
wvu/util.py 321 7 98%
24+
wvu/__init__.py 4 0 100%
25+
wvu/util.py 324 7 98%
6126
-------------------------------------
62-
TOTAL 324 7 98%
27+
TOTAL 328 7 98%
28+
6329

64-
======================== 16 passed, 4 warnings in 9.37s ========================
30+
============================= 16 passed in 10.38s ==============================

pkg/build/lib/wvu/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@
44
__doc__ = """
55
This<https://github.com/WinVector/wvu> is a package of example files for teaching data science.
66
"""
7+
8+
import wvu.util as util
9+

pkg/build/lib/wvu/util.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,8 @@ def dual_density_plot(
330330
preds_on_negative = [
331331
probs[i] for i in range(len(probs)) if not istrue[i] == truth_target
332332
]
333-
seaborn.kdeplot(preds_on_positive, label=positive_label, shade=True)
334-
seaborn.kdeplot(preds_on_negative, label=negative_label, shade=True)
333+
seaborn.kdeplot(preds_on_positive, label=positive_label, fill=True)
334+
seaborn.kdeplot(preds_on_negative, label=negative_label, fill=True)
335335
matplotlib.pyplot.ylabel(ylabel)
336336
matplotlib.pyplot.xlabel(xlabel)
337337
matplotlib.pyplot.title(title)
@@ -427,8 +427,8 @@ def dual_density_plot_proba1(
427427
preds_on_negative = [
428428
probs[i, 1] for i in range(len(probs)) if not istrue[i] == truth_target
429429
]
430-
seaborn.kdeplot(preds_on_positive, label=positive_label, shade=True)
431-
seaborn.kdeplot(preds_on_negative, label=negative_label, shade=True)
430+
seaborn.kdeplot(preds_on_positive, label=positive_label, fill=True)
431+
seaborn.kdeplot(preds_on_negative, label=negative_label, fill=True)
432432
matplotlib.pyplot.ylabel(ylabel)
433433
matplotlib.pyplot.xlabel(xlabel)
434434
matplotlib.pyplot.title(title)
@@ -722,7 +722,6 @@ def threshold_statistics(
722722
sorted_frame["notY"] = 1 - sorted_frame["truth"] # falses
723723
sorted_frame["one"] = 1
724724
del sorted_frame["orig_index"]
725-
726725
# pseudo-observation to get end-case (accept nothing case)
727726
eps = 1.0e-6
728727
sorted_frame = pandas.concat(
@@ -747,7 +746,6 @@ def threshold_statistics(
747746
]
748747
)
749748
sorted_frame.reset_index(inplace=True, drop=True)
750-
751749
# basic cumulative facts
752750
sorted_frame["count"] = sorted_frame["one"].cumsum() # predicted true so far
753751
sorted_frame["fraction"] = sorted_frame["count"] / max(1, sorted_frame["one"].sum())
@@ -771,25 +769,28 @@ def threshold_statistics(
771769
+ sorted_frame["notY"].sum()
772770
- sorted_frame["notY"].cumsum() # true negative count
773771
) / sorted_frame["one"].sum()
774-
775772
# approximate cdf work
776773
sorted_frame["cdf"] = 1 - sorted_frame["fraction"]
777-
778774
# derived facts and synonyms
779775
sorted_frame["recall"] = sorted_frame["true_positive_rate"]
780776
sorted_frame["sensitivity"] = sorted_frame["recall"]
781777
sorted_frame["specificity"] = 1 - sorted_frame["false_positive_rate"]
782-
783778
# re-order for neatness
784779
sorted_frame["new_index"] = sorted_frame.index.copy()
785780
sorted_frame.sort_values(["new_index"], ascending=[False], inplace=True)
786781
sorted_frame.reset_index(inplace=True, drop=True)
787-
788782
# clean up
789783
del sorted_frame["notY"]
790784
del sorted_frame["one"]
791785
del sorted_frame["new_index"]
792786
del sorted_frame["truth"]
787+
# limit down to last version of each threshold
788+
if sorted_frame.shape[0] > 1:
789+
want_row = [True] + list(
790+
numpy.array(sorted_frame["threshold"][range(sorted_frame.shape[0]-1)])
791+
!= numpy.array(sorted_frame["threshold"][range(1, sorted_frame.shape[0])])
792+
)
793+
sorted_frame = sorted_frame.loc[want_row, :].reset_index(drop=True, inplace=False)
793794
return sorted_frame
794795

795796

@@ -874,7 +875,9 @@ def threshold_plot(
874875
pandas.DataFrame({"measure": plotvars, "value": plotvars}),
875876
control_table_keys=["measure"],
876877
record_keys=["threshold"],
877-
)
878+
strict=False,
879+
),
880+
strict=False,
878881
)
879882
prtlong = reshaper.transform(to_plot)
880883
grid = seaborn.FacetGrid(
123 Bytes
Binary file not shown.

pkg/dist/wvu-0.3.9.tar.gz

127 Bytes
Binary file not shown.

pkg/docs/wvu.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ <h1 class="modulename">
5858
</span><span id="L-4"><a href="#L-4"><span class="linenos">4</span></a><span class="vm">__doc__</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
5959
</span><span id="L-5"><a href="#L-5"><span class="linenos">5</span></a><span class="s2">This&lt;https://github.com/WinVector/wvu&gt; is a package of example files for teaching data science.</span>
6060
</span><span id="L-6"><a href="#L-6"><span class="linenos">6</span></a><span class="s2">&quot;&quot;&quot;</span>
61+
</span><span id="L-7"><a href="#L-7"><span class="linenos">7</span></a>
62+
</span><span id="L-8"><a href="#L-8"><span class="linenos">8</span></a><span class="kn">import</span> <span class="nn">wvu.util</span> <span class="k">as</span> <span class="nn">util</span>
6163
</span></pre></div>
6264

6365

0 commit comments

Comments
 (0)