Skip to content

Commit dcb8672

Browse files
committed
vectorize some, and add some notes
1 parent 55d90b2 commit dcb8672

File tree

7 files changed

+131
-56
lines changed

7 files changed

+131
-56
lines changed

coverage.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ wvpy/util.py 305 135 56%
2424
TOTAL 308 135 56%
2525

2626

27-
============================== 13 passed in 5.63s ==============================
27+
============================== 13 passed in 5.65s ==============================

pkg/build/lib/wvpy/util.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,8 @@ def plot_roc(
213213
})
214214
)
215215
"""
216-
# TODO: vectorize
217-
prediction = [v for v in prediction]
218-
istrue = [v == truth_target for v in istrue]
216+
prediction = numpy.asarray(prediction)
217+
istrue = numpy.asarray(istrue) == truth_target
219218
fpr, tpr, _ = sklearn.metrics.roc_curve(istrue, prediction)
220219
auc = sklearn.metrics.auc(fpr, tpr)
221220
ideal_curve = None
@@ -301,9 +300,8 @@ def dual_density_plot(
301300
istrue=d['y'],
302301
)
303302
"""
304-
# TODO: vectorize
305-
probs = [v for v in probs]
306-
istrue = [v == truth_target for v in istrue]
303+
probs = numpy.asarray(probs)
304+
istrue = numpy.asarray(istrue) == truth_target
307305
matplotlib.pyplot.gcf().clear()
308306
preds_on_positive = [
309307
probs[i] for i in range(len(probs)) if istrue[i] == truth_target
@@ -321,19 +319,34 @@ def dual_density_plot(
321319
matplotlib.pyplot.show()
322320

323321

324-
def dual_hist_plot(probs, istrue, title="Dual Histogram Plot", *, show=True):
322+
def dual_hist_plot(probs, istrue, title="Dual Histogram Plot", *, truth_target=True, show=True):
325323
"""
326324
plot a dual histogram plot of numeric prediction probs against boolean istrue
327325
328326
:param probs: vector of numeric predictions.
329327
:param istrue: truth vector
330328
:param title: title of plot
329+
:param truth_target: value to consider in class
331330
:param show: logical, if True call matplotlib.pyplot.show()
332331
:return: None
332+
333+
Example:
334+
335+
import pandas
336+
import wvpy.util
337+
338+
d = pandas.DataFrame({
339+
'x': [.1, .2, .3, .4, .5],
340+
'y': [False, False, True, True, False]
341+
})
342+
343+
wvpy.util.dual_hist_plot(
344+
probs=d['x'],
345+
istrue=d['y'],
346+
)
333347
"""
334-
# TODO: vectorize
335-
probs = [v for v in probs]
336-
istrue = [v for v in istrue]
348+
probs = numpy.asarray(probs)
349+
istrue = numpy.asarray(istrue) == truth_target
337350
matplotlib.pyplot.gcf().clear()
338351
pf = pandas.DataFrame({"prob": probs, "istrue": istrue})
339352
g = seaborn.FacetGrid(pf, row="istrue", height=4, aspect=3)
@@ -359,7 +372,7 @@ def dual_density_plot_proba1(
359372
"""
360373
Plot a dual density plot of numeric prediction probs[:,1] against boolean istrue.
361374
362-
:param probs: vector of numeric predictions
375+
:param probs: matrix of numeric predictions (as returned from predict_proba())
363376
:param istrue: truth target
364377
:param title: title of plot
365378
:param truth_target: value considered true
@@ -371,7 +384,7 @@ def dual_density_plot_proba1(
371384
:return: None
372385
"""
373386
# TODO: vectorize
374-
istrue = [v for v in istrue]
387+
istrue = numpy.asarray(istrue)
375388
matplotlib.pyplot.gcf().clear()
376389
preds_on_positive = [
377390
probs[i, 1] for i in range(len(probs)) if istrue[i] == truth_target
@@ -399,7 +412,7 @@ def dual_hist_plot_proba1(probs, istrue, *, show=True):
399412
:return: None
400413
"""
401414
# TODO: vectorize
402-
istrue = [v for v in istrue]
415+
istrue = numpy.asarray(istrue)
403416
matplotlib.pyplot.gcf().clear()
404417
pf = pandas.DataFrame(
405418
{"prob": [probs[i, 1] for i in range(probs.shape[0])], "istrue": istrue}
39 Bytes
Binary file not shown.

pkg/dist/wvpy-0.2.8.tar.gz

54 Bytes
Binary file not shown.

pkg/docs/search.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/docs/wvpy/util.html

Lines changed: 77 additions & 28 deletions
Large diffs are not rendered by default.

pkg/wvpy/util.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,8 @@ def plot_roc(
213213
})
214214
)
215215
"""
216-
# TODO: vectorize
217-
prediction = [v for v in prediction]
218-
istrue = [v == truth_target for v in istrue]
216+
prediction = numpy.asarray(prediction)
217+
istrue = numpy.asarray(istrue) == truth_target
219218
fpr, tpr, _ = sklearn.metrics.roc_curve(istrue, prediction)
220219
auc = sklearn.metrics.auc(fpr, tpr)
221220
ideal_curve = None
@@ -301,9 +300,8 @@ def dual_density_plot(
301300
istrue=d['y'],
302301
)
303302
"""
304-
# TODO: vectorize
305-
probs = [v for v in probs]
306-
istrue = [v == truth_target for v in istrue]
303+
probs = numpy.asarray(probs)
304+
istrue = numpy.asarray(istrue) == truth_target
307305
matplotlib.pyplot.gcf().clear()
308306
preds_on_positive = [
309307
probs[i] for i in range(len(probs)) if istrue[i] == truth_target
@@ -321,19 +319,34 @@ def dual_density_plot(
321319
matplotlib.pyplot.show()
322320

323321

324-
def dual_hist_plot(probs, istrue, title="Dual Histogram Plot", *, show=True):
322+
def dual_hist_plot(probs, istrue, title="Dual Histogram Plot", *, truth_target=True, show=True):
325323
"""
326324
plot a dual histogram plot of numeric prediction probs against boolean istrue
327325
328326
:param probs: vector of numeric predictions.
329327
:param istrue: truth vector
330328
:param title: title of plot
329+
:param truth_target: value to consider in class
331330
:param show: logical, if True call matplotlib.pyplot.show()
332331
:return: None
332+
333+
Example:
334+
335+
import pandas
336+
import wvpy.util
337+
338+
d = pandas.DataFrame({
339+
'x': [.1, .2, .3, .4, .5],
340+
'y': [False, False, True, True, False]
341+
})
342+
343+
wvpy.util.dual_hist_plot(
344+
probs=d['x'],
345+
istrue=d['y'],
346+
)
333347
"""
334-
# TODO: vectorize
335-
probs = [v for v in probs]
336-
istrue = [v for v in istrue]
348+
probs = numpy.asarray(probs)
349+
istrue = numpy.asarray(istrue) == truth_target
337350
matplotlib.pyplot.gcf().clear()
338351
pf = pandas.DataFrame({"prob": probs, "istrue": istrue})
339352
g = seaborn.FacetGrid(pf, row="istrue", height=4, aspect=3)
@@ -359,7 +372,7 @@ def dual_density_plot_proba1(
359372
"""
360373
Plot a dual density plot of numeric prediction probs[:,1] against boolean istrue.
361374
362-
:param probs: vector of numeric predictions
375+
:param probs: matrix of numeric predictions (as returned from predict_proba())
363376
:param istrue: truth target
364377
:param title: title of plot
365378
:param truth_target: value considered true
@@ -371,7 +384,7 @@ def dual_density_plot_proba1(
371384
:return: None
372385
"""
373386
# TODO: vectorize
374-
istrue = [v for v in istrue]
387+
istrue = numpy.asarray(istrue)
375388
matplotlib.pyplot.gcf().clear()
376389
preds_on_positive = [
377390
probs[i, 1] for i in range(len(probs)) if istrue[i] == truth_target
@@ -399,7 +412,7 @@ def dual_hist_plot_proba1(probs, istrue, *, show=True):
399412
:return: None
400413
"""
401414
# TODO: vectorize
402-
istrue = [v for v in istrue]
415+
istrue = numpy.asarray(istrue)
403416
matplotlib.pyplot.gcf().clear()
404417
pf = pandas.DataFrame(
405418
{"prob": [probs[i, 1] for i in range(probs.shape[0])], "istrue": istrue}

0 commit comments

Comments
 (0)