Skip to content

Commit da329f9

Browse files
committed
denominator safety in stat calculation
1 parent 8325fd1 commit da329f9

File tree

5 files changed

+13
-13
lines changed

5 files changed

+13
-13
lines changed

pkg/build/lib/wvpy/util.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -429,20 +429,20 @@ def threshold_statistics(
429429

430430
# basic cumulative facts
431431
sorted_frame["count"] = sorted_frame["one"].cumsum() # predicted true so far
432-
sorted_frame["fraction"] = sorted_frame["count"] / sorted_frame["one"].sum()
433-
sorted_frame["precision"] = sorted_frame["truth"].cumsum() / sorted_frame["count"]
432+
sorted_frame["fraction"] = sorted_frame["count"] / max(1, sorted_frame["one"].sum())
433+
sorted_frame["precision"] = sorted_frame["truth"].cumsum() / sorted_frame["count"].clip(lower=1)
434434
sorted_frame["true_positive_rate"] = (
435-
sorted_frame["truth"].cumsum() / sorted_frame["truth"].sum()
435+
sorted_frame["truth"].cumsum() / max(1, sorted_frame["truth"].sum())
436436
)
437437
sorted_frame["false_positive_rate"] = (
438-
sorted_frame["notY"].cumsum() / sorted_frame["notY"].sum()
438+
sorted_frame["notY"].cumsum() / max(1, sorted_frame["notY"].sum())
439439
)
440440
sorted_frame["true_negative_rate"] = (
441441
sorted_frame["notY"].sum() - sorted_frame["notY"].cumsum()
442-
) / sorted_frame["notY"].sum()
442+
) / max(1, sorted_frame["notY"].sum())
443443
sorted_frame["false_negative_rate"] = (
444444
sorted_frame["truth"].sum() - sorted_frame["truth"].cumsum()
445-
) / sorted_frame["truth"].sum()
445+
) / max(1, sorted_frame["truth"].sum())
446446

447447
# derived facts and synonyms
448448
sorted_frame["recall"] = sorted_frame["true_positive_rate"]
18 Bytes
Binary file not shown.

pkg/dist/wvpy-0.2.2.tar.gz

17 Bytes
Binary file not shown.

pkg/tests/test_stats1.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ def test_stats1():
1313
"threshold": [0.999999, 1.0, 2.0, 3.0, 4.0, 5.0, 5.000001],
1414
"count": [5, 5, 4, 3, 2, 1, 0],
1515
"fraction": [1.0, 1.0, 0.8, 0.6, 0.4, 0.2, 0.0],
16-
"precision": [0.4, 0.4, 0.5, 0.6666666666666666, 0.5, 0.0, None],
16+
"precision": [0.4, 0.4, 0.5, 0.6666666666666666, 0.5, 0.0, 0.0],
1717
"true_positive_rate": [1.0, 1.0, 1.0, 1.0, 0.5, 0.0, 0.0],
1818
"false_positive_rate": [
1919
1.0,

pkg/wvpy/util.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -429,20 +429,20 @@ def threshold_statistics(
429429

430430
# basic cumulative facts
431431
sorted_frame["count"] = sorted_frame["one"].cumsum() # predicted true so far
432-
sorted_frame["fraction"] = sorted_frame["count"] / sorted_frame["one"].sum()
433-
sorted_frame["precision"] = sorted_frame["truth"].cumsum() / sorted_frame["count"]
432+
sorted_frame["fraction"] = sorted_frame["count"] / max(1, sorted_frame["one"].sum())
433+
sorted_frame["precision"] = sorted_frame["truth"].cumsum() / sorted_frame["count"].clip(lower=1)
434434
sorted_frame["true_positive_rate"] = (
435-
sorted_frame["truth"].cumsum() / sorted_frame["truth"].sum()
435+
sorted_frame["truth"].cumsum() / max(1, sorted_frame["truth"].sum())
436436
)
437437
sorted_frame["false_positive_rate"] = (
438-
sorted_frame["notY"].cumsum() / sorted_frame["notY"].sum()
438+
sorted_frame["notY"].cumsum() / max(1, sorted_frame["notY"].sum())
439439
)
440440
sorted_frame["true_negative_rate"] = (
441441
sorted_frame["notY"].sum() - sorted_frame["notY"].cumsum()
442-
) / sorted_frame["notY"].sum()
442+
) / max(1, sorted_frame["notY"].sum())
443443
sorted_frame["false_negative_rate"] = (
444444
sorted_frame["truth"].sum() - sorted_frame["truth"].cumsum()
445-
) / sorted_frame["truth"].sum()
445+
) / max(1, sorted_frame["truth"].sum())
446446

447447
# derived facts and synonyms
448448
sorted_frame["recall"] = sorted_frame["true_positive_rate"]

0 commit comments

Comments
 (0)