@@ -99,19 +99,20 @@ def matching_roc_area_curve(auc):
9999 q_eps = 1e-6
100100 q_low = 0
101101 q_high = 1
102- while (q_low + q_eps < q_high ):
103- q_mid = (q_low + q_high )/ 2.0
104- q_mid_area = numpy .mean (
105- 1 - (1 - (1 - eval_pts )** q_mid )** (1 / q_mid ))
102+ while q_low + q_eps < q_high :
103+ q_mid = (q_low + q_high ) / 2.0
104+ q_mid_area = numpy .mean (1 - (1 - (1 - eval_pts ) ** q_mid ) ** (1 / q_mid ))
106105 if q_mid_area <= auc :
107106 q_high = q_mid
108107 else :
109108 q_low = q_mid
110109 q = (q_low + q_high ) / 2.0
111- return {'auc' : auc ,
112- 'q' : q ,
113- 'x' : 1 - eval_pts ,
114- 'y' : 1 - (1 - (1 - eval_pts )** q )** (1 / q )}
110+ return {
111+ "auc" : auc ,
112+ "q" : q ,
113+ "x" : 1 - eval_pts ,
114+ "y" : 1 - (1 - (1 - eval_pts ) ** q ) ** (1 / q ),
115+ }
115116
116117
117118# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
@@ -160,7 +161,7 @@ def plot_roc(
160161 lw = 2
161162 matplotlib .pyplot .gcf ().clear ()
162163 fig1 , ax1 = matplotlib .pyplot .subplots ()
163- ax1 .set_aspect (' equal' )
164+ ax1 .set_aspect (" equal" )
164165 matplotlib .pyplot .plot (
165166 fpr ,
166167 tpr ,
@@ -172,10 +173,8 @@ def plot_roc(
172173 matplotlib .pyplot .plot ([0 , 1 ], [0 , 1 ], color = "navy" , lw = lw , linestyle = "--" )
173174 if ideal_curve is not None :
174175 matplotlib .pyplot .plot (
175- ideal_curve ['x' ],
176- ideal_curve ['y' ],
177- linestyle = '--' ,
178- color = ideal_line_color )
176+ ideal_curve ["x" ], ideal_curve ["y" ], linestyle = "--" , color = ideal_line_color
177+ )
179178 matplotlib .pyplot .xlim ([0.0 , 1.0 ])
180179 matplotlib .pyplot .ylim ([0.0 , 1.0 ])
181180 matplotlib .pyplot .xlabel ("False Positive Rate (1-Specificity)" )
@@ -186,14 +185,28 @@ def plot_roc(
186185 return auc
187186
188187
189- def dual_density_plot (probs , istrue , title = "Double density plot" , * , truth_target = True ):
188+ def dual_density_plot (
189+ probs ,
190+ istrue ,
191+ title = "Double density plot" ,
192+ * ,
193+ truth_target = True ,
194+ positive_label = "positive examples" ,
195+ negative_lable = "negative examples" ,
196+ ylable = "density of examples" ,
197+ xlabel = "model score"
198+ ):
190199 """
191200 Plot a dual density plot of numeric prediction probs against boolean istrue.
192201
193202 :param probs: vector of numeric predictions.
194203 :param istrue: truth vector
195204 :param title: title of plot
196205 :param truth_target: value considered true
206+ :param positive_label: label for positive class
207+ :param negative_lable: label for negative class
208+ :param ylable: y axis label
209+ :param xlabel: x axis label
197210 :return: None, plot produced by function call.
198211
199212 Example:
@@ -220,10 +233,10 @@ def dual_density_plot(probs, istrue, title="Double density plot", *, truth_targe
220233 preds_on_negative = [
221234 probs [i ] for i in range (len (probs )) if not istrue [i ] == truth_target
222235 ]
223- seaborn .kdeplot (preds_on_positive , label = "positive examples" , shade = True )
224- seaborn .kdeplot (preds_on_negative , label = "negative examples" , shade = True )
225- matplotlib .pyplot .ylabel ("density of examples" )
226- matplotlib .pyplot .xlabel ("model score" )
236+ seaborn .kdeplot (preds_on_positive , label = positive_label , shade = True )
237+ seaborn .kdeplot (preds_on_negative , label = negative_lable , shade = True )
238+ matplotlib .pyplot .ylabel (ylable )
239+ matplotlib .pyplot .xlabel (xlabel )
227240 matplotlib .pyplot .title (title )
228241 matplotlib .pyplot .show ()
229242
@@ -242,7 +255,15 @@ def dual_hist_plot(probs, istrue, title="Dual Histogram Plot"):
242255
243256
244257def dual_density_plot_proba1 (
245- probs , istrue , title = "Double density plot" , * , truth_target = True
258+ probs ,
259+ istrue ,
260+ title = "Double density plot" ,
261+ * ,
262+ truth_target = True ,
263+ positive_label = "positive examples" ,
264+ negative_lable = "negative examples" ,
265+ ylable = "density of examples" ,
266+ xlabel = "model score"
246267):
247268 """
248269 Plot a dual density plot of numeric prediction probs[:,1] against boolean istrue.
@@ -251,6 +272,10 @@ def dual_density_plot_proba1(
251272 :param istrue: truth target
252273 :param title: title of plot
253274 :param truth_target: value considered true
275+ :param positive_label: label for positive class
276+ :param negative_lable: label for negative class
277+ :param ylable: y axis label
278+ :param xlabel: x axis label
254279 :return: None, plot produced by call.
255280 """
256281 istrue = [v for v in istrue ]
@@ -261,10 +286,10 @@ def dual_density_plot_proba1(
261286 preds_on_negative = [
262287 probs [i , 1 ] for i in range (len (probs )) if not istrue [i ] == truth_target
263288 ]
264- seaborn .kdeplot (preds_on_positive , label = "positive examples" , shade = True )
265- seaborn .kdeplot (preds_on_negative , label = "negative examples" , shade = True )
266- matplotlib .pyplot .ylabel ("density of examples" )
267- matplotlib .pyplot .xlabel ("model score" )
289+ seaborn .kdeplot (preds_on_positive , label = positive_label , shade = True )
290+ seaborn .kdeplot (preds_on_negative , label = negative_lable , shade = True )
291+ matplotlib .pyplot .ylabel (ylable )
292+ matplotlib .pyplot .xlabel (xlabel )
268293 matplotlib .pyplot .title (title )
269294 matplotlib .pyplot .show ()
270295
@@ -471,12 +496,14 @@ def threshold_statistics(
471496 # basic cumulative facts
472497 sorted_frame ["count" ] = sorted_frame ["one" ].cumsum () # predicted true so far
473498 sorted_frame ["fraction" ] = sorted_frame ["count" ] / max (1 , sorted_frame ["one" ].sum ())
474- sorted_frame ["precision" ] = sorted_frame ["truth" ].cumsum () / sorted_frame ["count" ].clip (lower = 1 )
475- sorted_frame ["true_positive_rate" ] = (
476- sorted_frame ["truth" ].cumsum () / max (1 , sorted_frame ["truth" ].sum ())
499+ sorted_frame ["precision" ] = sorted_frame ["truth" ].cumsum () / sorted_frame [
500+ "count"
501+ ].clip (lower = 1 )
502+ sorted_frame ["true_positive_rate" ] = sorted_frame ["truth" ].cumsum () / max (
503+ 1 , sorted_frame ["truth" ].sum ()
477504 )
478- sorted_frame ["false_positive_rate" ] = (
479- sorted_frame [ "notY" ]. cumsum () / max ( 1 , sorted_frame ["notY" ].sum () )
505+ sorted_frame ["false_positive_rate" ] = sorted_frame [ "notY" ]. cumsum () / max (
506+ 1 , sorted_frame ["notY" ].sum ()
480507 )
481508 sorted_frame ["true_negative_rate" ] = (
482509 sorted_frame ["notY" ].sum () - sorted_frame ["notY" ].cumsum ()
@@ -486,7 +513,7 @@ def threshold_statistics(
486513 ) / max (1 , sorted_frame ["truth" ].sum ())
487514
488515 # approximate cdf work
489- sorted_frame [' cdf' ] = 1 - sorted_frame [' fraction' ]
516+ sorted_frame [" cdf" ] = 1 - sorted_frame [" fraction" ]
490517
491518 # derived facts and synonyms
492519 sorted_frame ["recall" ] = sorted_frame ["true_positive_rate" ]
0 commit comments