@@ -419,7 +419,7 @@ def calculate_fisher_information_hist1d(
419419 theta ,
420420 luminosity ,
421421 observable ,
422- nbins ,
422+ bins ,
423423 histrange = None ,
424424 cuts = None ,
425425 efficiency_functions = None ,
@@ -441,12 +441,14 @@ def calculate_fisher_information_hist1d(
441441 Expression for the observable to be histogrammed. The str will be parsed by Python's `eval()` function
442442 and can use the names of the observables in the MadMiner files.
443443
444- nbins : int
445- Number of bins in the histogram, excluding overflow bins.
444+ bins : int or ndarray
445+ If int: number of bins in the histogram, excluding overflow bins. Otherwise, defines the bin boundaries
446+ (excluding overflow bins).
446447
447448 histrange : tuple of float or None, optional
448449 Minimum and maximum value of the histogram in the form `(min, max)`. Overflow bins are always added. If
449- None, variable-width bins with equal cross section are constructed automatically. Default value: None.
450+ None and bins is an int, variable-width bins with equal cross section are constructed automatically.
451+ Default value: None.
450452
451453 cuts : None or list of str, optional
452454 Cuts. Each entry is a parseable Python expression that returns a bool (True if the event should pass a cut,
@@ -478,19 +480,11 @@ def calculate_fisher_information_hist1d(
478480 cuts = []
479481 if efficiency_functions is None :
480482 efficiency_functions = []
481- include_nuisance_parameters = include_nuisance_parameters and (self .nuisance_parameters is not None )
482483
483484 # Binning
484- dynamic_binning = histrange is None
485- if dynamic_binning :
486- n_bins_total = nbins
487- bin_boundaries = self ._calculate_dynamic_binning (
488- observable , theta , nbins , n_events_dynamic_binning , cuts , efficiency_functions
489- )
490- logger .debug ("Automatic dynamic binning: bin boundaries %s" , bin_boundaries )
491- else :
492- n_bins_total = nbins + 2
493- bin_boundaries = np .linspace (histrange [0 ], histrange [1 ], num = nbins + 1 )
485+ bin_boundaries , n_bins_total = self ._calculate_binning (
486+ bins , cuts , efficiency_functions , histrange , n_events_dynamic_binning , observable , theta
487+ )
494488
495489 # Loop over batches
496490 weights_benchmarks = np .zeros ((n_bins_total , self .n_benchmarks ))
@@ -512,17 +506,20 @@ def calculate_fisher_information_hist1d(
512506 histo_observables = np .asarray ([self ._eval_observable (obs_event , observable ) for obs_event in observations ])
513507
514508 # Find bins
515- bins = np .searchsorted (bin_boundaries , histo_observables )
516- assert ((0 <= bins ) & (bins < n_bins_total )).all (), "Wrong bin {}" .format (bins )
509+ i_bins = np .searchsorted (bin_boundaries , histo_observables )
510+ assert ((0 <= i_bins ) & (i_bins < n_bins_total )).all (), "Wrong bin {}" .format (i_bins )
517511
518512 # Add up
519513 for i in range (n_bins_total ):
520- if len (weights [bins == i ]) > 0 :
521- weights_benchmarks [i ] += np .sum (weights [bins == i ], axis = 0 )
522- weights_squared_benchmarks [i ] += np .sum (weights [bins == i ] ** 2 , axis = 0 )
514+ if len (weights [i_bins == i ]) > 0 :
515+ weights_benchmarks [i ] += np .sum (weights [i_bins == i ], axis = 0 )
516+ weights_squared_benchmarks [i ] += np .sum (weights [i_bins == i ] ** 2 , axis = 0 )
523517
524518 weights_benchmark_uncertainties = weights_squared_benchmarks ** 0.5
525519
520+ # Check cross sections per bin
521+ self ._check_binning_stats (weights_benchmarks , weights_benchmark_uncertainties , theta )
522+
526523 # Calculate Fisher information in histogram
527524 fisher_info , covariance = self ._calculate_fisher_information (
528525 theta ,
@@ -534,14 +531,55 @@ def calculate_fisher_information_hist1d(
534531 )
535532 return fisher_info , covariance
536533
534+ def _check_binning_stats (
535+ self , weights_benchmarks , weights_benchmark_uncertainties , theta , report = 5 , n_bins_first_axis = None
536+ ):
537+ theta_matrix = self ._get_theta_benchmark_matrix (theta , zero_pad = False ) # (n_benchmarks_phys,)
538+ sigma = mdot (theta_matrix , weights_benchmarks ) # Shape (n_bins,)
539+ sigma_uncertainties = mdot (theta_matrix , weights_benchmark_uncertainties ) # Shape (n_bins,)
540+ rel_uncertainties = sigma_uncertainties / np .maximum (sigma , 1.0e-12 )
541+
542+ order = np .argsort (rel_uncertainties )[::- 1 ]
543+
544+ logger .info ("Bins with largest statistical uncertainties on rates:" )
545+ for i_bin in order [:report ]:
546+ bin_nd = i_bin + 1
547+ if n_bins_first_axis is not None :
548+ bin_nd = (i_bin // n_bins_first_axis + 1 , i_bin % n_bins_first_axis + 1 )
549+ logger .info (
550+ " Bin %s: (%.5f +/- %.5f) fb (%.0f %%)" ,
551+ bin_nd ,
552+ 1000.0 * sigma [i_bin ],
553+ 1000.0 * sigma_uncertainties [i_bin ],
554+ 100.0 * rel_uncertainties [i_bin ],
555+ )
556+
557+ def _calculate_binning (
558+ self , bins , cuts , efficiency_functions , histrange , n_events_dynamic_binning , observable , theta
559+ ):
560+ dynamic_binning = histrange is None and isinstance (bins , int )
561+ if dynamic_binning :
562+ n_bins_total = bins
563+ bin_boundaries = self ._calculate_dynamic_binning (
564+ observable , theta , bins , n_events_dynamic_binning , cuts , efficiency_functions
565+ )
566+ logger .debug ("Automatic dynamic binning: bin boundaries %s" , bin_boundaries )
567+ elif isinstance (bins , int ):
568+ n_bins_total = bins + 2
569+ bin_boundaries = np .linspace (histrange [0 ], histrange [1 ], num = bins + 1 )
570+ else :
571+ bin_boundaries = bins
572+ n_bins_total = len (bins ) + 1
573+ return bin_boundaries , n_bins_total
574+
537575 def calculate_fisher_information_hist2d (
538576 self ,
539577 theta ,
540578 luminosity ,
541579 observable1 ,
542- nbins1 ,
580+ bins1 ,
543581 observable2 ,
544- nbins2 ,
582+ bins2 ,
545583 histrange1 = None ,
546584 histrange2 = None ,
547585 cuts = None ,
@@ -565,15 +603,17 @@ def calculate_fisher_information_hist2d(
565603 Expression for the first observable to be histogrammed. The str will be parsed by Python's `eval()` function
566604 and can use the names of the observables in the MadMiner files.
567605
568- nbins1 : int
569- Number of bins along the first axis in the histogram, excluding overflow bins.
606+ bins1 : int or ndarray
607+ If int: number of bins along the first axis in the histogram in the histogram, excluding overflow bins.
608+ Otherwise, defines the bin boundaries along the first axis in the histogram (excluding overflow bins).
570609
571610 observable2 : str
572611 Expression for the first observable to be histogrammed. The str will be parsed by Python's `eval()` function
573612 and can use the names of the observables in the MadMiner files.
574613
575- nbins2 : int
576- Number of bins along the first axis in the histogram, excluding overflow bins.
614+ bins2 : int or ndarray
615+ If int: number of bins along the second axis in the histogram in the histogram, excluding overflow bins.
616+ Otherwise, defines the bin boundaries along the second axis in the histogram (excluding overflow bins).
577617
578618 histrange1 : tuple of float or None, optional
579619 Minimum and maximum value of the first axis of the histogram in the form `(min, max)`. Overflow bins are
@@ -617,27 +657,12 @@ def calculate_fisher_information_hist2d(
617657 efficiency_functions = []
618658
619659 # Binning
620- dynamic_binning1 = histrange1 is None
621- if dynamic_binning1 :
622- n_bins1_total = nbins1
623- bin1_boundaries = self ._calculate_dynamic_binning (
624- observable1 , theta , nbins1 , n_events_dynamic_binning , cuts , efficiency_functions
625- )
626- logger .debug ("Automatic dynamic binning for observable 1: bin boundaries %s" , bin1_boundaries )
627- else :
628- n_bins1_total = nbins1 + 2
629- bin1_boundaries = np .linspace (histrange1 [0 ], histrange1 [1 ], num = nbins1 + 1 )
630-
631- dynamic_binning2 = histrange2 is None
632- if dynamic_binning2 :
633- n_bins2_total = nbins2
634- bin2_boundaries = self ._calculate_dynamic_binning (
635- observable2 , theta , nbins2 , n_events_dynamic_binning , cuts , efficiency_functions
636- )
637- logger .debug ("Automatic dynamic binning for observable 2: bin boundaries %s" , bin2_boundaries )
638- else :
639- n_bins2_total = nbins1 + 2
640- bin2_boundaries = np .linspace (histrange2 [0 ], histrange2 [1 ], num = nbins2 + 1 )
660+ bin1_boundaries , n_bins1_total = self ._calculate_binning (
661+ bins1 , cuts , efficiency_functions , histrange1 , n_events_dynamic_binning , observable1 , theta
662+ )
663+ bin2_boundaries , n_bins2_total = self ._calculate_binning (
664+ bins2 , cuts , efficiency_functions , histrange2 , n_events_dynamic_binning , observable2 , theta
665+ )
641666
642667 # Loop over batches
643668 weights_benchmarks = np .zeros ((n_bins1_total , n_bins2_total , self .n_benchmarks ))
@@ -664,25 +689,31 @@ def calculate_fisher_information_hist2d(
664689 )
665690
666691 # Find bins
667- bins1 = np .searchsorted (bin1_boundaries , histo1_observables )
668- bins2 = np .searchsorted (bin2_boundaries , histo2_observables )
692+ i_bins1 = np .searchsorted (bin1_boundaries , histo1_observables )
693+ i_bins2 = np .searchsorted (bin2_boundaries , histo2_observables )
669694
670- assert ((0 <= bins1 ) & (bins1 < n_bins1_total )).all (), "Wrong bin {}" .format (bins1 )
671- assert ((0 <= bins1 ) & (bins1 < n_bins1_total )).all (), "Wrong bin {}" .format (bins1 )
695+ assert ((0 <= i_bins1 ) & (i_bins1 < n_bins1_total )).all (), "Wrong bin {}" .format (i_bins1 )
696+ assert ((0 <= i_bins2 ) & (i_bins2 < n_bins1_total )).all (), "Wrong bin {}" .format (i_bins2 )
672697
673698 # Add up
674699 for i in range (n_bins1_total ):
675700 for j in range (n_bins2_total ):
676- if len (weights [(bins1 == i ) & (bins2 == j )]) > 0 :
677- weights_benchmarks [i , j ] += np .sum (weights [(bins1 == i ) & (bins2 == j )], axis = 0 )
678- weights_squared_benchmarks [i , j ] += np .sum (weights [(bins1 == i ) & (bins2 == j )] ** 2 , axis = 0 )
701+ if len (weights [(i_bins1 == i ) & (i_bins2 == j )]) > 0 :
702+ weights_benchmarks [i , j ] += np .sum (weights [(i_bins1 == i ) & (i_bins2 == j )], axis = 0 )
703+ weights_squared_benchmarks [i , j ] += np .sum (
704+ weights [(i_bins1 == i ) & (i_bins2 == j )] ** 2 , axis = 0
705+ )
679706
680707 weights_benchmark_uncertainties = weights_squared_benchmarks ** 0.5
681708
682709 # Calculate Fisher information in histogram
683710 weights_benchmarks = weights_benchmarks .reshape (- 1 , self .n_benchmarks )
684711 weights_benchmark_uncertainties = weights_benchmark_uncertainties .reshape (- 1 , self .n_benchmarks )
685712
713+ self ._check_binning_stats (
714+ weights_benchmarks , weights_benchmark_uncertainties , theta , n_bins_first_axis = n_bins1_total
715+ )
716+
686717 fisher_info , covariance = self ._calculate_fisher_information (
687718 theta ,
688719 weights_benchmarks ,
0 commit comments