diff --git a/ShapeAnalysis/python/KernelFactory.py b/ShapeAnalysis/python/KernelFactory.py new file mode 100755 index 000000000..2d35a6e55 --- /dev/null +++ b/ShapeAnalysis/python/KernelFactory.py @@ -0,0 +1,430 @@ +#!/usr/bin/env python + +from LatinoAnalysis.ShapeAnalysis.ShapeFactoryMulti import ShapeFactory as SF +import array +from sklearn.utils import resample +import tempfile +import root_numpy as rnp +import numpy as np +import math +from numpy import linalg as LA + +# Common Tools & batch +from LatinoAnalysis.Tools.commonTools import * + + +# ----------------------------------------------------- KernelFactory -------------------------------------- + +class KernelFactory: + + # _____________________________________________________________________________ + def __init__(self): + + self._fileIn = None + self._skipMissingNuisance = False + + + def datasetFromTree(self, treeIn, roovars, name, reweight=""): + if reweight == "": + rooweight = ROOT.RooRealVar('weight', 'weight', -1000., 1000) + return ROOT.RooDataSet (name, name, treeIn, ROOT.RooArgSet(*(roovars + [rooweight])), "", "weight") + + origweight = ROOT.RooRealVar('weight', 'weight', -1000., 1000) + rew = ROOT.RooRealVar(reweight, reweight, -1000., 1000) + origdataset = ROOT.RooDataSet (name, name, treeIn, ROOT.RooArgSet(*(roovars + [origweight, rew])), "") + wFunc = ROOT.RooFormulaVar('reweighted', 'reweighted', 'weight*'+reweight, ROOT.RooArgList(origweight, rew)) + w = origdataset.addColumn(wFunc) + return ROOT.RooDataSet (origdataset.GetName(), origdataset.GetTitle(), origdataset, origdataset.get(), "", w.GetName()) + + + # _____________________________________________________________________________ + # _____________________________________________________________________________ + def runAlgo(self, fileTreeIn, treeName, cutName, variableName, variable, sampleName, newSampleName, reweight = "", saveEigenVariations=True, resamples=100): + treeInOrig = fileTreeIn.Get(cutName + "/" + treeName +"/"+treeName+'_'+sampleName) + if treeInOrig == None: + raise RuntimeError('Missing tree '+ cutName + "/" + treeName+"/"+treeName+'_'+sampleName) + #print variable + branches = variable['name'].split(':') + for branch in branches: + if treeInOrig.GetBranch(branch) == None: + raise RuntimeError('Missing branch ' + branch + ' in tree '+ cutName + "/" + variableName+"/"+treeName+'_'+sampleName) + if treeInOrig.GetBranch('weight') == None: + raise RuntimeError("Missing branch weight in tree "+ cutName + "/" + variableName+"/"+treeName+'_'+sampleName) + if reweight != "": + if treeInOrig.GetBranch(reweight) == None: + raise RuntimeError('Missing branch weight'+reweight+" in tree "+ cutName + "/" + variableName+"/"+treeName+'_'+sampleName) + shape = SF._makeshape(variableName, variable['range']) + if len(branches) != shape.GetDimension(): + raise RuntimeError('Mismatch between expression '+variable['name']+" and dimension of the allocated shape "+str(shape.GetDimension())) + fold = 0 + if 'fold' in variable.keys(): + fold = variable['fold'] + rho=1. + if 'rho' in variable.keys(): + rho = variable['rho'] + # select only events with weight different from 0 + selectionW = "abs(weight) > 0." + #if weightSuffix != "": + # selectionW = "abs(weight"+weightSuffix+") > 0." + treeIn = treeInOrig.CopyTree(selectionW) + + roovars = [] + vbinedges = [] + # remember y:x + for ib, branch in enumerate(reversed(branches)): + if ib == 0: + axis = shape.GetXaxis() + elif ib == 1: + axis = shape.GetYaxis() + else: + raise RuntimeError("cannot handle histo with "+len(branches)+" dimensions") + lowerbound = axis.GetXmin() if fold != 1 and fold != 3 else 0 # will not work for non positive deined observables + upperbound = axis.GetXmax() if fold != 2 and fold != 3 else axis.GetXmax()*20 + #if (ib == 0): + #lowerbound = axis.GetXmin() + #upperbound = axis.GetXmax() + binning = ROOT.RooBinning(lowerbound, upperbound) + binedges = [lowerbound] + for ibin in range(1, axis.GetNbins()): + edge = axis.GetBinUpEdge(ibin) + binning.addBoundary(edge) + binedges.append(edge) + binedges.append(upperbound) + vbinedges.append(binedges) + roovar = ROOT.RooRealVar(branch, branch, lowerbound, upperbound) + print "setting", roovar.GetName(), lowerbound, upperbound + #roovar.setBinning(binning) + roovars.append(roovar) + print vbinedges + if treeIn.GetEntries() == 0: + print "WARNING: tree"+ cutName + "/" + variableName+"/"+treeName+'_'+sampleName+" has 0 entries" + return 0 + + # generate samples cutting overflows/underflows + #atree = rnp.tree2array(treeIn) + #for ivar,var in enumerate(roovars): + # atree[var.GetName()][atree[var.GetName()]vbinedges[ivar][-1]] = vbinedges[ivar][-1] - 1 + tf = tempfile.NamedTemporaryFile() + fileout=ROOT.TFile(tf.name, "recreate") + fileout.cd() + #print atree + #treeInCut = rnp.array2tree(atree) + + ROOT.RooNumIntConfig.defaultConfig().setEpsAbs(1e-5) + ROOT.RooNumIntConfig.defaultConfig().setEpsRel(1e-5) + + dataset = self.datasetFromTree(treeIn, roovars, 'dataset', reweight) + if len(branches)==1: + pdf = ROOT.RooKeysPdf ("weighted", "weighted", roovars[0], dataset, ROOT.RooKeysPdf.MirrorBoth) + else: + pdf = ROOT.RooNDKeysPdfAnalytical("weighted", "weighted", ROOT.RooArgList(*roovars), dataset, "mad", rho) + print "BUILT!" + nvariarions = (len(vbinedges[0])-1) if len(branches)==1 else (len(vbinedges[0])-1)*(len(vbinedges[1])-1) + average=np.empty([(len(vbinedges[0])-1) if len(branches)==1 else (len(vbinedges[0])-1)*(len(vbinedges[1])-1)]) + for binX in range(len(vbinedges[0])-1): + if len(branches)==1: + roovars[0].setRange("binX"+str(binX), vbinedges[0][binX], vbinedges[0][binX+1]) + value = pdf.createIntegral(roovars[0], roovars[0], "binX"+str(bin)).getVal() + average[binX] = value + else: + for binY in range(len(vbinedges[1])-1): + rangename = "binX"+str(binX)+"binY"+str(binY) + print rangename + roovars[0].setRange(vbinedges[0][binX], vbinedges[0][binX+1]) + roovars[1].setRange(vbinedges[1][binY], vbinedges[1][binY+1]) + if binX == len(vbinedges[0])-2 or binY == len(vbinedges[1])-2: + value = pdf.analyticalIntegral(1) + print "falling back to analytical for bin",binX,binY,value + else: + value = pdf.createIntegral(ROOT.RooArgSet(*roovars), ROOT.RooArgSet()).getVal() + valueana = pdf.analyticalIntegral(1) + if abs(value-valueana)/max(value, 1e-20) > 200: + value = max(value, valueana) + print binX,binY,value,valueana + #value = pdf.analyticalIntegral(1) + average[binX*(len(vbinedges[1])-1)+binY] = value if not math.isnan(value) else 0 # protect? + print average + #average /= np.sum(average) + self._fileOut.cd ( cutName + "/" + variableName) + nominal = ROOT.TH1D("histo_"+newSampleName, "histo_"+newSampleName, nvariarions, 0., float(nvariarions)) + #rnp.array2hist(average*dataset.sumEntries(), nominal) + rnp.array2hist(average, nominal) + nominal.Write() + # in this case we resample + if saveEigenVariations: + atree = rnp.tree2array(treeIn) + #tf = tempfile.NamedTemporaryFile() + #fileout=ROOT.TFile(tf.name, "recreate") + #fileout.cd() + integrals=np.empty([resamples, (len(vbinedges[0])-1) if len(branches)==1 else (len(vbinedges[0])-1)*(len(vbinedges[1])-1)]) + for ir in range(resamples): + sumw = -999 + while sumw < 0: + atreesub = resample(atree, n_samples=treeIn.GetEntries(), random_state=1000+ir) + sumw = np.sum(atreesub['weight']) + treesub = rnp.array2tree(atreesub) + for ivar in range(len(branches)): + roovars[ivar].setRange(vbinedges[ivar][0], vbinedges[ivar][-1]) + print "resetting", roovars[ivar].GetName(), vbinedges[ivar][0], vbinedges[ivar][-1] + datasetsub = self.datasetFromTree(treesub, roovars, "datasetsub"+str(ir), reweight) + print datasetsub.sumEntries() + datasetsub.get().Print() + #ROOT.RooDataSet("datasetsub"+str(ir), "datasetsub"+str(ir), treesub, ROOT.RooArgSet(*(roovars + [rooweight])), "", "weight") + if len(branches)==1: + pdf = ROOT.RooKeysPdf ("weighted"+str(ir), "weighted"+str(ir), roovars[0], datasetsub, ROOT.RooKeysPdf.MirrorBoth) + else: + pdf = ROOT.RooNDKeysPdfAnalytical ("weighted"+str(ir), "weighted"+str(ir), ROOT.RooArgList(*roovars), datasetsub, "mad", rho) + #normalization = pdf.analyticalIntegral(1) + for binX in range(len(vbinedges[0])-1): + if len(branches)==1: + roovars[0].setRange("binX"+str(binX), vbinedges[0][binX], vbinedges[0][binX+1]) + value = pdf.createIntegral(roovars[0], roovars[0], "binX"+str(bin)).getVal() + integrals[i, binX] = value + else: + for binY in range(len(vbinedges[1])-1): + rangename = "binX"+str(binX)+"binY"+str(binY) + roovars[0].setRange(vbinedges[0][binX], vbinedges[0][binX+1]) + roovars[1].setRange(vbinedges[1][binY], vbinedges[1][binY+1]) + if binX == len(vbinedges[0])-2 or binY == len(vbinedges[1])-2: + value = pdf.analyticalIntegral(1) + else: + value = pdf.createIntegral(ROOT.RooArgSet(*roovars), ROOT.RooArgSet()).getVal() + valueana = pdf.analyticalIntegral(1) + if abs(value-valueana)/max(value, 1e-20) > 200: + value = max(value, valueana) + #value = pdf.analyticalIntegral(1) + #value = pdf.createIntegral(ROOT.RooArgSet(*roovars)).getVal() + #print "setting bin", binX*(len(vbinedges[1])-1)+binY, "to", value + integrals[ir, binX*(len(vbinedges[1])-1)+binY] = value if not math.isnan(value) else 0 # protect? + del datasetsub + del pdf + print integrals + ###normalizations = np.sum(integrals, axis=1) + #print normalizations + ###integrals = integrals[normalizations>0, :] + ###normalizations = normalizations[normalizations>0] + #print normalizations + ###integrals /= np.where(normalizations == 0, 1, normalizations)[:, np.newaxis] + #print normalizations, integrals + #print integrals.sum(axis = 1) + #average = np.mean(integrals, axis=0) + #print average + + cova_m = np.cov(np.transpose(integrals)) + try: + w, v = LA.eig(cova_m) + invv = LA.inv(v) + except: + w = np.zeros_like(average) + v = np.identity(average.shape[0]) + invv = v + + #protect against negative eigenvalues + w = w.clip(0,10000) + print v.shape + print w + print average.shape + sqrtw = np.sqrt(w) + average_rotated = np.matmul(v,average) + + # compute a number of variations equal to the number of bins, but in the orthogonal space + variations_up=np.empty([nvariarions, nvariarions]) + variations_do=np.empty([nvariarions, nvariarions]) + + for bin in range(nvariarions): + average_rotated_shifted_up = np.copy(average_rotated) + average_rotated_shifted_up[bin] = average_rotated_shifted_up[bin]+sqrtw[bin] + shifted_up = np.matmul(invv,average_rotated_shifted_up) + variations_up[bin] = shifted_up + + average_rotated_shifted_do = np.copy(average_rotated) + average_rotated_shifted_do[bin] = average_rotated_shifted_do[bin]-sqrtw[bin] + shifted_do = np.matmul(invv,average_rotated_shifted_do) + variations_do[bin] = shifted_do + #print average + #print variations_up + #print variations_do + + #self._fileOut.cd ( cutName + "/" + variableName) + #nominal = ROOT.TH1D("histo_"+newSampleName, "histo_"+newSampleName, nvariarions, 0., float(nvariarions)) + #rnp.array2hist(average*dataset.sumEntries(), nominal) + #nominal.Write() + + #if saveEigenVariations == True: + self._fileOut.cd ( cutName + "/" + variableName) + for ivar in range(nvariarions): + variedUp = ROOT.TH1D("histo_"+newSampleName+"_eigenVariation"+str(ivar)+"Up", "histo_"+newSampleName+"_eigenVariation"+str(ivar)+"Up", nvariarions, 0., float(nvariarions)) + #rnp.array2hist(variations_up[ivar]*dataset.sumEntries(), variedUp) + rnp.array2hist(variations_up[ivar], variedUp) + variedDown = ROOT.TH1D("histo_"+newSampleName+"_eigenVariation"+str(ivar)+"Down", "histo_"+newSampleName+"_eigenVariation"+str(ivar)+"Down", nvariarions, 0., float(nvariarions)) + #rnp.array2hist(variations_do[ivar]*dataset.sumEntries(), variedDown) + rnp.array2hist(variations_do[ivar], variedDown) + variedUp.Write() + variedDown.Write() + #return nominal + + + + + + def mkKernel( self, inputFile, outputFile, fileWithTree, treeName, variables, cuts, samples, structureFile, nuisances, samplesToTreat): + + print "==================" + print "==== mkKernel ====" + print "==================" + + # + # copied from mkdatacards.py + # + if os.path.isdir(inputFile): + # ONLY COMPATIBLE WITH OUTPUTS MERGED TO SAMPLE LEVEL!! + self._fileIn = {} + for sampleName in samples: + self._fileIn[sampleName] = ROOT.TFile.Open(inputFile+'/plots_%s_ALL_%s.root' % (self._tag, sampleName)) + if not self._fileIn[sampleName]: + raise RuntimeError('Input file for sample ' + sampleName + ' missing') + else: + self._fileIn = ROOT.TFile(inputFile, "READ") + + # get the tree from the file with the correct tree: + if not os.path.exists(fileWithTree): + raise RuntimeError('Input file ' + fileWithTree + ' missing') + fileTreeIn = ROOT.TFile(fileWithTree, "READ") + + + # + # the new output file with histograms + # + + self._fileOut = ROOT.TFile(outputFile, "recreate") + + # + # and prepare the structure of the output file as it is the input file + # + for cutName in cuts: + self._fileOut.mkdir ( cutName ) + for variableName, variable in variables.iteritems(): + self._fileOut.mkdir ( cutName + "/" + variableName) + + + # + # loop over cuts + # + for cutName in cuts: + + # + # prepare the signals and background list of samples + # after removing the ones not to be used in this specific phase space + # + + for sampleName in samples: + + # loop over variables + for variableName, variable in variables.iteritems(): + # skip trees + if 'name' not in variable.keys(): + continue + + self._fileOut.cd ( cutName + "/" + variableName) + + # + # check if this variable is available only for a selected list of cuts + # + if 'cuts' in variable and cutName not in variable['cuts']: + continue + + #print " variableName = ", variableName + + histo = self._getHisto(cutName, variableName, sampleName) + try: + # save "events unmodified, just change the name" + float(variable['name']) + if sampleName not in samplesToTreat: + continue + histo.SetName("histo_"+sampleName+"_KEYS") + histo.SetTitle("histo_"+sampleName+"_KEYS") + print "saving unmodified", cutName, variableName, sampleName + histo.Write() + for nuisanceName, nuisance in nuisances.iteritems(): + if nuisanceName == 'stat': + continue + histoUp = self._getHisto(cutName, variableName, sampleName, "_"+nuisance['name']+"Up") + if histoUp != None: + histoUp.SetName(str(histoUp.GetName()).replace("histo_"+sampleName, "histo_"+sampleName+"_KEYS")) + histoUp.Write() + print "saving unmodified", cutName, variableName, sampleName, nuisanceName+"Up" + histoDown = self._getHisto(cutName, variableName, sampleName, "_"+nuisance['name']+"Down") + if histoDown != None: + histoDown.SetName(str(histoDown.GetName()).replace("histo_"+sampleName, "histo_"+sampleName+"_KEYS")) + histoDown.Write() + print "saving unmodified", cutName, variableName, sampleName, nuisanceName+"Down" + continue + except ValueError: + pass + + if structureFile[sampleName]['isData'] == 1 : + pass + elif sampleName in samplesToTreat: + self.runAlgo(fileTreeIn, treeName, cutName, variableName, variable, sampleName, sampleName+"_KEYS", "", False) + + + # + # Now check the nuisances: + # Nuisances + # + + for nuisanceName, nuisance in nuisances.iteritems(): + if 'type' not in nuisance: + raise RuntimeError('Nuisance ' + nuisanceName + ' is missing the type specification') + + if nuisanceName == 'stat' or nuisance['type'] == 'rateParam' or nuisance['type'] in ['lnN', 'lnU']: + # nothing to do ... + continue + + # check if a nuisance can be skipped because not in this particular cut + if 'cuts' in nuisance and cutName not in nuisance['cuts']: + continue + + if 'samples' in nuisance and sampleName not in nuisance['samples']: + continue + ''' + if nuisance['type'] == 'shape': + if nuisance['kind'] == 'weight': + self.runAlgo(fileTreeIn, treeName, cutName, variableName, variable, sampleName, sampleName+"_KEYS_"+nuisance['name']+"Up", 'reweight_'+nuisance['name']+"Up", saveEigenVariations=False) + self.runAlgo(fileTreeIn, treeName, cutName, variableName, variable, sampleName, sampleName+"_KEYS_"+nuisance['name']+"Down", 'reweight_'+nuisance['name']+"Down", saveEigenVariations=False) + ''' + + #elif nuisance['kind'] == 'tree' + # + + self._fileOut.Close() + print "-------------------------" + print " outputFile written : " , outputFile + print "-------------------------" + + + if type(self._fileIn) is dict: + for source in self._fileIn.values(): + source.Close() + else: + self._fileIn.Close() + + # _____________________________________________________________________________ + def _getHisto(self, cutName, variableName, sampleName, suffix = None): + shapeName = '%s/%s/histo_%s' % (cutName, variableName, sampleName) + if suffix: + shapeName += suffix + + if type(self._fileIn) is dict: + # by-sample ROOT file + histo = self._fileIn[sampleName].Get(shapeName) + else: + # Merged single ROOT file + histo = self._fileIn.Get(shapeName) + + #if not histo: + #print shapeName, 'not found' + + return histo + diff --git a/ShapeAnalysis/scripts/RooNDKeysPdfAnalytical.cxx b/ShapeAnalysis/scripts/RooNDKeysPdfAnalytical.cxx new file mode 100644 index 000000000..5c0699ff8 --- /dev/null +++ b/ShapeAnalysis/scripts/RooNDKeysPdfAnalytical.cxx @@ -0,0 +1,1882 @@ +#include +using namespace std; +/* +class RooNDKeysPdfAnalyticalAnalyticalAnalyticalAnalytical : public RooNDKeysPdfAnalyticalAnalyticalAnalytical { + + public: + RooNDKeysPdfAnalyticalAnalyticalAnalyticalAnalytical( const char *name, const char *title, const RooArgList &varList, const RooDataSet &data, + TString options = "ma", Double_t rho = 1, Double_t nSigma = 3, Bool_t rotate = kTRUE, + Bool_t sortInput = kTRUE) : RooNDKeysPdfAnalyticalAnalyticalAnalytical(name, title, varList, data, options, rho, nSigma, rotate, sortInput) {}; + + virtual Int_t getAnalyticalIntegral(RooArgSet& allVars, RooArgSet& analVars, const char* rangeName) const + { + cout << "CIAOOOOO" << endl; + Int_t code=0; + if (matchArgs(allVars,analVars,RooArgSet(_varList))) { code=1; } + return code; + }; + + + ClassDef(RooNDKeysPdfAnalyticalAnalyticalAnalyticalAnalytical, 1) +}; + +*/ +/***************************************************************************** + * Project: RooFit * + * Package: RooFitModels * + * File: $Id: RooNDKeysPdf.h 44368 2012-05-30 15:38:44Z axel $ + * Authors: * + * Max Baak, CERN, mbaak@cern.ch * + * * + * Copyright (c) 2000-2005, Regents of the University of California * + * and Stanford University. All rights reserved. * + * * + * Redistribution and use in source and binary forms, * + * with or without modification, are permitted according to the terms * + * listed in LICENSE (http://roofit.sourceforge.net/license.txt) * + *****************************************************************************/ +#ifndef ROO_NDKEYS_PDF_analytical +#define ROO_NDKEYS_PDF_analytical + +#include "RooAbsPdf.h" +#include "RooNDKeysPdf.h" +#include "RooRealProxy.h" +#include "RooSetProxy.h" +#include "RooRealConstant.h" +#include "RooDataSet.h" +#include "TH1.h" +#include "TAxis.h" +#include "TVectorD.h" +#include "TMatrixD.h" +#include "TMatrixDSym.h" +#include +#include +#include + +class RooRealVar; +class RooArgList; +class RooArgSet; +class RooChangeTracker; + +#ifndef __CINT__ +//class VecVecDouble : public std::vector > { } ; +//class VecTVecDouble : public std::vector { } ; +typedef std::pair iiPair; +typedef std::vector< iiPair > iiVec; +typedef std::pair itPair; +typedef std::vector< itPair > itVec; +#else +class itPair ; +#endif + +class RooNDKeysPdfAnalytical : public RooAbsPdf { + +public: + + enum Mirror {NoMirror, MirrorLeft, MirrorRight, MirrorBoth, + MirrorAsymLeft, MirrorAsymLeftRight, + MirrorAsymRight, MirrorLeftAsymRight, + MirrorAsymBoth }; + + RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const RooAbsData &data, + TString options = "ma", Double_t rho = 1, Double_t nSigma = 3, Bool_t rotate = kTRUE, + Bool_t sortInput = kTRUE); + + RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const TH1 &hist, TString options = "ma", + Double_t rho = 1, Double_t nSigma = 3, Bool_t rotate = kTRUE, Bool_t sortInput = kTRUE); + + RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const RooAbsData &data, + const TVectorD &rho, TString options = "ma", Double_t nSigma = 3, Bool_t rotate = kTRUE, + Bool_t sortInput = kTRUE); + + RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const RooAbsData &data, + const RooArgList &rhoList, TString options = "ma", Double_t nSigma = 3, Bool_t rotate = kTRUE, + Bool_t sortInput = kTRUE); + + RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const TH1 &hist, + const RooArgList &rhoList, TString options = "ma", Double_t nSigma = 3, Bool_t rotate = kTRUE, + Bool_t sortInput = kTRUE); + + RooNDKeysPdfAnalytical(const char *name, const char *title, RooAbsReal &x, const RooAbsData &data, Mirror mirror = NoMirror, + Double_t rho = 1, Double_t nSigma = 3, Bool_t rotate = kTRUE, Bool_t sortInput = kTRUE); + + RooNDKeysPdfAnalytical(const char *name, const char *title, RooAbsReal &x, RooAbsReal &y, const RooAbsData &data, + TString options = "ma", Double_t rho = 1.0, Double_t nSigma = 3, Bool_t rotate = kTRUE, + Bool_t sortInput = kTRUE); + + RooNDKeysPdfAnalytical(const RooNDKeysPdfAnalytical& other, const char* name=0); + virtual ~RooNDKeysPdfAnalytical(); + + virtual TObject* clone(const char* newname) const { return new RooNDKeysPdfAnalytical(*this,newname); } + + Int_t getAnalyticalIntegral(RooArgSet& allVars, RooArgSet& analVars, const char* rangeName=0) const ; + Double_t analyticalIntegral(Int_t code, const char* rangeName=0) const ; + + inline void fixShape(Bool_t fix) { + createPdf(kFALSE); + _fixedShape=fix; + } + + TMatrixD getWeights(const int &k) const; + + struct BoxInfo { + Bool_t filled; + Bool_t netFluxZ; + Double_t nEventsBW; + Double_t nEventsBMSW; + std::vector xVarLo, xVarHi; + std::vector xVarLoM3s, xVarLoP3s, xVarHiM3s, xVarHiP3s; + std::map bpsIdcs; + std::vector sIdcs; + std::vector bIdcs; + std::vector bmsIdcs; + } ; + +protected: + + RooListProxy _varList ; + TIterator* _varItr ; //! do not persist + + RooListProxy _rhoList; + TIterator *_rhoItr; //! do not persist +public: + Double_t evaluate() const; +protected: + void createPdf(Bool_t firstCall = kTRUE) const; + void setOptions() const; + void initialize() const; + void loadDataSet(Bool_t firstCall) const; + void mirrorDataSet() const; + void loadWeightSet() const; + void calculateShell(BoxInfo *bi) const; + void calculatePreNorm(BoxInfo *bi) const; + void sortDataIndices(BoxInfo *bi = 0) const; + void calculateBandWidth() const; + Double_t gauss(std::vector &x, std::vector> &weights) const; + void loopRange(std::vector &x, std::map &ibMap) const; + void boxInfoInit(BoxInfo *bi, const char *rangeName, Int_t code) const; + RooDataSet *createDatasetFromHist(const RooArgList &varList, const TH1 &hist) const; + void updateRho() const; + + mutable RooDataSet *_dataP; //! do not persist + const RooAbsData &_data; //! + mutable TString _options; + mutable Double_t _widthFactor; + mutable Double_t _nSigma; + + mutable Bool_t _fixedShape; + mutable Bool_t _mirror; + mutable Bool_t _debug; + mutable Bool_t _verbose; + + mutable Double_t _sqrt2pi; + mutable Int_t _nDim; + mutable Int_t _nEvents; + mutable Int_t _nEventsM; + mutable Double_t _nEventsW; + mutable Double_t _d; + mutable Double_t _n; + + // cached info on variable + + mutable std::vector > _dataPts; + mutable std::vector _dataPtsR; + mutable std::vector > _weights0; + mutable std::vector > _weights1; + mutable std::vector >* _weights; //! + +#ifndef __CINT__ + mutable std::vector _sortIdcs; //! + mutable std::vector _sortTVIdcs; //! +#endif + + mutable std::vector _varName; + mutable std::vector _rho; + mutable RooArgSet _dataVars; + mutable std::vector _x; + mutable std::vector _x0, _x1, _x2; + mutable std::vector _mean, _sigma; + mutable std::vector _xDatLo, _xDatHi; + mutable std::vector _xDatLo3s, _xDatHi3s; + + mutable Bool_t _netFluxZ; + mutable Double_t _nEventsBW; + mutable Double_t _nEventsBMSW; + mutable std::vector _xVarLo, _xVarHi; + mutable std::vector _xVarLoM3s, _xVarLoP3s, _xVarHiM3s, _xVarHiP3s; + mutable std::map _bpsIdcs; + mutable std::map _ibNoSort; + mutable std::vector _sIdcs; + mutable std::vector _bIdcs; + mutable std::vector _bmsIdcs; + + mutable std::map,BoxInfo*> _rangeBoxInfo ; + mutable BoxInfo _fullBoxInfo ; + + mutable std::vector _idx; + mutable Double_t _minWeight; + mutable Double_t _maxWeight; + mutable std::map _wMap; + + mutable TMatrixDSym* _covMat; + mutable TMatrixDSym* _corrMat; + mutable TMatrixD* _rotMat; + mutable TVectorD* _sigmaR; + mutable TVectorD* _dx; + mutable Double_t _sigmaAvgR; + + mutable Bool_t _rotate; + mutable Bool_t _sortInput; + mutable Int_t _nAdpt; + + mutable RooChangeTracker *_tracker; //! do not persist + + public: + /// sorter function + struct SorterTV_L2H { + Int_t idx; + + SorterTV_L2H (Int_t index) : idx(index) {} + bool operator() (const itPair& a, const itPair& b) { + const TVectorD& aVec = *(a.second); + const TVectorD& bVec = *(b.second); + return (aVec[idx](this)->calculateBandWidth(); + } + + //ClassDef(RooNDKeysPdfAnalytical, 2) // General N-dimensional non-parametric kernel estimation p.d.f +}; + +#endif + + +/***************************************************************************** + * Project: RooFit * + * Package: RooFitModels * + * File: $Id: RooNDKeysPdfAnalytical.cxx 31258 2009-11-17 22:41:06Z wouter $ + * Authors: * + * Max Baak, CERN, mbaak@cern.ch * + * * + * Redistribution and use in source and binary forms, * + * with or without modification, are permitted according to the terms * + * listed in LICENSE (http://roofit.sourceforge.net/license.txt) * + *****************************************************************************/ + +/** \class RooNDKeysPdfAnalytical + \ingroup Roofit + +Generic N-dimensional implementation of a kernel estimation p.d.f. This p.d.f. models the distribution +of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point, +each contributing 1/N to the total integral of the p.d.f. +If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the +local density of events, i.e. narrow for regions with high event density to preserve details and +wide for regions with log event density to promote smoothness. The details of the general algorithm +are described in the following paper: +Cranmer KS, Kernel Estimation in High-Energy Physics. + Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057 +For multi-dimensional datasets, the kernels are modeled by multidimensional Gaussians. The kernels are +constructed such that they reflect the correlation coefficients between the observables +in the input dataset. +**/ + +#include +#include +#include + +#include "TMath.h" +#include "TMatrixDSymEigen.h" +//#include "RooNDKeysPdfAnalytical.h" +#include "RooAbsReal.h" +#include "RooRealVar.h" +#include "RooRandom.h" +#include "RooHist.h" +#include "RooMsgService.h" +#include "RooChangeTracker.h" + +#include "TError.h" + +using namespace std; + +//ClassImp(RooNDKeysPdfAnalytical); + +//////////////////////////////////////////////////////////////////////////////// +/// Construct N-dimensional kernel estimation p.d.f. in observables 'varList' +/// from dataset 'data'. Options can be +/// +/// - 'a' = Use adaptive kernels (width varies with local event density) +/// - 'm' = Mirror data points over observable boundaries. Improves modeling +/// behavior at edges for distributions that are not close to zero +/// at edge +/// - 'd' = Debug flag +/// - 'v' = Verbose flag +/// +/// The parameter rho (default = 1) provides an overall scale factor that can +/// be applied to the bandwith calculated for each kernel. The nSigma parameter +/// determines the size of the box that is used to search for contributing kernels +/// around a given point in observable space. The nSigma parameters is used +/// in case of non-adaptive bandwidths and for the 1st non-adaptive pass for +/// the calculation of adaptive keys p.d.f.s. +/// +/// The optional weight arguments allows to specify an observable or function +/// expression in observables that specifies the weight of each event. + +RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const RooAbsData &data, + TString options, Double_t rho, Double_t nSigma, Bool_t rotate, Bool_t sortInput) + : RooAbsPdf(name, title), _varList("varList", "List of variables", this), + _rhoList("rhoList", "List of rho parameters", this), _dataP(0), _data(data), _options(options), _widthFactor(rho), + _nSigma(nSigma), _weights(&_weights0), _rotate(rotate), _sortInput(sortInput), _nAdpt(1), _tracker(0) +{ + // Constructor + _varItr = _varList.createIterator() ; + _rhoItr = _rhoList.createIterator(); + + TIterator* varItr = varList.createIterator() ; + RooAbsArg* var ; + for (Int_t i=0; (var = (RooAbsArg*)varItr->Next()); ++i) { + if (!dynamic_cast(var)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::ctor(" << GetName() << ") ERROR: variable " << var->GetName() + << " is not of type RooAbsReal" << endl ; + R__ASSERT(0) ; + } + _varList.add(*var) ; + _varName.push_back(var->GetName()); + } + delete varItr ; + + createPdf(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor + +RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const TH1 &hist, + TString options, Double_t rho, Double_t nSigma, Bool_t rotate, Bool_t sortInput) + : RooAbsPdf(name, title), _varList("varList", "List of variables", this), + _rhoList("rhoList", "List of rho parameters", this), _dataP(createDatasetFromHist(varList, hist)), _data(*_dataP), + _options(options), _widthFactor(rho), _nSigma(nSigma), _weights(&_weights0), _rotate(rotate), + _sortInput(sortInput), _nAdpt(1), _tracker(0) +{ + _varItr = _varList.createIterator(); + _rhoItr = _rhoList.createIterator(); + + TIterator *varItr = varList.createIterator(); + RooAbsArg *var; + for (Int_t i = 0; (var = (RooAbsArg *)varItr->Next()); ++i) { + if (!dynamic_cast(var)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::ctor(" << GetName() << ") ERROR: variable " << var->GetName() + << " is not of type RooAbsReal" << endl; + assert(0); + } + _varList.add(*var); + _varName.push_back(var->GetName()); + } + delete varItr; + + createPdf(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor + +RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const RooAbsData &data, + const TVectorD &rho, TString options, Double_t nSigma, Bool_t rotate, Bool_t sortInput) + : RooAbsPdf(name, title), _varList("varList", "List of variables", this), + _rhoList("rhoList", "List of rho parameters", this), _dataP(0), _data(data), _options(options), _widthFactor(-1.0), + _nSigma(nSigma), _weights(&_weights0), _rotate(rotate), _sortInput(sortInput), _nAdpt(1), _tracker(0) +{ + _varItr = _varList.createIterator() ; + _rhoItr = _rhoList.createIterator(); + + TIterator* varItr = varList.createIterator() ; + RooAbsArg* var ; + for (Int_t i=0; (var = (RooAbsArg*)varItr->Next()); ++i) { + if (!dynamic_cast(var)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::ctor(" << GetName() << ") ERROR: variable " << var->GetName() + << " is not of type RooAbsReal" << endl; + R__ASSERT(0); + } + _varList.add(*var) ; + _varName.push_back(var->GetName()); + } + delete varItr ; + + // copy rho widths + if( _varList.getSize() != rho.GetNrows() ) { + coutE(InputArguments) + << "ERROR: RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical() : The vector-size of rho is different from that of varList." + << "Unable to create the PDF." << endl; + R__ASSERT(_varList.getSize() == rho.GetNrows()); + } + + // negative width factor will serve as a switch in initialize() + // negative value means that a vector has been provided as input, + // and that _rho has already been set ... + _rho.resize( rho.GetNrows() ); + for (Int_t j = 0; j < rho.GetNrows(); j++) { + _rho[j] = rho[j]; /*cout<<"RooNDKeysPdfAnalytical ctor, _rho["<Next()); ++i) { + if (!dynamic_cast(var)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::ctor(" << GetName() << ") ERROR: variable " << var->GetName() + << " is not of type RooAbsReal" << endl; + assert(0); + } + _varList.add(*var); + _varName.push_back(var->GetName()); + } + delete varItr; + + TIterator *rhoItr = rhoList.createIterator(); + RooAbsArg *rho; + _rho.resize(rhoList.getSize(), 1.0); + + for (Int_t i = 0; (rho = (RooAbsArg *)rhoItr->Next()); ++i) { + if (!dynamic_cast(rho)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::ctor(" << GetName() << ") ERROR: parameter " << rho->GetName() + << " is not of type RooRealVar" << endl; + assert(0); + } + _rhoList.add(*rho); + _rho[i] = (dynamic_cast(rho))->getVal(); + } + delete rhoItr; + + // copy rho widths + if ((_varList.getSize() != _rhoList.getSize())) { + coutE(InputArguments) << "ERROR: RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical() : The size of rhoList is different from varList." + << "Unable to create the PDF." << endl; + assert(_varList.getSize() == _rhoList.getSize()); + } + + // keep track of changes in rho parameters + _tracker = new RooChangeTracker("tracker", "track rho parameters", _rhoList, true); // check for value updates + (void)_tracker->hasChanged(true); // first evaluation always true for new parameters (?) + + createPdf(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor + +RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical(const char *name, const char *title, const RooArgList &varList, const TH1 &hist, + const RooArgList &rhoList, TString options, Double_t nSigma, Bool_t rotate, Bool_t sortInput) + : RooAbsPdf(name, title), _varList("varList", "List of variables", this), + _rhoList("rhoList", "List of rho parameters", this), _dataP(createDatasetFromHist(varList, hist)), _data(*_dataP), + _options(options), _widthFactor(-1), _nSigma(nSigma), _weights(&_weights0), _rotate(rotate), _sortInput(sortInput), + _nAdpt(1) +{ + _varItr = _varList.createIterator(); + _rhoItr = _rhoList.createIterator(); + + TIterator *varItr = varList.createIterator(); + RooAbsArg *var; + for (Int_t i = 0; (var = (RooAbsArg *)varItr->Next()); ++i) { + if (!dynamic_cast(var)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::ctor(" << GetName() << ") ERROR: variable " << var->GetName() + << " is not of type RooAbsReal" << endl; + assert(0); + } + _varList.add(*var); + _varName.push_back(var->GetName()); + } + delete varItr; + + // copy rho widths + TIterator *rhoItr = rhoList.createIterator(); + RooAbsArg *rho; + _rho.resize(rhoList.getSize(), 1.0); + + for (Int_t i = 0; (rho = (RooAbsArg *)rhoItr->Next()); ++i) { + if (!dynamic_cast(rho)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::ctor(" << GetName() << ") ERROR: parameter " << rho->GetName() + << " is not of type RooRealVar" << endl; + assert(0); + } + _rhoList.add(*rho); + _rho[i] = (dynamic_cast(rho))->getVal(); + } + delete rhoItr; + + if ((_varList.getSize() != _rhoList.getSize())) { + coutE(InputArguments) << "ERROR: RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical() : The size of rhoList is different from varList." + << "Unable to create the PDF." << endl; + assert(_varList.getSize() == _rhoList.getSize()); + } + + // keep track of changes in rho parameters + _tracker = new RooChangeTracker("tracker", "track rho parameters", _rhoList, true); // check for value updates + (void)_tracker->hasChanged(true); // first evaluation always true for new parameters (?) + + createPdf(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor + +RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical(const char *name, const char *title, RooAbsReal &x, const RooAbsData &data, Mirror mirror, + Double_t rho, Double_t nSigma, Bool_t rotate, Bool_t sortInput) + : RooAbsPdf(name, title), _varList("varList", "List of variables", this), + _rhoList("rhoList", "List of rho parameters", this), _dataP(0), _data(data), _options("a"), _widthFactor(rho), + _nSigma(nSigma), _weights(&_weights0), _rotate(rotate), _sortInput(sortInput), _nAdpt(1), _tracker(0) +{ + _varItr = _varList.createIterator(); + _rhoItr = _rhoList.createIterator(); + + _varList.add(x); + _varName.push_back(x.GetName()); + + if (mirror != NoMirror) { + if (mirror != MirrorBoth) + coutW(InputArguments) << "RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical() : Warning : asymmetric mirror(s) no longer supported." + << endl; + _options = "m"; + } + + createPdf(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Backward compatibility constructor for Roo2DKeysPdf. If you are a new user, +/// please use the first constructor form. + +RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical(const char *name, const char *title, RooAbsReal &x, RooAbsReal &y, const RooAbsData &data, + TString options, Double_t rho, Double_t nSigma, Bool_t rotate, Bool_t sortInput) + : RooAbsPdf(name, title), _varList("varList", "List of variables", this), + _rhoList("rhoList", "List of rho parameters", this), _dataP(0), _data(data), _options(options), _widthFactor(rho), + _nSigma(nSigma), _weights(&_weights0), _rotate(rotate), _sortInput(sortInput), _nAdpt(1), _tracker(0) +{ + _varItr = _varList.createIterator(); + _rhoItr = _rhoList.createIterator(); + + _varList.add(RooArgSet(x, y)); + _varName.push_back(x.GetName()); + _varName.push_back(y.GetName()); + + createPdf(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor + +RooNDKeysPdfAnalytical::RooNDKeysPdfAnalytical(const RooNDKeysPdfAnalytical &other, const char *name) + : RooAbsPdf(other, name), _varList("varList", this, other._varList), _rhoList("rhoList", this, other._rhoList), + _dataP(other._dataP != NULL ? new RooDataSet(*other._dataP) : NULL), + _data(other._dataP != NULL ? *_dataP : other._data), _options(other._options), _widthFactor(other._widthFactor), + _nSigma(other._nSigma), _weights(&_weights0), _rotate(other._rotate), _sortInput(other._sortInput), + _nAdpt(other._nAdpt) +{ + _tracker = (other._tracker != NULL ? new RooChangeTracker(*other._tracker) : NULL); + // if (_tracker!=NULL) { _tracker->hasChanged(true); } + + _varItr = _varList.createIterator(); + _rhoItr = _rhoList.createIterator(); + + _fixedShape = other._fixedShape; + _mirror = other._mirror; + _debug = other._debug; + _verbose = other._verbose; + _sqrt2pi = other._sqrt2pi; + _nDim = other._nDim; + _nEvents = other._nEvents; + _nEventsM = other._nEventsM; + _nEventsW = other._nEventsW; + _d = other._d; + _n = other._n; + _dataPts = other._dataPts; + _dataPtsR = other._dataPtsR; + _weights0 = other._weights0; + _weights1 = other._weights1; + if (_options.Contains("a")) { + _weights = &_weights1; + } + //_sortIdcs = other._sortIdcs; + _sortTVIdcs = other._sortTVIdcs; + _varName = other._varName; + _rho = other._rho; + _x = other._x; + _x0 = other._x0; + _x1 = other._x1; + _x2 = other._x2; + _xDatLo = other._xDatLo; + _xDatHi = other._xDatHi; + _xDatLo3s = other._xDatLo3s; + _xDatHi3s = other._xDatHi3s; + _mean = other._mean; + _sigma = other._sigma; + + // BoxInfo + _netFluxZ = other._netFluxZ; + _nEventsBW = other._nEventsBW; + _nEventsBMSW = other._nEventsBMSW; + _xVarLo = other._xVarLo; + _xVarHi = other._xVarHi; + _xVarLoM3s = other._xVarLoM3s; + _xVarLoP3s = other._xVarLoP3s; + _xVarHiM3s = other._xVarHiM3s; + _xVarHiP3s = other._xVarHiP3s; + _bpsIdcs = other._bpsIdcs; + _ibNoSort = other._ibNoSort; + _sIdcs = other._sIdcs; + _bIdcs = other._bIdcs; + _bmsIdcs = other._bmsIdcs; + + _rangeBoxInfo = other._rangeBoxInfo; + _fullBoxInfo = other._fullBoxInfo; + + _idx = other._idx; + _minWeight = other._minWeight; + _maxWeight = other._maxWeight; + _wMap = other._wMap; + + _covMat = new TMatrixDSym(*other._covMat); + _corrMat = new TMatrixDSym(*other._corrMat); + _rotMat = new TMatrixD(*other._rotMat); + _sigmaR = new TVectorD(*other._sigmaR); + _dx = new TVectorD(*other._dx); + _sigmaAvgR = other._sigmaAvgR; +} + +//////////////////////////////////////////////////////////////////////////////// + +RooNDKeysPdfAnalytical::~RooNDKeysPdfAnalytical() +{ + if (_varItr) delete _varItr; + if (_rhoItr) + delete _rhoItr; + if (_covMat) delete _covMat; + if (_corrMat) delete _corrMat; + if (_rotMat) delete _rotMat; + if (_sigmaR) delete _sigmaR; + if (_dx) delete _dx; + if (_dataP) + delete _dataP; + if (_tracker) + delete _tracker; + + // delete all the boxinfos map + while ( !_rangeBoxInfo.empty() ) { + map,BoxInfo*>::iterator iter = _rangeBoxInfo.begin(); + BoxInfo* box= (*iter).second; + _rangeBoxInfo.erase(iter); + delete box; + } + + _dataPts.clear(); + _dataPtsR.clear(); + _weights0.clear(); + _weights1.clear(); + //_sortIdcs.clear(); + _sortTVIdcs.clear(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// evaluation order of constructor. + +void RooNDKeysPdfAnalytical::createPdf(Bool_t firstCall) const +{ + if (firstCall) { + // set options + setOptions(); + // initialization + initialize(); + } + + + // copy dataset, calculate sigma_i's, determine min and max event weight + loadDataSet(firstCall); + + // mirror dataset around dataset boundaries -- does not depend on event weights + if (_mirror) mirrorDataSet(); + + // store indices and weights of events with high enough weights + loadWeightSet(); + + // store indices of events in variable boundaries and box shell. +//calculateShell(&_fullBoxInfo); + // calculate normalization needed in analyticalIntegral() +//calculatePreNorm(&_fullBoxInfo); + + // lookup table for determining which events contribute to a certain coordinate + sortDataIndices(); + + // determine static and/or adaptive bandwidth + calculateBandWidth(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// set the configuration + +void RooNDKeysPdfAnalytical::setOptions() const +{ + _options.ToLower(); + + if( _options.Contains("a") ) { _weights = &_weights1; } + else { _weights = &_weights0; } + if( _options.Contains("m") ) _mirror = true; + else _mirror = false; + if( _options.Contains("d") ) _debug = true; + else _debug = false; + if( _options.Contains("v") ) { _debug = true; _verbose = true; } + else { _debug = false; _verbose = false; } + + cxcoutD(InputArguments) << "RooNDKeysPdfAnalytical::setOptions() options = " << _options + << "\n\tbandWidthType = " << _options.Contains("a") + << "\n\tmirror = " << _mirror + << "\n\tdebug = " << _debug + << "\n\tverbose = " << _verbose + << endl; + + if (_nSigma<2.0) { + coutW(InputArguments) << "RooNDKeysPdfAnalytical::setOptions() : Warning : nSigma = " << _nSigma << " < 2.0. " + << "Calculated normalization could be too large." + << endl; + } + + // number of adaptive width iterations. Default is 1. + if (_options.Contains("a")) { + if (!sscanf(_options.Data(), "%d%*s", &_nAdpt)) { + _nAdpt = 1; + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// initialization + +void RooNDKeysPdfAnalytical::initialize() const +{ + _sqrt2pi = sqrt(2.0*TMath::Pi()) ; + _nDim = _varList.getSize(); + _nEvents = (Int_t)_data.numEntries(); + _nEventsM = _nEvents; + _fixedShape= kFALSE; + + _netFluxZ = kFALSE; + _nEventsBW = 0; + _nEventsBMSW = 0; + + if(_nDim==0) { + coutE(InputArguments) << "ERROR: RooNDKeysPdfAnalytical::initialize() : The observable list is empty. " + << "Unable to begin generating the PDF." << endl; + R__ASSERT (_nDim!=0); + } + + if(_nEvents==0) { + coutE(InputArguments) << "ERROR: RooNDKeysPdfAnalytical::initialize() : The input data set is empty. " + << "Unable to begin generating the PDF." << endl; + R__ASSERT (_nEvents!=0); + } + + _d = static_cast(_nDim); + + vector dummy(_nDim,0.); + _dataPts.resize(_nEvents,dummy); + _weights0.resize(_nEvents,dummy); + //_sortIdcs.resize(_nDim); + _sortTVIdcs.resize(_nDim); + + //rdh _rho.resize(_nDim,_widthFactor); + + if (_widthFactor>0) { _rho.resize(_nDim,_widthFactor); } + // else: _rho has been provided as external input + + _x.resize(_nDim,0.); + _x0.resize(_nDim,0.); + _x1.resize(_nDim,0.); + _x2.resize(_nDim,0.); + + _mean.resize(_nDim,0.); + _sigma.resize(_nDim,0.); + + _xDatLo.resize(_nDim,0.); + _xDatHi.resize(_nDim,0.); + _xDatLo3s.resize(_nDim,0.); + _xDatHi3s.resize(_nDim,0.); + + boxInfoInit(&_fullBoxInfo,0,0xFFFF); + + _minWeight=0; + _maxWeight=0; + _wMap.clear(); + + _covMat = 0; + _corrMat= 0; + _rotMat = 0; + _sigmaR = 0; + _dx = new TVectorD(_nDim); _dx->Zero(); + _dataPtsR.resize(_nEvents,*_dx); + + _varItr->Reset() ; + RooRealVar* var ; + for(Int_t j=0; (var=(RooRealVar*)_varItr->Next()); ++j) { + _xDatLo[j] = var->getMin(); + _xDatHi[j] = var->getMax(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// copy the dataset and calculate some useful variables + +void RooNDKeysPdfAnalytical::loadDataSet(Bool_t firstCall) const +{ + // first some initialization + _nEventsW=0.; + + TMatrixD mat(_nDim,_nDim); + if (!_covMat) _covMat = new TMatrixDSym(_nDim); + if (!_corrMat) _corrMat= new TMatrixDSym(_nDim); + if (!_rotMat) _rotMat = new TMatrixD(_nDim,_nDim); + if (!_sigmaR) _sigmaR = new TVectorD(_nDim); + + mat.Zero(); + _covMat->Zero(); + _corrMat->Zero(); + _rotMat->Zero(); + _sigmaR->Zero(); + + const RooArgSet* values= _data.get(); + vector dVars(_nDim); + for (Int_t j=0; j<_nDim; j++) { + dVars[j] = (RooRealVar*)values->find(_varName[j].c_str()); + _x0[j]=_x1[j]=_x2[j]=0.; + } + + _idx.clear(); + for (Int_t i=0; i<_nEvents; i++) { + + _data.get(i); // fills dVars + _idx.push_back(i); + vector& point = _dataPts[i]; + TVectorD& pointV = _dataPtsR[i]; + + Double_t myweight = _data.weight(); // default is one? + if ( TMath::Abs(myweight)>_maxWeight ) { _maxWeight = TMath::Abs(myweight); } + _nEventsW += myweight; + + for (Int_t j=0; j<_nDim; j++) { + for (Int_t k=0; k<_nDim; k++) { + mat(j,k) += dVars[j]->getVal() * dVars[k]->getVal() * myweight; + } + // only need to do once + if (firstCall) + point[j] = pointV[j] = dVars[j]->getVal(); + + _x0[j] += 1. * myweight; + _x1[j] += point[j] * myweight ; + _x2[j] += point[j] * point[j] * myweight ; + if (_x2[j]!=_x2[j]) exit(3); + + // only need to do once + if (firstCall) { + if (point[j]<_xDatLo[j]) { _xDatLo[j]=point[j]; } + if (point[j]>_xDatHi[j]) { _xDatHi[j]=point[j]; } + } + } + } + + _n = TMath::Power(4./(_nEventsW*(_d+2.)), 1./(_d+4.)) ; + // = (4/[n(dim(R) + 2)])^1/(dim(R)+4); dim(R) = 2 + _minWeight = (0.5 - TMath::Erf(_nSigma/sqrt(2.))/2.) * _maxWeight; + + for (Int_t j=0; j<_nDim; j++) { + _mean[j] = _x1[j]/_x0[j]; + _sigma[j] = sqrt(_x2[j]/_x0[j]-_mean[j]*_mean[j]); + } + + for (Int_t j=0; j<_nDim; j++) { + for (Int_t k=0; k<_nDim; k++) { + (*_covMat)(j,k) = mat(j,k)/_x0[j] - _mean[j]*_mean[k]; + } + } + + for (Int_t j=0; j<_nDim; j++) { + for (Int_t k=0; k<_nDim; k++) + (*_corrMat)(j,k) = (*_covMat)(j,k)/(_sigma[j]*_sigma[k]) ; + } + + // use raw sigmas (without rho) for sigmaAvgR + TMatrixDSymEigen evCalculator(*_covMat); + TVectorD sigmaRraw = evCalculator.GetEigenValues(); + for (Int_t j=0; j<_nDim; j++) { sigmaRraw[j] = sqrt(sigmaRraw[j]); } + + _sigmaAvgR=1.; + for (Int_t j=0; j<_nDim; j++) { _sigmaAvgR *= sigmaRraw[j]; } + _sigmaAvgR = TMath::Power(_sigmaAvgR, 1./_d) ; + + // find decorrelation matrix and eigenvalues (R) + if (_nDim > 1 && _rotate) { + // new: rotation matrix now independent of rho evaluation + *_rotMat = evCalculator.GetEigenVectors(); + *_rotMat = _rotMat->T(); // transpose + } else { + TMatrixD haar(_nDim, _nDim); + TMatrixD unit(TMatrixD::kUnit, haar); + *_rotMat = unit; + } + + // update sigmas (rho dependent) + updateRho(); + + //// rho no longer used after this. + //// Now set rho = 1 because sigmaR now contains rho + // for (Int_t j=0; j<_nDim; j++) { _rho[j] = 1.; } // reset: important! + + if (_verbose) { + //_covMat->Print(); + _rotMat->Print(); + _corrMat->Print(); + _sigmaR->Print(); + } + + if (_nDim > 1 && _rotate) { + // apply rotation + for (Int_t i = 0; i < _nEvents; i++) { + TVectorD &pointR = _dataPtsR[i]; + pointR *= *_rotMat; + } + } + + coutI(Contents) << "RooNDKeysPdfAnalytical::loadDataSet(" << this << ")" + << "\n Number of events in dataset: " << _nEvents + << "\n Weighted number of events in dataset: " << _nEventsW << endl; +} + +//////////////////////////////////////////////////////////////////////////////// +/// determine mirror dataset. +/// mirror points are added around the physical boundaries of the dataset +/// Two steps: +/// 1. For each entry, determine if it should be mirrored (the mirror configuration). +/// 2. For each mirror configuration, make the mirror points. + +void RooNDKeysPdfAnalytical::mirrorDataSet() const +{ + for (Int_t j=0; j<_nDim; j++) { + _xDatLo3s[j] = _xDatLo[j] + _nSigma * (_n * _sigma[j]); + _xDatHi3s[j] = _xDatHi[j] - _nSigma * (_n * _sigma[j]); + + // cout<<"xDatLo3s["<xVarLo[j]==_xDatLo[j] && bi->xVarHi[j]==_xDatHi[j]) { + bi->netFluxZ = bi->netFluxZ && kTRUE; + } else { bi->netFluxZ = kFALSE; } + + bi->xVarLoM3s[j] = bi->xVarLo[j] - _nSigma * (_n * _sigma[j]); + bi->xVarLoP3s[j] = bi->xVarLo[j] + _nSigma * (_n * _sigma[j]); + bi->xVarHiM3s[j] = bi->xVarHi[j] - _nSigma * (_n * _sigma[j]); + bi->xVarHiP3s[j] = bi->xVarHi[j] + _nSigma * (_n * _sigma[j]); + + //cout<<"bi->xVarLoM3s["<xVarLoM3s[j]<xVarLoP3s["<xVarLoP3s[j]<xVarHiM3s["<xVarHiM3s[j]<xVarHiM3s["<xVarHiM3s[j]<::iterator wMapItr = _wMap.begin(); + + //for (Int_t i=0; i<_nEventsM; i++) { + for (; wMapItr!=_wMap.end(); ++wMapItr) { + Int_t i = (*wMapItr).first; + + const vector& x = _dataPts[i]; + Bool_t inVarRange(kTRUE); + Bool_t inVarRangePlusShell(kTRUE); + + for (Int_t j=0; j<_nDim; j++) { + + if (x[j]>bi->xVarLo[j] && x[j]xVarHi[j]) { + inVarRange = inVarRange && kTRUE; + } else { inVarRange = inVarRange && kFALSE; } + + if (x[j]>bi->xVarLoM3s[j] && x[j]xVarHiP3s[j]) { + inVarRangePlusShell = inVarRangePlusShell && kTRUE; + } else { inVarRangePlusShell = inVarRangePlusShell && kFALSE; } + } + + // event in range? + if (inVarRange) { + bi->bIdcs.push_back(i); + } + + // event in shell? + if (inVarRangePlusShell) { + bi->bpsIdcs[i] = kTRUE; + Bool_t inShell(kFALSE); + for (Int_t j=0; j<_nDim; j++) { + if ((x[j]>bi->xVarLoM3s[j] && x[j]xVarLoP3s[j]) && x[j]<(bi->xVarLo[j]+bi->xVarHi[j])/2.) { + inShell = kTRUE; + } else if ((x[j]>bi->xVarHiM3s[j] && x[j]xVarHiP3s[j]) && x[j]>(bi->xVarLo[j]+bi->xVarHi[j])/2.) { + inShell = kTRUE; + } + } + if (inShell) bi->sIdcs.push_back(i); // needed for normalization + else { + bi->bmsIdcs.push_back(i); // idem + } + } + } + + coutI(Contents) << "RooNDKeysPdfAnalytical::calculateShell() : " + << "\n Events in shell " << bi->sIdcs.size() + << "\n Events in box " << bi->bIdcs.size() + << "\n Events in box and shell " << bi->bpsIdcs.size() + << endl; +} + +//////////////////////////////////////////////////////////////////////////////// +///bi->nEventsBMSW=0.; +///bi->nEventsBW=0.; + +void RooNDKeysPdfAnalytical::calculatePreNorm(BoxInfo* bi) const +{ + // box minus shell + for (Int_t i=0; ibmsIdcs.size()); i++) + bi->nEventsBMSW += _wMap[bi->bmsIdcs[i]]; + + // box + for (Int_t i=0; ibIdcs.size()); i++) + bi->nEventsBW += _wMap[bi->bIdcs[i]]; + + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::calculatePreNorm() : " + << "\n nEventsBMSW " << bi->nEventsBMSW + << "\n nEventsBW " << bi->nEventsBW + << endl; +} + +//////////////////////////////////////////////////////////////////////////////// +/// sort entries, as needed for loopRange() + +void RooNDKeysPdfAnalytical::sortDataIndices(BoxInfo* bi) const +{ + // will loop over all events by default + if (!_sortInput) { + _ibNoSort.clear(); + for (unsigned int i = 0; i < _dataPtsR.size(); ++i) { + _ibNoSort[i] = kTRUE; + } + return; + } + + itVec itrVecR; + vector::iterator dpRItr = _dataPtsR.begin(); + for (Int_t i = 0; dpRItr != _dataPtsR.end(); ++dpRItr, ++i) { + if (bi) { + if (bi->bpsIdcs.find(i) != bi->bpsIdcs.end()) + // if (_wMap.find(i)!=_wMap.end()) + itrVecR.push_back(itPair(i, dpRItr)); + } else + itrVecR.push_back(itPair(i, dpRItr)); + } + + for (Int_t j=0; j<_nDim; j++) { + _sortTVIdcs[j].clear(); + sort(itrVecR.begin(),itrVecR.end(),SorterTV_L2H(j)); + _sortTVIdcs[j] = itrVecR; + } + + for (Int_t j=0; j<_nDim; j++) { + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::sortDataIndices() : Number of sorted events : " << _sortTVIdcs[j].size() << endl; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +void RooNDKeysPdfAnalytical::calculateBandWidth() const +{ + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::calculateBandWidth()" << endl; + + // non-adaptive bandwidth + // (default, and needed to calculate adaptive bandwidth) + + if(!_options.Contains("a")) { + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::calculateBandWidth() Using static bandwidth." << endl; + } + + // fixed width approximation + for (Int_t i=0; i<_nEvents; i++) { + vector& weight = _weights0[i]; + for (Int_t j = 0; j < _nDim; j++) { + weight[j] = _n * (*_sigmaR)[j]; + // cout<<"j: "< dummy(_nDim, 0.); + _weights1.resize(_nEvents, dummy); + + std::vector> *weights_prev(0); + std::vector> *weights_new(0); + + // cout << "Number of adaptive iterations: " << _nAdpt << endl; + + for (Int_t k = 1; k <= _nAdpt; ++k) { + + // cout << " Cycle: " << k << endl; + + // if multiple adaptive iterations, need to swap weight sets + if (k % 2) { + weights_prev = &_weights0; + weights_new = &_weights1; + } else { + weights_prev = &_weights1; + weights_new = &_weights0; + } + + for (Int_t i = 0; i < _nEvents; ++i) { + vector &x = _dataPts[i]; + Double_t f = TMath::Power(gauss(x, *weights_prev) / _nEventsW, -1. / (2. * _d)); + + vector &weight = (*weights_new)[i]; + for (Int_t j = 0; j < _nDim; j++) { + Double_t norm = (_n * (*_sigmaR)[j]) / sqrtSigmaAvgR; + weight[j] = norm * f / sqrt12; // note additional factor of sqrt(12) compared with HEP-EX/0011057 + } + } + } + // this is the latest updated weights set + _weights = weights_new; + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// loop over all closest point to x, as determined by loopRange() + +Double_t RooNDKeysPdfAnalytical::gauss(vector& x, vector >& weights) const +{ + if(_nEvents==0) return 0.; + + Double_t z=0.; + map ibMap; + + // determine input loop range for event x + if (_sortInput) { + loopRange(x, ibMap); + } + + map::iterator ibMapItr, ibMapEnd; + ibMapItr = (_sortInput ? ibMap.begin() : _ibNoSort.begin()); + ibMapEnd = (_sortInput ? ibMap.end() : _ibNoSort.end()); + + for (; ibMapItr != ibMapEnd; ++ibMapItr) { + Int_t i = (*ibMapItr).first; + + Double_t g(1.); + + if (i >= (Int_t)_idx.size()) { + continue; + } //---> 1.myline + + const vector &point = _dataPts[i]; + const vector &weight = weights[_idx[i]]; + + for (Int_t j = 0; j < _nDim; j++) { + (*_dx)[j] = x[j] - point[j]; + } + + if (_nDim > 1 && _rotate) { + *_dx *= *_rotMat; // rotate to decorrelated frame! + } + + for (Int_t j = 0; j < _nDim; j++) { + Double_t r = (*_dx)[j]; // x[j] - point[j]; + Double_t c = 1. / (2. * weight[j] * weight[j]); + + // cout << "j = " << j << " x[j] = " << point[j] << " w = " << weight[j] << endl; + + g *= exp(-c * r * r); + g *= 1. / (_sqrt2pi * weight[j]); + } + z += (g * _wMap[_idx[i]]); + } + return z; +} + +//////////////////////////////////////////////////////////////////////////////// +/// determine closest points to x, to loop over in evaluate() + +void RooNDKeysPdfAnalytical::loopRange(vector& x, map& ibMap) const +{ + ibMap.clear(); + TVectorD xRm(_nDim); + TVectorD xRp(_nDim); + + for (Int_t j = 0; j < _nDim; j++) { + xRm[j] = xRp[j] = x[j]; + } + + if (_nDim > 1 && _rotate) { + xRm *= *_rotMat; + xRp *= *_rotMat; + } + for (Int_t j = 0; j < _nDim; j++) { + xRm[j] -= _nSigma * (_n * (*_sigmaR)[j]); + xRp[j] += _nSigma * (_n * (*_sigmaR)[j]); + // cout<<"xRm["<=1E-20) + return val ; + else + return (1E-20) ; +} + +//////////////////////////////////////////////////////////////////////////////// +Int_t RooNDKeysPdfAnalytical::getAnalyticalIntegral(RooArgSet& allVars, RooArgSet& analVars, const char* rangeName) const +{ + if (rangeName) return 0 ; + Int_t code=0; + if (matchArgs(allVars,analVars,RooArgSet(_varList))) { code=1; } + + return code; + +} + +//////////////////////////////////////////////////////////////////////////////// +/* +Double_t RooNDKeysPdfAnalytical::analyticalIntegral(Int_t code, const char* rangeName) const +{ + checkInitWeights(); + + cxcoutD(Eval) << "Calling RooNDKeysPdfAnalytical::analyticalIntegral(" << GetName() << ") with code " << code + << " and rangeName " << (rangeName?rangeName:"") << endl; + + // determine which observables need to be integrated over ... + Int_t nComb = 1 << (_nDim); + R__ASSERT(code>=1 && codehasChanged(kTRUE)) || (_weights != &_weights0 && _weights != &_weights1) ) { + updateRho(); // update internal rho parameters + // redetermine static and/or adaptive bandwidth + const_cast(this)->calculateBandWidth(); + } + + std::vector xLowerBound; + xLowerBound.resize(_nDim); + std::vector xHigherBound; + xHigherBound.resize(_nDim); + + std::vector xLowerBoundMinShell; + std::vector xHigherBoundMinShell; + std::vector xLowerBoundMaxShell; + std::vector xHigherBoundMaxShell; + xLowerBoundMinShell.resize(_nDim); + xHigherBoundMinShell.resize(_nDim); + xLowerBoundMaxShell.resize(_nDim); + xHigherBoundMaxShell.resize(_nDim); + + TVectorD dxLowerBound(_nDim); + TVectorD dxHigherBound(_nDim); + + _varItr->Reset() ; + RooRealVar* var ; + for (unsigned int j=0; (var=(RooRealVar*)_varItr->Next()); ++j) { + + //for (unsigned int j=0; j < _varList.size(); ++j) { + // auto var = static_cast(_varList.at(j)); + xLowerBound[j] = var->getMin(rangeName); + xHigherBound[j] = var->getMax(rangeName); + + xLowerBoundMinShell[j] = xLowerBound[j] - _nSigma * (_n * _sigma[j]); + xLowerBoundMaxShell[j] = xLowerBound[j] + _nSigma * (_n * _sigma[j]); + xHigherBoundMinShell[j] = xHigherBound[j] - _nSigma * (_n * _sigma[j]); + xHigherBoundMaxShell[j] = xHigherBound[j] + _nSigma * (_n * _sigma[j]); + } + + double val = 0.; + for (size_t i = 0; i < _dataPts.size(); ++i) { + double g = 1.; + const vector &point = _dataPts[i]; + const vector &weight = (*_weights)[_idx[i]]; + + // skip if the data point is not in the shell of the lower bound or higher bound + bool inShell = false; + bool inRange = true; + for (int j = 0; j < _nDim; j++) { + inShell |= point[j] - xLowerBoundMinShell[j] > 0 && point[j] - xLowerBoundMaxShell[j] < 0; + inShell |= point[j] - xHigherBoundMinShell[j] > 0 && point[j] - xHigherBoundMaxShell[j] < 0; + inRange &= point[j] - xLowerBound[j] > 0 && point[j] - xHigherBound[j] < 0; + } + if( !__builtin_expect(inShell, false)) { + if (inRange) { + val += _wMap.at(_idx[i]); + //cout << i << " " << _wMap.at(_idx[i]) << endl; + } + continue; + } + + for (int j = 0; j < _nDim; j++) { + dxLowerBound[j] = xLowerBound[j] - point[j]; + dxHigherBound[j] = xHigherBound[j] - point[j]; + } + + if (_nDim > 1 && _rotate) { + // rotate to decorrelated frame! + dxLowerBound *= *_rotMat; + dxHigherBound *= *_rotMat; + } + + for (int j = 0; j < _nDim; j++) { + if (std::abs(weight[j])>0){ + const double min = dxLowerBound[j]/(weight[j] * std::sqrt(2)); + const double max = dxHigherBound[j]/(weight[j] * std::sqrt(2)); + + const double ecmin = std::erfc(std::abs(min)); + const double ecmax = std::erfc(std::abs(max)); + //cout << "min,max,ecmin,ecmax: " << min << " " << max << " " << ecmin << " " << ecmax << endl; + g *= 0.5 * (min*max < 0.0 ? 2.0 - (ecmin + ecmax) + : max <= 0. ? ecmax - ecmin : ecmin - ecmax); + } + } + val += (g * _wMap.at(_idx[i])); + } + //cout << "RooNDKeysPdfAnalytical::analyticalIntegral() : Final normalization : " << val << endl; + return val; + } + + vector doInt(_nDim,kTRUE); + + // get BoxInfo + BoxInfo* bi(0); + + if (rangeName) { + string rangeNameStr(rangeName) ; + bi = _rangeBoxInfo[make_pair(rangeNameStr,code)] ; + if (!bi) { + bi = new BoxInfo ; + _rangeBoxInfo[make_pair(rangeNameStr,code)] = bi ; + boxInfoInit(bi,rangeName,code); + } + } else bi= &_fullBoxInfo ; + + // have boundaries changed? + Bool_t newBounds(kFALSE); + _varItr->Reset() ; + RooRealVar* var ; + for (unsigned int j=0; (var=(RooRealVar*)_varItr->Next()); ++j) { + //auto var = static_cast(_varList.at(j)); + if ((var->getMin(rangeName)-bi->xVarLo[j]!=0) || + (var->getMax(rangeName)-bi->xVarHi[j]!=0)) { + newBounds = kTRUE; + } + } + + // reset + if (newBounds) { + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::analyticalIntegral() : Found new boundaries ... " << (rangeName?rangeName:"") << endl; + boxInfoInit(bi,rangeName,code); + } + + // recalculates netFluxZero and nEventsIR + if (!bi->filled || newBounds) { + // Fill box info with contents + calculateShell(bi); + calculatePreNorm(bi); + bi->filled = kTRUE; + const_cast(this)->sortDataIndices(bi); + } + + // first guess + Double_t norm=bi->nEventsBW; + + if (_mirror && bi->netFluxZ) { + // KEYS expression is self-normalized + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::analyticalIntegral() : Using mirrored normalization : " << bi->nEventsBW << endl; + cout << "RooNDKeysPdfAnalytical::analyticalIntegral() : Using mirrored normalization : " << bi->nEventsBW << endl; + return bi->nEventsBW; + } + // calculate leakage in and out of variable range box + else + { + norm = bi->nEventsBMSW; + if (norm<0.) norm=0.; + + for (Int_t i=0; isIdcs.size()); ++i) { + Double_t prob=1.; + const vector& x = _dataPts[bi->sIdcs[i]]; + const vector& weight = (*_weights)[_idx[bi->sIdcs[i]]]; + + vector chi(_nDim,100.); + + for (Int_t j=0; j<_nDim; j++) { + if(!doInt[j]) continue; + + if ((x[j]>bi->xVarLoM3s[j] && x[j]xVarLoP3s[j]) && x[j]<(bi->xVarLo[j]+bi->xVarHi[j])/2.) + chi[j] = (x[j]-bi->xVarLo[j])/weight[j]; + else if ((x[j]>bi->xVarHiM3s[j] && x[j]xVarHiP3s[j]) && x[j]>(bi->xVarLo[j]+bi->xVarHi[j])/2.) + chi[j] = (bi->xVarHi[j]-x[j])/weight[j]; + + if (chi[j]>0) // inVarRange + prob *= (0.5 + TMath::Erf(fabs(chi[j])/sqrt(2.))/2.); + else // outside Var range + prob *= (0.5 - TMath::Erf(fabs(chi[j])/sqrt(2.))/2.); + } + + norm += prob * _wMap.at(_idx[bi->sIdcs[i]]); + } + + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::analyticalIntegral() : Final normalization : " << norm << " " << bi->nEventsBW << endl; + cout << "RooNDKeysPdfAnalytical::analyticalIntegral() : Final normalization : " << norm << " " << bi->nEventsBW << endl; + return norm; + } +} +*/ + +Double_t RooNDKeysPdfAnalytical::analyticalIntegral(Int_t code, const char* rangeName) const +{ + cxcoutD(Eval) << "Calling RooNDKeysPdfAnalytical::analyticalIntegral(" << GetName() << ") with code " << code + << " and rangeName " << (rangeName?rangeName:"") << endl; + + // determine which observables need to be integrated over ... + Int_t nComb = 1 << (_nDim); + R__ASSERT(code>=1 && code doInt(_nDim,kTRUE); + + // get BoxInfo + BoxInfo* bi(0); + + if (rangeName) { + string rangeNameStr(rangeName) ; + bi = _rangeBoxInfo[make_pair(rangeNameStr,code)] ; + if (!bi) { + bi = new BoxInfo ; + _rangeBoxInfo[make_pair(rangeNameStr,code)] = bi ; + boxInfoInit(bi,rangeName,code); + } + } else bi= &_fullBoxInfo ; + + // have boundaries changed? + Bool_t newBounds(kFALSE); + _varItr->Reset() ; + RooRealVar* var ; + for(Int_t j=0; (var=(RooRealVar*)_varItr->Next()); ++j) { + if ((var->getMin(rangeName)-bi->xVarLo[j]!=0) || + (var->getMax(rangeName)-bi->xVarHi[j]!=0)) { + newBounds = kTRUE; + } + } + + // reset + if (newBounds) { + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::analyticalIntegral() : Found new boundaries ... " << (rangeName?rangeName:"") << endl; + boxInfoInit(bi,rangeName,code); + } + + // recalculates netFluxZero and nEventsIR + if (!bi->filled || newBounds) { + // Fill box info with contents + calculateShell(bi); + calculatePreNorm(bi); + bi->filled = kTRUE; + sortDataIndices(bi); + } + + // first guess + Double_t norm=bi->nEventsBW; + + if (_mirror && bi->netFluxZ) { + // KEYS expression is self-normalized + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::analyticalIntegral() : Using mirrored normalization : " << bi->nEventsBW << endl; + return bi->nEventsBW; + } + // calculate leakage in and out of variable range box + else + { + norm = bi->nEventsBMSW; + if (norm<0.) norm=0.; + + for (Int_t i=0; isIdcs.size()); ++i) { + Double_t prob=1.; + const vector& x = _dataPts[bi->sIdcs[i]]; + const vector& weight = (*_weights)[_idx[bi->sIdcs[i]]]; + + vector chi(_nDim,100.); + + for (Int_t j=0; j<_nDim; j++) { + if(!doInt[j]) continue; + if (!(fabs(weight[j])>0)) continue; + if ((x[j]>bi->xVarLoM3s[j] && x[j]xVarLoP3s[j]) && x[j]<(bi->xVarLo[j]+bi->xVarHi[j])/2.) + chi[j] = (x[j]-bi->xVarLo[j])/weight[j]; + else if ((x[j]>bi->xVarHiM3s[j] && x[j]xVarHiP3s[j]) && x[j]>(bi->xVarLo[j]+bi->xVarHi[j])/2.) + chi[j] = (bi->xVarHi[j]-x[j])/weight[j]; + + if (chi[j]>0) // inVarRange + prob *= (0.5 + TMath::Erf(fabs(chi[j])/sqrt(2.))/2.); + else // outside Var range + prob *= (0.5 - TMath::Erf(fabs(chi[j])/sqrt(2.))/2.); + } + + norm += prob * _wMap[_idx[bi->sIdcs[i]]]; + } + + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::analyticalIntegral() : Final normalization : " << norm << " " << bi->nEventsBW << endl; + //cout << "RooNDKeysPdfAnalytical::analyticalIntegral() : Final normalization : " << norm << " " << bi->nEventsBW << endl; + return max(norm, 1E-20); //like evaluate - GIULIO + } +} + + +RooDataSet * + //////////////////////////////////////////////////////////////////////////////// + + RooNDKeysPdfAnalytical::createDatasetFromHist(const RooArgList &varList, const TH1 &hist) const +{ + std::vector varVec; + RooArgSet varsAndWeightSet; + + TIterator *varItr = varList.createIterator(); + RooAbsArg *var; + for (Int_t i = 0; (var = (RooAbsArg *)varItr->Next()); ++i) { + if (!dynamic_cast(var)) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::createDatasetFromHist(" << GetName() << ") WARNING: variable " + << var->GetName() << " is not of type RooRealVar. Skip." << endl; + continue; + } + varsAndWeightSet.add(*var); // used for dataset creation + varVec.push_back(static_cast(var)); // used for setting the variables. + } + delete varItr; + + /// Add weight + RooRealVar weight("weight", "event weight", 0); + varsAndWeightSet.add(weight); + + /// determine histogram dimensionality + unsigned int histndim(0); + std::string classname = hist.ClassName(); + if (classname.find("TH1") == 0) { + histndim = 1; + } else if (classname.find("TH2") == 0) { + histndim = 2; + } else if (classname.find("TH3") == 0) { + histndim = 3; + } + assert(histndim == varVec.size()); + + if (histndim > 3 || histndim <= 0) { + coutE(InputArguments) << "RooNDKeysPdfAnalytical::createDatasetFromHist(" << GetName() + << ") ERROR: input histogram dimension not between [1-3]: " << histndim << endl; + assert(0); + } + + /// dataset creation + RooDataSet *dataFromHist = new RooDataSet("datasetFromHist", "datasetFromHist", varsAndWeightSet, weight.GetName()); + + /// dataset filling + for (int i = 1; i <= hist.GetXaxis()->GetNbins(); ++i) { + // 1 or more dimension + + Double_t xval = hist.GetXaxis()->GetBinCenter(i); + varVec[0]->setVal(xval); + + if (varVec.size() == 1) { + Double_t fval = hist.GetBinContent(i); + weight.setVal(fval); + dataFromHist->add(varsAndWeightSet, fval); + } else { // 2 or more dimensions + + for (int j = 1; j <= hist.GetYaxis()->GetNbins(); ++j) { + Double_t yval = hist.GetYaxis()->GetBinCenter(j); + varVec[1]->setVal(yval); + + if (varVec.size() == 2) { + Double_t fval = hist.GetBinContent(i, j); + weight.setVal(fval); + dataFromHist->add(varsAndWeightSet, fval); + } else { // 3 dimensions + + for (int k = 1; k <= hist.GetZaxis()->GetNbins(); ++k) { + Double_t zval = hist.GetZaxis()->GetBinCenter(k); + varVec[2]->setVal(zval); + + Double_t fval = hist.GetBinContent(i, j, k); + weight.setVal(fval); + dataFromHist->add(varsAndWeightSet, fval); + } + } + } + } + } + + return dataFromHist; +} + +TMatrixD + //////////////////////////////////////////////////////////////////////////////// + /// Return evaluated weights + + RooNDKeysPdfAnalytical::getWeights(const int &k) const +{ + TMatrixD mref(_nEvents, _nDim + 1); + + cxcoutD(Eval) << "RooNDKeysPdfAnalytical::getWeights() Return evaluated weights." << endl; + + for (Int_t i = 0; i < _nEvents; ++i) { + vector &x = _dataPts[i]; + for (Int_t j = 0; j < _nDim; j++) { + mref(i, j) = x[j]; + } + + vector &weight = (*_weights)[i]; + mref(i, _nDim) = weight[k]; + } + + return mref; +} + +void + //////////////////////////////////////////////////////////////////////////////// + + RooNDKeysPdfAnalytical::updateRho() const +{ + _rhoItr->Reset(); + RooAbsReal *rho(0); + for (Int_t j = 0; (rho = (RooAbsReal *)_rhoItr->Next()); ++j) { + _rho[j] = rho->getVal(); + } + + if (_nDim > 1 && _rotate) { + TMatrixDSym covMatRho(_nDim); // covariance matrix times rho parameters + for (Int_t j = 0; j < _nDim; j++) { + for (Int_t k = 0; k < _nDim; k++) { + covMatRho(j, k) = (*_covMat)(j, k) * _rho[j] * _rho[k]; + } + } + // find decorrelation matrix and eigenvalues (R) + TMatrixDSymEigen evCalculatorRho(covMatRho); + *_sigmaR = evCalculatorRho.GetEigenValues(); + for (Int_t j = 0; j < _nDim; j++) { + (*_sigmaR)[j] = sqrt((*_sigmaR)[j]); + } + } else { + for (Int_t j = 0; j < _nDim; j++) { + (*_sigmaR)[j] = (_sigma[j] * _rho[j]); + } // * rho + } +} + + diff --git a/ShapeAnalysis/scripts/mkDatacards.py b/ShapeAnalysis/scripts/mkDatacards.py index 9e62e44f5..cafe853d8 100755 --- a/ShapeAnalysis/scripts/mkDatacards.py +++ b/ShapeAnalysis/scripts/mkDatacards.py @@ -322,6 +322,8 @@ def makeDatacards( self, inputFile, outputDirDatacard, variables, cuts, samples, entryName = nuisance['name'] else: entryName = 'CMS_' + nuisance['name'] + if 'perRecoBin' in nuisance.keys() and nuisance['perRecoBin'] == True: + entryName += "_"+cutName card.write(entryName.ljust(80-20)) @@ -401,7 +403,12 @@ def makeDatacards( self, inputFile, outputDirDatacard, variables, cuts, samples, suffixOut = None else: suffixOut = '_CMS_' + nuisance['name'] - + if 'perRecoBin' in nuisance.keys() and nuisance['perRecoBin'] == True: + if ('skipCMS' in nuisance.keys()) and nuisance['skipCMS'] == 1: + suffixOut = "_"+nuisance['name'] + else: + suffixOut = '_CMS_' + nuisance['name'] + suffixOut += "_"+cutName symmetrize = 'symmetrize' in nuisance and nuisance['symmetrize'] saved = self._saveNuisanceHistos(cutName, variableName, sampleName, '_' + nuisance['name'], suffixOut, symmetrize) diff --git a/ShapeAnalysis/scripts/mkKernel.py b/ShapeAnalysis/scripts/mkKernel.py new file mode 100755 index 000000000..690cc8703 --- /dev/null +++ b/ShapeAnalysis/scripts/mkKernel.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python + +import json +import sys +argv = sys.argv +sys.argv = argv[:1] +import ROOT +import optparse +import LatinoAnalysis.Gardener.hwwtools as hwwtools +from LatinoAnalysis.ShapeAnalysis.ShapeFactoryMulti import ShapeFactory as SF +from LatinoAnalysis.ShapeAnalysis.KernelFactory import KernelFactory +import logging +import collections +import os.path +import shutil +import array +from sklearn.utils import resample +import tempfile +import root_numpy as rnp +import numpy as np +import math +from numpy import linalg as LA +import itertools +from LatinoAnalysis.Tools.batchTools import * + +# Common Tools & batch +from LatinoAnalysis.Tools.commonTools import * + + +if __name__ == '__main__': + sys.argv = argv + + print ''' + + + ```` ``````` + `.----------... .:/+++/++++++++/:.` + .--..............--//::---:://+ossss++o+:` + .:-..----..--------.----`` `-/os++o+. + -:..-------...........-:-. `:o++s+ + .:.------.`..............--. -++o+ + `:.---:/-.`...........-::--... -++y` + ``----::/o-`...........:osssmy-... `++y` + ```..-::/+os+:yy/.`..........-+d+/ohmo...` `/++/ + ```.----------:::::-:hd:`..........`-:hNho/++:.`.` .:/::: + ```..----------:::::-------omm-`............--+yhmho+///::------:://:--.. + ```..---...----:::-----------::/++smN:`...``...........-shdhyso+++///::::-...` + -----..------------------:::-------:-smd:...`````````````...+yysssy:::---..`` + .-------------------------```..``-:...``/...-.`````````````````.-:+: + o/------------..------..`` ` `-...:...``````````````````.. + -//:-----....`... `....--......```````````` `.`` + `...`` `` `...`.--........`````````````.`` + `...`..-.......:..`.-...`````` + `.-.`.--....``.--.` ````` + `..`..-.....``...-`` + `-...--......```` + .....--.....``` + .....---....``..` + `.-..---.....`.-.` + `--..------....` + `/..-------.``.` + .-::-----..-` + -s----...``` + :.```..` + + +''' + +# +# This code is run between mkShape and mkDatacard/mkPlot +# The idea is to massage the input histograms to deal with empty bins for selected samples +# + + usage = 'usage: %prog [options]' + parser = optparse.OptionParser(usage) + + parser.add_option('--inputFile' , dest='inputFile' , help='input file ' , default='./input.root') + parser.add_option('--inputFileTree' , dest='inputFileTree' , help='input file with the tree holding the branches to make the RooKeysPdf' , default='./input.root') + parser.add_option('--treeNameForKernel' , dest='treeNameForKernel' , help='name of the tree in inputTreeFile' , default='tree') + parser.add_option('--outputFile' , dest='outputFile' , help='output file. Copy of the input file with modified histograms' , default='./output.root') + parser.add_option('--structureFile' , dest='structureFile' , help='file with datacard configurations' , default=None ) + parser.add_option('--doBatch' , dest='doBatch' , help='Run on batch' , default=False) + parser.add_option('--batchQueue' , dest='batchQueue' , help='Queue on batch' , default='workday') + parser.add_option('--nuisancesFile' , dest='nuisancesFile' , help='file with nuisances configurations' , default=None ) + parser.add_option('--sample' , dest='samplesToTreat' , help='sample to tun on (can repeat multiple times)' , default=[], action='append' ) + parser.add_option('--cut', dest='cuts_to_run', help='list of cuts to run on (can repear multiple times)', default=[], action='append') + parser.add_option("-W" , "--iihe-wall-time" , dest="IiheWallTime" , help="Requested IIHE queue Wall Time" , default='168:00:00') + parser.add_option("-n", "--dry-run" , dest="dryRun" , help="do not make shapes" , default=False, action="store_true") + + cmssw_base = os.getenv('CMSSW_BASE') + try: + ROOT.gROOT.LoadMacro(cmssw_base+'/src/LatinoAnalysis/ShapeAnalysis/scripts/RooNDKeysPdfAnalytical.cxx+g') + except RuntimeError: + ROOT.gROOT.LoadMacro(cmssw_base+'/src/LatinoAnalysis/ShapeAnalysis/scripts/RooNDKeysPdfAnalytical.cxx++g') + + # read default parsing options as well + hwwtools.addOptions(parser) + hwwtools.loadOptDefaults(parser) + (opt, args) = parser.parse_args() + + sys.argv.append( '-b' ) + ROOT.gROOT.SetBatch() + + print " configuration file = ", opt.pycfg + + print " inputFile = ", opt.inputFile + print " outputFile = ", opt.outputFile + + if not opt.debug: + pass + elif opt.debug == 2: + print 'Logging level set to DEBUG (%d)' % opt.debug + logging.basicConfig( level=logging.DEBUG ) + elif opt.debug == 1: + print 'Logging level set to INFO (%d)' % opt.debug + logging.basicConfig( level=logging.INFO ) + + if opt.nuisancesFile == None : + print " Please provide the nuisances structure if you want to add nuisances " + + if opt.structureFile == None : + print " Please provide the datacard structure " + exit () + + ROOT.TH1.SetDefaultSumw2(True) + + factory = KernelFactory() + + if os.path.exists(opt.pycfg) : + handle = open(opt.pycfg,'r') + exec(handle) + handle.close() + + + ## load the samples + samples = {} + if os.path.exists(opt.samplesFile) : + handle = open(opt.samplesFile,'r') + exec(handle) + handle.close() + + ## load the cuts + cuts = {} + if os.path.exists(opt.cutsFile) : + handle = open(opt.cutsFile,'r') + exec(handle) + handle.close() + + ## load the variables + variables = {} + if os.path.exists(opt.variablesFile) : + handle = open(opt.variablesFile,'r') + exec(handle) + handle.close() + + ## load the nuisances + nuisances = collections.OrderedDict() + if os.path.exists(opt.nuisancesFile) : + handle = open(opt.nuisancesFile,'r') + exec(handle) + handle.close() + for n in nuisances: + nuisances[n].pop('cutspost', None) + import LatinoAnalysis.ShapeAnalysis.utils as utils + + subsamplesmap = utils.flatten_samples(samples) + categoriesmap = utils.flatten_cuts(cuts) + + utils.update_variables_with_categories(variables, categoriesmap) + utils.update_nuisances_with_subsamples(nuisances, subsamplesmap) + utils.update_nuisances_with_categories(nuisances, categoriesmap) + + ## load the structure file (use flattened sample and cut names) + structure = collections.OrderedDict() + if os.path.exists(opt.structureFile) : + handle = open(opt.structureFile,'r') + exec(handle) + handle.close() + + ## command-line cuts restriction + cutstorun = cuts if len(opt.cuts_to_run) == 0 else opt.cuts_to_run + samplestorun = samples.keys() if len(opt.samplesToTreat) == 0 else opt.samplesToTreat + + if not opt.doBatch: + factory.mkKernel( opt.inputFile, opt.outputFile, opt.inputFileTree, opt.treeNameForKernel, variables, cutstorun, samples.keys(), structure, nuisances, samplestorun) + else: + if 'slc7' in os.environ['SCRAM_ARCH'] and 'iihe' in os.uname()[1] : use_singularity = True + else : use_singularity = False + bpostFix='' + targetList = list(itertools.product(cutstorun, range(len(samplestorun)))) + jobs = batchJobs('mkKernel',tag,['all'],targetList,'Target',bpostFix,JOB_DIR_SPLIT_READY=True,USE_SINGULARITY=use_singularity) + + jobs.AddPy2Sh() + jobs.InitPy('import os') + jobs.InitPy('import ROOT') + jobs.InitPy('from collections import OrderedDict') + jobs.InitPy("from LatinoAnalysis.ShapeAnalysis.KernelFactory import KernelFactory\n") + jobs.InitPy("factory = KernelFactory()") + jobs.InitPy("\n") + + #outputDir=os.getcwd()+'/'+opt.outputDir + + for iTarget in targetList: + tname = '%s.%d' % iTarget + + outfile = opt.outputFile.replace('.root', "_"+tname+".root") + instructions_for_configuration_file = "" + instructions_for_configuration_file += "os.system(\"cp "+os.getcwd()+'/'+opt.inputFile+" .\") \n" + instructions_for_configuration_file += "os.system(\"cp "+os.getcwd()+'/'+opt.inputFileTree+" .\") \n" + instructions_for_configuration_file += "try:\n" + instructions_for_configuration_file += " ROOT.gROOT.LoadMacro('"+cmssw_base+"/src/LatinoAnalysis/ShapeAnalysis/scripts/RooNDKeysPdfAnalytical.cxx+g')\n" + instructions_for_configuration_file += "except RuntimeError:\n" + instructions_for_configuration_file += " ROOT.gROOT.LoadMacro('"+cmssw_base+"/src/LatinoAnalysis/ShapeAnalysis/scripts/RooNDKeysPdfAnalytical.cxx++g')\n" + + instructions_for_configuration_file += "factory.mkKernel( \n" + instructions_for_configuration_file += " '" + opt.inputFile.split('/')[-1] +"', \n" + instructions_for_configuration_file += " '" + outfile + "', \n" + instructions_for_configuration_file += " '" + opt.inputFileTree.split('/')[-1] + "', \n" + instructions_for_configuration_file += " '" + opt.treeNameForKernel + "', \n" + instructions_for_configuration_file += " " + str(variables) + ", \n" + instructions_for_configuration_file += " ['" + str(iTarget[0]) + "'], \n" + instructions_for_configuration_file += " " + str(samples.keys()) + ", \n" + instructions_for_configuration_file += " " + str(structure) + ", \n" + instructions_for_configuration_file += " " + str(nuisances) + ", \n" + instructions_for_configuration_file += " ['" + str(samplestorun[iTarget[1]]) + "']) \n" + instructions_for_configuration_file += "os.system(\"cp "+outfile+" "+os.getcwd()+"\") \n" + + jobs.AddPy ('all', iTarget, instructions_for_configuration_file) + + if not opt.dryRun: + jobs.Sub(opt.batchQueue,opt.IiheWallTime,True)