cms-rwth · Ming-Yan · Mar 23, 2023 · Oct 31, 2022 · Nov 10, 2022 · Nov 16, 2022
diff --git a/.github/workflows/python_linting.yml b/.github/workflows/python_linting.yml
@@ -38,5 +38,5 @@ jobs:
     - name: Lint with black
       uses: psf/black@stable
       with:
-        options: "--check --verbose"
+        options: "--check --verbose --exclude=src/BTVNanoCommissioning/jsonpog-integration"
         src: "./"
diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml
@@ -13,14 +13,22 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+
+        python-version: ["3.8","3.10"]
 
     defaults:
       run:
         shell: "bash -l {0}"
 
     steps:
     - uses: actions/checkout@v2
+    - name: update submodules  # point the jsonpog-integration submodule at a credentialed URL, then fetch it
+      run: |
+          git config --file=.gitmodules submodule.src/BTVNanoCommissioning/jsonpog-integration.url https://milee:${{ secrets.GIT_CERN_PWD }}@gitlab.cern.ch/cms-nanoAOD/jsonpog-integration.git
+          git submodule sync
+          # NOTE: do not run `git config --list` here; it would echo the credentialed submodule URL into the job log
+          git submodule update --init --recursive
+
     - uses: cvmfs-contrib/github-action-cvmfs@v2
       with:
         cvmfs_repositories: 'grid.cern.ch'
@@ -29,10 +37,9 @@ jobs:
       uses: conda-incubator/setup-miniconda@v2
       with:
         python-version: ${{ matrix.python-version }}
-        mamba-version: "*"
+        miniforge-variant: Mambaforge
         channels: conda-forge,defaults
         channel-priority: true
-        auto-update-conda: true
         activate-environment: CoffeaRunner
         environment-file: env.yml
         auto-activate-base: false
@@ -77,6 +84,7 @@ jobs:
         pip install -e .
 
 
+
     - name: Run test workflow
       run: |
-        python runner_wconfig.py --cfg config/example.py
+        python runner_wconfig.py --cfg config/example.py
diff --git a/.gitignore b/.gitignore
@@ -11,11 +11,13 @@ plot/
 plotting/plot/
 src/BTVNanoCommissioning/version.py
 src/BTVNanoCommissioning/workflows/
+src/BTVNanoCommissioning/jsonpog-integration/examples/
 src/BTVNanoCommissioning.egg-info/
 ## not track BTV stuff
 src/BTVNanoCommissioning/helpers/definitions.py
 src/BTVNanoCommissioning/utils/AK4_parameters.py
 src/BTVNanoCommissioning/utils/selection.py
+src/BTVNanoCommissioning/utils/histogrammer.py
 .github/workflows/ctag_DY_workflow.yml
 .github/workflows/ctag_Wc_workflow.yml
 .github/workflows/ttbar_SL_DL_workflow.yml
@@ -33,6 +35,6 @@ dask-worker-space/
 .job_wrapper_failure
 plot/
 plotting/plot/
-
+*array*
 ## other submodules
 src/Hpluscharm/
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "src/BTVNanoCommissioning/jsonpog-integration"]
+	path = src/BTVNanoCommissioning/jsonpog-integration
+	url = https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration.git
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ Clone repository from git
 
 ```bash
 # only first time 
-git clone [email protected]:cms-rwth/CoffeaRunner.git
+git clone --recursive [email protected]:cms-rwth/CoffeaRunner.git
 ```
 
 For installing Miniconda, see also https://hackmd.io/GkiNxag0TUmHnnCiqdND1Q#Local-or-remote
@@ -45,16 +45,12 @@ conda install -c conda-forge p-tqdm
 </details>
 <br>
 
-=======
-
 Once the environment is set up, compile the python package:
 ```
 pip install -e .
 ```
 
 
-
-
 ## Structures of code
 
 The development of the code is driven by user-friendliness, reproducibility and efficiency.
@@ -183,6 +179,10 @@ Example in [weight_splitcat.py](https://github.com/cms-rwth/CoffeaRunner/blob/ma
 
 - In case you have correction depends on sample ,i.e. k-factor, use `"bysample":{$sample_name:$weight_nested_dict}`
 
+<details><summary>Example with customized weight files
+</summary>
+<p>
+
 ```
 "weights":{
         "common":{
@@ -227,6 +227,46 @@ Example in [weight_splitcat.py](https://github.com/cms-rwth/CoffeaRunner/blob/ma
         },
     }
 ```
+</p>
+</details>
+
+
+-  Use the centrally maintained jsonpog-integration
+The official correction files collected in [jsonpog-integration](https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration) are updated by the POGs; only `lumiMask` and `JME` are still updated by the maintainer. Input files no longer need to be specified in the `correction_config`.  
+
+<details><summary>See the example with <code>2017_UL</code>.</summary>
+<p>
+
+```python
+  "2017_UL": {
+        # Same as in the custom config
+        "lumiMask": "Cert_294927-306462_13TeV_UL2017_Collisions17_MuonJSON.txt",
+        "JME": "jec_compiled.pkl.gz",
+        # no config needs to be specified for PU weights
+        "PU": None,
+        # Btag SFs - specify $TAGGER : $TYPE-> find [$TAGGER_$TYPE] in json file
+        "BTV": {"deepCSV": "shape", "deepJet": "shape"},
+
+        "LSF": {
+        # Electron SF - Following the scheme: "${SF_name} ${year}": "${WP}"
+        # https://github.com/cms-egamma/cms-egamma-docs/blob/master/docs/EgammaSFJSON.md
+            "ele_ID 2017": "wp90iso",
+            "ele_Reco 2017": "RecoAbove20",
+
+        # Muon SF - Following the scheme: "${SF_name} ${year}": "${WP}"
+        # WPs : ['NUM_GlobalMuons_DEN_genTracks', 'NUM_HighPtID_DEN_TrackerMuons', 'NUM_HighPtID_DEN_genTracks', 'NUM_IsoMu27_DEN_CutBasedIdTight_and_PFIsoTight', 'NUM_LooseID_DEN_TrackerMuons', 'NUM_LooseID_DEN_genTracks', 'NUM_LooseRelIso_DEN_LooseID', 'NUM_LooseRelIso_DEN_MediumID', 'NUM_LooseRelIso_DEN_MediumPromptID', 'NUM_LooseRelIso_DEN_TightIDandIPCut', 'NUM_LooseRelTkIso_DEN_HighPtIDandIPCut', 'NUM_LooseRelTkIso_DEN_TrkHighPtIDandIPCut', 'NUM_MediumID_DEN_TrackerMuons', 'NUM_MediumID_DEN_genTracks', 'NUM_MediumPromptID_DEN_TrackerMuons', 'NUM_MediumPromptID_DEN_genTracks', 'NUM_Mu50_or_OldMu100_or_TkMu100_DEN_CutBasedIdGlobalHighPt_and_TkIsoLoose', 'NUM_SoftID_DEN_TrackerMuons', 'NUM_SoftID_DEN_genTracks', 'NUM_TightID_DEN_TrackerMuons', 'NUM_TightID_DEN_genTracks', 'NUM_TightRelIso_DEN_MediumID', 'NUM_TightRelIso_DEN_MediumPromptID', 'NUM_TightRelIso_DEN_TightIDandIPCut', 'NUM_TightRelTkIso_DEN_HighPtIDandIPCut', 'NUM_TightRelTkIso_DEN_TrkHighPtIDandIPCut', 'NUM_TrackerMuons_DEN_genTracks', 'NUM_TrkHighPtID_DEN_TrackerMuons', 'NUM_TrkHighPtID_DEN_genTracks']
+
+            "mu_Reco 2017_UL": "NUM_TrackerMuons_DEN_genTracks",
+            "mu_HLT 2017_UL": "NUM_IsoMu27_DEN_CutBasedIdTight_and_PFIsoTight",
+            "mu_ID 2017_UL": "NUM_TightID_DEN_TrackerMuons",
+            "mu_Iso 2017_UL": "NUM_TightRelIso_DEN_TightIDandIPCut",
+        },
+    },
+```
+
+</p>
+</details>
+
 ##### Systematic 
 
 Specify whether run systematics or not
@@ -361,6 +401,9 @@ In `plodataMC.py` config files (i.e. `testfile/btv_datamc.yaml`), you can specif
 
 In `comparison.py` config file (`testfile/btv_compare.yaml`),  color and label name and label names are created with `dict` under `reference`  and `compare`. `reference` only accept one entry. 
 
+<details><summary>Code snippet</summary>
+<p>
+
 ```yaml
 ## plodataMC.py
 mergemap:
@@ -395,6 +438,9 @@ compare:
     Muon_Run2022D-PromptReco-v2: 
 ```
 
+</p>
+</details>
+
 #### Variables 
 
 Common definitions for both usage, use default settings if leave empty value for the keys. 
@@ -407,6 +453,9 @@ Common definitions for both usage, use default settings if leave empty value for
 | `rebin` | no rebinning |
 | `blind` | no blind region | 
 
+<details><summary>Code snippet</summary>
+<p>
+
 ```yaml
 ## specify variable to plot
     btagDeepFlavB_0:
@@ -422,6 +471,8 @@ Common definitions for both usage, use default settings if leave empty value for
             # discr: 2
             # or just put a number, would rebin distribution the last axis (usually the variable)
             2
+            # Non-uniform rebinning is now supported: specify the axis to rebin together with its rebin values
+            #discr : [-0.2,0.04,0.2,0.4,0.48,0.6,0.64,0.68,0.72,0.76,0.8,0.84,0.88,0.92,0.96,1.]
         # Optional(only for data/MC), blind variables
         blind : -10, #blind variable[-10:], if put -10,-5 would blind variable[-10:-5]
 
@@ -442,6 +493,9 @@ Common definitions for both usage, use default settings if leave empty value for
         rebin: 2
 ``` 
 
+</p>
+</details>
+
 ### Running jupyter remotely
 See also https://hackmd.io/GkiNxag0TUmHnnCiqdND1Q#Remote-jupyter
 

diff --git a/config/HWW2l2nu_split.py b/config/HWW2l2nu_split.py
@@ -0,0 +1,104 @@
+from Hpluscharm.workflows import workflows as hplusc_wf
+
+cfg = {  # run configuration: "HWWtest" workflow on the UL17 campaign (year 2017)
+    "dataset": {
+        "jsons": [  # only mcbkg_UL17.json is active; the other inputs are commented out
+            # "src/Hpluscharm/input_json/higgs_UL17.json",
+            # "src/Hpluscharm/input_json/signal_UL17.json",
+            "src/Hpluscharm/input_json/mcbkg_UL17.json"
+            # "src/Hpluscharm/input_json/st_local.json"
+        ],
+        "campaign": "UL17",
+        "year": "2017",
+        "filter": {
+            "samples": [  # only the five ST_* samples below are enabled
+                # # #             # "ZZ_TuneCP5_13TeV-pythia8",
+                # # #             # "WZ_TuneCP5_13TeV-pythia8",
+                # # #             # "WW_TuneCP5_13TeV-pythia8"
+                # # #             # "gchcWW2L2Nu_4f"
+                #        "TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8",
+                #    "TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8",
+                "ST_tW_top_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8",
+                "ST_tW_antitop_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8",
+                "ST_s-channel_4f_leptonDecays_TuneCP5_13TeV-amcatnlo-pythia8",
+                "ST_t-channel_antitop_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8",
+                "ST_t-channel_top_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8",
+                #    "WJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-pythia8",
+                #    "DYJetsToLL_M-10to50_TuneCP5_13TeV-madgraphMLM-pythia8",
+                #    "DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8",
+                #     "DYJetsToTauTauToMuTauh_M-50_TuneCP5_13TeV-madgraphMLM-pythia8"
+            ]
+        },
+    },
+    # Input and output files
+    "workflow": hplusc_wf["HWWtest"],  # entry selected from the workflows object imported from Hpluscharm
+    "output": "st_all_array",
+    "run_options": {
+        "executor": "parsl/condor/naf_lite",
+        # "executor":"iterative",
+        "workers": 4,
+        "scaleout": 200,
+        "walltime": "03:00:00",
+        "mem_per_worker": 2,  # GB
+        "chunk": 15000,
+        "skipbadfiles": True,
+        "sample_size": 20,
+        "retries": 50,
+        "index": "0,0",
+    },
+    ## selections
+    "categories": {"cats": [], "cats2": []},  # both category lists are empty in this config
+    "preselections": {
+        "mu1hlt": ["IsoMu27"],
+        "mu2hlt": [
+            "Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_DZ_Mass3p8",
+            "Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_DZ_Mass8",
+        ],
+        "e1hlt": ["Ele35_WPTight_Gsf"],
+        "e2hlt": ["Ele23_Ele12_CaloIdL_TrackIdL_IsoVL"],
+        "emuhlt": [
+            "Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL",
+            "Mu12_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL",
+            "Mu12_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ",
+            "Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL_DZ",
+        ],
+    },
+    ## weights
+    "weights": {
+        "common": {
+            "inclusive": {
+                "lumiMask": "Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt",
+                "PU": "puweight_UL17.histo.root",
+                "JME": "mc_compile_jec.pkl.gz",
+                "BTV": {
+                    "DeepJetC": "DeepJet_ctagSF_Summer20UL17_interp.root",
+                },
+                "LSF": {  # each *_error key below points at the same file as its nominal counterpart
+                    # "ele_Trig TrigSF": "Ele32_L1DoubleEG_TrigSF_vhcc.histo.root",
+                    "ele_Rereco_above20 EGamma_SF2D": "egammaEffi_ptAbove20.txt_EGM2D_UL2017.histo.root",
+                    "ele_Rereco_below20 EGamma_SF2D": "egammaEffi_ptBelow20.txt_EGM2D_UL2017.histo.root",
+                    "ele_ID EGamma_SF2D": "egammaEffi.txt_EGM2D_MVA90iso_UL17.histo.root",
+                    "mu_ID NUM_TightID_DEN_TrackerMuons_abseta_pt": "Efficiencies_muon_generalTracks_Z_Run2017_UL_ID.histo.root",
+                    "mu_Iso NUM_TightRelIso_DEN_TightIDandIPCut_abseta_pt": "Efficiencies_muon_generalTracks_Z_Run2017_UL_ISO.histo.root",
+                    "ele_Rereco_above20_error EGamma_SF2D_error": "egammaEffi_ptAbove20.txt_EGM2D_UL2017.histo.root",
+                    "ele_Rereco_below20_error EGamma_SF2D_error": "egammaEffi_ptBelow20.txt_EGM2D_UL2017.histo.root",
+                    "ele_ID_error EGamma_SF2D_error": "egammaEffi.txt_EGM2D_MVA90iso_UL17.histo.root",
+                    "mu_ID_error NUM_TightID_DEN_TrackerMuons_abseta_pt_error": "Efficiencies_muon_generalTracks_Z_Run2017_UL_ID.histo.root",
+                    "mu_Iso_error NUM_TightRelIso_DEN_TightIDandIPCut_abseta_pt_error": "Efficiencies_muon_generalTracks_Z_Run2017_UL_ISO.histo.root",
+                },
+            },
+        },
+    },
+    "systematic": {  # both switches are disabled in this config
+        "JERC": False,
+        "weights": False,
+    },
+    ## user specific
+    "userconfig": {
+        "export_array": True,
+        "BDT": {
+            "ll": "src/Hpluscharm/MVA/xgb_output/SR_ll_scangamma_2017_gamma2.json",
+            "emu": "src/Hpluscharm/MVA/xgb_output/SR_emu_scangamma_2017_gamma2.json",
+        },
+    },
+}
diff --git a/config/example.py b/config/example.py
@@ -39,8 +39,22 @@
     "weights": {
         "common": {
             "inclusive": {
-                "lumiMask": "Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"
-            }
-        }
+                "lumiMask": "Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt",
+                "PU": None,
+                "JME": "jec_compiled.pkl.gz",
+                "BTV": {"deepJet": "shape"},
+                "LSF": {
+                    "ele_ID 2017": "wp90iso",
+                    "ele_Reco 2017": "RecoAbove20",
+                    "ele_Reco_low 2017": "RecoBelow20",
+                    "mu_Reco 2017_UL": "NUM_TrackerMuons_DEN_genTracks",
+                    "mu_HLT 2017_UL": "NUM_IsoMu27_DEN_CutBasedIdTight_and_PFIsoTight",
+                    "mu_ID 2017_UL": "NUM_TightID_DEN_TrackerMuons",
+                    "mu_Iso 2017_UL": "NUM_TightRelIso_DEN_TightIDandIPCut",
+                    "mu_ID_low NUM_TightID_DEN_TrackerMuons": "Efficiency_muon_trackerMuon_Run2017_UL_ID.histo.json",
+                    "mu_Reco_low NUM_TrackerMuons_DEN_genTracks": "Efficiency_muon_generalTracks_Run2017_UL_trackerMuon.histo.json",
+                },
+            },
+        },
     },
 }
diff --git a/env.yml b/env.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python>=3.8, <3.11
+  - "python>=3.8,<3.11"  # keep the 3.8 floor; "<3.11" (unlike "<=3.10") also admits 3.10.x patch releases
   - voms
   - ca-policy-lcg
   - ca-certificates
@@ -15,6 +15,7 @@ dependencies:
   - parsl
   - arrow
   - dask-jobqueue
+  - jupyter
   - p-tqdm
   - pip
   - pip: 

diff --git a/filefetcher/fetch.py b/filefetcher/fetch.py
@@ -78,7 +78,6 @@ def getFilesFromDas(args):
 
 
 def getFilesFromPath(args, lim=None):
-
     fdict = {}
     fset = []
     with open(args.input) as fp:
@@ -102,7 +101,6 @@ def getFilesFromPath(args, lim=None):
 
 
 def getRootFilesFromPath(d, lim=None):
-
     import subprocess
 
     if "xrootd" in d:
@@ -148,14 +146,12 @@ def getRootFilesFromPath(d, lim=None):
 
 
 def main(args):
-
     if args.from_path:
         print("do it from path: ")
 
         fdict = getFilesFromPath(args)
 
     else:
-
         fdict = getFilesFromDas(args)
 
     # print(fdict)