Skip to content

Commit cce1afb

Browse files
feat: fixes towards v1.2 (#179)
* fix b-tagging threshold comparison
* remove event offsetting by 1e-6 in histograms
* add updated histogram references
1 parent a03c3fd commit cce1afb

12 files changed

+30157
-30159
lines changed

analyses/cms-open-data-ttbar/reference/histos_100_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_10_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_1_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_200_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_20_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_2_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_50_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_5_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_all_file_per_process.json

+3,348-3,348
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.ipynb

+19-19
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ def process(self, events):
212212
event_filters = event_filters & (ak.count(selected_jets.pt * pt_var_modifier, axis=1) >= 4)
213213
# at least one b-tagged jet ("tag" means score above threshold)
214214
B_TAG_THRESHOLD = 0.5
215-
event_filters = event_filters & (ak.sum(selected_jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) >= 1)
215+
event_filters = event_filters & (ak.sum(selected_jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) >= 1)
216216

217217
# apply event filters
218218
selected_events = events[event_filters]
@@ -223,7 +223,7 @@ def process(self, events):
223223
for region in ["4j1b", "4j2b"]:
224224
# further filtering: 4j1b CR with single b-tag, 4j2b SR with two or more tags
225225
if region == "4j1b":
226-
region_filter = ak.sum(selected_jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) == 1
226+
region_filter = ak.sum(selected_jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) == 1
227227
selected_jets_region = selected_jets[region_filter]
228228
# use HT (scalar sum of jet pT) as observable
229229
pt_var_modifier = (
@@ -333,12 +333,12 @@ def postprocess(self, accumulator):
333333
def get_query(source: ObjectStream) -> ObjectStream:
334334
"""Query for event / column selection: >=4j >=1b, ==1 lep with pT>25 GeV, return relevant columns
335335
"""
336-
return source.Where(lambda e: e.Electron_pt.Where(lambda pt: pt > 25).Count()
336+
return source.Where(lambda e: e.Electron_pt.Where(lambda pt: pt > 25).Count()
337337
+ e.Muon_pt.Where(lambda pt: pt > 25).Count() == 1)\
338338
.Where(lambda f: f.Jet_pt.Where(lambda pt: pt > 25).Count() >= 4)\
339-
.Where(lambda g: {"pt": g.Jet_pt,
340-
"btagCSVV2": g.Jet_btagCSVV2}.Zip().Where(lambda jet:
341-
jet.btagCSVV2 >= 0.5
339+
.Where(lambda g: {"pt": g.Jet_pt,
340+
"btagCSVV2": g.Jet_btagCSVV2}.Zip().Where(lambda jet:
341+
jet.btagCSVV2 > 0.5
342342
and jet.pt > 25).Count() >= 1)\
343343
.Select(lambda h: {"Electron_pt": h.Electron_pt,
344344
"Muon_pt": h.Muon_pt,

analyses/cms-open-data-ttbar/utils/__init__.py

-2
Original file line number | Diff line number | Diff line change
@@ -93,8 +93,6 @@ def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name=""):
9393
def save_histograms(all_histograms, fileset, filename):
9494
nominal_samples = [sample for sample in fileset.keys() if "nominal" in sample]
9595

96-
all_histograms += 1e-6 # add minimal event count to all bins to avoid crashes when processing a small number of samples
97-
9896
pseudo_data = (all_histograms[:, :, "ttbar", "ME_var"] + all_histograms[:, :, "ttbar", "PS_var"]) / 2 + all_histograms[:, :, "wjets", "nominal"]
9997

10098
with uproot.recreate(filename) as f:

0 commit comments

Comments
 (0)