
Commit 56bf0aa

minimize jupytext-related changes
1 parent 7c983ff

File tree: 1 file changed (+21 -21 lines)

analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.py

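The change repeated across all 21 modified lines is the Jupytext percent-format cell marker: `# %%` becomes `# %% tags=[]`, so each code cell now carries an explicit, empty `tags` list in its metadata. A minimal sketch of the percent format under that convention (the cells below are illustrative, not taken from this repository):

# %% [markdown]
# A markdown cell, followed by two code cells. The second cell marker carries
# metadata that Jupytext round-trips to "metadata": {"tags": []} in the paired .ipynb.

# %%
print("a cell without explicit metadata")

# %% tags=[]
print("a cell whose metadata includes an empty tags list")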
@@ -40,7 +40,7 @@
 # %% [markdown]
 # ### Imports: setting up our environment
 
-# %%
+# %% tags=[]
 import asyncio
 import logging
 import os
@@ -89,7 +89,7 @@
 #
 # The input files are all in the 1–3 GB range.
 
-# %%
+# %% tags=[]
 ### GLOBAL CONFIGURATION
 # input files per process, set to e.g. 10 (smaller number = faster)
 N_FILES_MAX_PER_SAMPLE = 5
@@ -114,7 +114,7 @@
 # - calculating systematic uncertainties at the event and object level,
 # - filling all the information into histograms that get aggregated and ultimately returned to us by `coffea`.
 
-# %%
+# %% tags=[]
 # functions creating systematic variations
 def flat_variation(ones):
     # 2.5% weight variations
@@ -316,7 +316,7 @@ def postprocess(self, accumulator):
 #
 # Here, we gather all the required information about the files we want to process: paths to the files and associated metadata.
 
-# %%
+# %% tags=[]
 fileset = utils.construct_fileset(N_FILES_MAX_PER_SAMPLE, use_xcache=False, af_name=config["benchmarking"]["AF_NAME"])  # local files on /data for ssl-dev
 
 print(f"processes in fileset: {list(fileset.keys())}")
@@ -329,7 +329,7 @@ def postprocess(self, accumulator):
 #
 # Define the func_adl query to be used for extracting columns and filtering.
 
-# %%
+# %% tags=[]
 def get_query(source: ObjectStream) -> ObjectStream:
     """Query for event / column selection: >=4j >=1b, ==1 lep with pT>25 GeV, return relevant columns
     """
@@ -355,7 +355,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
 #
 # Using the queries created with `func_adl`, `ServiceX` reads the CMS Open Data files and builds cached files containing only the event information dictated by the query.
 
-# %%
+# %% tags=[]
 if USE_SERVICEX:
     # dummy dataset on which to generate the query
     dummy_ds = ServiceXSourceUpROOT("cernopendata://dummy", "Events", backend_name="uproot")
@@ -385,7 +385,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
 #
 # When `USE_SERVICEX` is false, the input files need to be processed during this step as well.
 
-# %%
+# %% tags=[]
 NanoAODSchema.warn_missing_crossrefs = False  # silences warnings about branches we will not use here
 if USE_DASK:
     executor = processor.DaskExecutor(client=utils.get_client(af=config["global"]["AF"]))
@@ -410,7 +410,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
 
 print(f"\nexecution took {exec_time:.2f} seconds")
 
-# %%
+# %% tags=[]
 # track metrics
 dataset_source = "/data" if fileset["ttbar__nominal"]["files"][0].startswith("/data") else "https://xrootd-local.unl.edu:1094"  # TODO: xcache support
 metrics.update({
@@ -448,15 +448,15 @@ def get_query(source: ObjectStream) -> ObjectStream:
 # Let's have a look at the data we obtained.
 # We built histograms in two phase space regions, for multiple physics processes and systematic variations.
 
-# %%
+# %% tags=[]
 utils.set_style()
 
 all_histograms[120j::hist.rebin(2), "4j1b", :, "nominal"].stack("process")[::-1].plot(stack=True, histtype="fill", linewidth=1, edgecolor="grey")
 plt.legend(frameon=False)
 plt.title(">= 4 jets, 1 b-tag")
 plt.xlabel("HT [GeV]");
 
-# %%
+# %% tags=[]
 all_histograms[:, "4j2b", :, "nominal"].stack("process")[::-1].plot(stack=True, histtype="fill", linewidth=1, edgecolor="grey")
 plt.legend(frameon=False)
 plt.title(">= 4 jets, >= 2 b-tags")
@@ -471,7 +471,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
 #
 # We are making use of [UHI](https://uhi.readthedocs.io/) here to re-bin.
 
-# %%
+# %% tags=[]
 # b-tagging variations
 all_histograms[120j::hist.rebin(2), "4j1b", "ttbar", "nominal"].plot(label="nominal", linewidth=2)
 all_histograms[120j::hist.rebin(2), "4j1b", "ttbar", "btag_var_0_up"].plot(label="NP 1", linewidth=2)
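The indexing used in these plotting cells follows UHI: a complex number such as `120j` locates a bin by coordinate (HT = 120 GeV) rather than by index, and `hist.rebin(2)` merges adjacent bins in pairs. A self-contained sketch with a standalone histogram, whose axis definition is assumed for illustration:

import hist
import numpy as np

# 25 regular bins from 50 to 550 GeV (assumed to mirror the notebook's HT axis)
h = hist.Hist.new.Reg(25, 50, 550, name="HT", label="HT [GeV]").Weight()
h.fill(HT=np.random.normal(250, 75, 10_000))

# start at the bin containing HT = 120 GeV, then merge bins two by two
h_rebinned = h[120j::hist.rebin(2)]
print(h_rebinned.axes["HT"].edges)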
@@ -482,7 +482,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
 plt.xlabel("HT [GeV]")
 plt.title("b-tagging variations");
 
-# %%
+# %% tags=[]
 # jet energy scale variations
 all_histograms[:, "4j2b", "ttbar", "nominal"].plot(label="nominal", linewidth=2)
 all_histograms[:, "4j2b", "ttbar", "pt_scale_up"].plot(label="scale up", linewidth=2)
@@ -497,7 +497,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
 # We'll save everything to disk for subsequent usage.
 # This also builds pseudo-data by combining events from the various simulation setups we have processed.
 
-# %%
+# %% tags=[]
 utils.save_histograms(all_histograms, fileset, "histograms.root")
 
 # %% [markdown]
@@ -510,7 +510,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
 # A statistical model has been defined in `cabinetry_config.yml`, ready to be used with our output.
 # We will use `cabinetry` to combine all histograms into a `pyhf` workspace and fit the resulting statistical model to the pseudodata we built.
 
-# %%
+# %% tags=[]
 config = cabinetry.configuration.load("cabinetry_config.yml")
 
 # rebinning: lower edge 110 GeV, merge bins 2->1
@@ -523,13 +523,13 @@ def get_query(source: ObjectStream) -> ObjectStream:
 # %% [markdown]
 # We can inspect the workspace with `pyhf`, or use `pyhf` to perform inference.
 
-# %%
+# %% tags=[]
 # !pyhf inspect workspace.json | head -n 20
 
 # %% [markdown]
 # Let's try out what we built: the next cell performs a maximum likelihood fit of our statistical model to the pseudodata.
 
-# %%
+# %% tags=[]
 model, data = cabinetry.model_utils.model_and_data(ws)
 fit_results = cabinetry.fit.fit(model, data)
 
@@ -540,31 +540,31 @@ def get_query(source: ObjectStream) -> ObjectStream:
 # %% [markdown]
 # For this pseudodata, what is the resulting ttbar cross-section divided by the Standard Model prediction?
 
-# %%
+# %% tags=[]
 poi_index = model.config.poi_index
 print(f"\nfit result for ttbar_norm: {fit_results.bestfit[poi_index]:.3f} +/- {fit_results.uncertainty[poi_index]:.3f}")
 
 # %% [markdown]
 # Let's also visualize the model before and after the fit, in both the regions we are using.
 # The binning here corresponds to the binning used for the fit.
 
-# %%
+# %% tags=[]
 model_prediction = cabinetry.model_utils.prediction(model)
 figs = cabinetry.visualize.data_mc(model_prediction, data, close_figure=True, config=config)
 figs[0]["figure"]
 
-# %%
+# %% tags=[]
 figs[1]["figure"]
 
 # %% [markdown]
 # We can see very good post-fit agreement.
 
-# %%
+# %% tags=[]
 model_prediction_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results)
 figs = cabinetry.visualize.data_mc(model_prediction_postfit, data, close_figure=True, config=config)
 figs[0]["figure"]
 
-# %%
+# %% tags=[]
 figs[1]["figure"]
 
 # %% [markdown]
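Because the commit touches only cell markers, the result can be checked by reading the paired script back with Jupytext's Python API and inspecting cell metadata. A small sketch, assuming the file path shown in this diff:

import jupytext

# parse the percent-format script into an in-memory notebook
nb = jupytext.read("analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.py", fmt="py:percent")

for cell in nb.cells:
    if cell.cell_type == "code":
        # after this commit, each code cell should carry an empty tags list
        print(cell.metadata.get("tags"))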
