Skip to content

Commit 9afdc3e

Browse files
committed
Separate snapshot for variations
1 parent 2dc1013 commit 9afdc3e

File tree

11 files changed

+273
-86
lines changed

11 files changed

+273
-86
lines changed

docs/processor/modules/index.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ Modules for processor
66
:maxdepth: 1
77
:caption: Contents:
88

9-
jme_calculator
9+
jme_calculator
10+
snapshot
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
Snapshot module
3+
====================
4+
5+
.. automodule:: mkShapesRDF.processor.modules.Snapshot
6+
:members:
7+
:undoc-members:
8+
:show-inheritance:
9+
:special-members: __init__

docs/processor/scripts.rst

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ Basic arguments
1919
:module: mkShapesRDF.processor.scripts.mkPostProc
2020
:func: defaultParser
2121
:prog: mkPostProc
22-
:nodefault:
2322

2423

2524
Operation mode 0 arguments
@@ -29,7 +28,6 @@ Operation mode 0 arguments
2928
:module: mkShapesRDF.processor.scripts.mkPostProc
3029
:func: operationMode0Parser
3130
:prog: mkPostProc -o 0 -p PROD -s STEP -sN SAMPLENAME
32-
:nodefault:
3331

3432
Operation mode 1 arguments
3533
~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -38,4 +36,3 @@ Operation mode 1 arguments
3836
:module: mkShapesRDF.processor.scripts.mkPostProc
3937
:func: operationMode1Parser
4038
:prog: mkPostProc -o 1 -p PROD -s STEP -sN SAMPLENAME
41-
:nodefault:

mkShapesRDF/processor/framework/Steps_cfg.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@
1919
"finalSnapshot_DATA",
2020
],
2121
},
22-
"jmeCalculator_18_debug": {
22+
"JES_18": {
2323
"isChain": True,
2424
"do4MC": True,
2525
"do4Data": False,
2626
"subTargets": [
27-
"jmeCalculator_18UL",
27+
"JES_modules_18UL",
2828
"l2Kin",
29-
"finalSnapshot_debugJES",
29+
"finalSnapshot_Variations",
3030
],
3131
},
3232
"MCl1loose2018v9": {
@@ -54,7 +54,7 @@
5454
"do4Data": False,
5555
"subTargets": [
5656
"baseW",
57-
"jmeCalculator_18UL",
57+
"JES_modules_18UL",
5858
# "JERsMCUL",
5959
# # "FatJERsMCUL",
6060
"btagPerJet_DeepCSV_2018UL",
@@ -188,7 +188,7 @@
188188
"declare": "baseW = lambda : BaseW(sampleName, files, xs_db, RPLME_genEventSumw)",
189189
"module": "baseW()",
190190
},
191-
"jmeCalculator_18UL": {
191+
"JES_modules_18UL": {
192192
"isChain": False,
193193
"do4MC": True,
194194
"do4Data": False,
@@ -229,33 +229,40 @@
229229
"declare": 'btagPerJet_DeepJet_2018UL = lambda : btagSFProducerLatinos(2018, "deepJet", ["shape"], "shape", "RPLME_FW/processor/data/jsonpog-integration/POG/BTV/2018_UL/btagging.json.gz", ["jes","jesAbsolute","jesAbsolute_2018","jesBBEC1","jesBBEC1_2018","jesEC2","jesEC2_2018","jesFlavorQCD","jesHF","jesHF_2018","jesRelativeBal","jesRelativeSample_2018"])',
230230
"module": "btagPerJet_DeepJet_2018UL()",
231231
},
232-
"finalSnapshot_debugJES": {
232+
"finalSnapshot_MC": {
233233
"isChain": False,
234234
"do4MC": True,
235-
"do4Data": True,
235+
"do4Data": False,
236236
"import": "mkShapesRDF.processor.modules.Snapshot",
237-
"declare": "snapshot = lambda : Snapshot(['*'], \
238-
'RPLME_EOSPATH', 'RPLME_OUTPUTFILENAME', \
239-
includeVariations=True, splitVariations=True, storeNominals=False )",
237+
"declare": "snapshot = lambda : Snapshot( \
238+
tmpOutputFilename='output.root', \
239+
columns=['*'], \
240+
eosPath='RPLME_EOSPATH', outputFilename='RPLME_OUTPUTFILENAME', \
241+
includeVariations=False, splitVariations=False, storeNominals=True )",
240242
"module": "snapshot()",
241243
},
242-
"finalSnapshot_MC": {
244+
"finalSnapshot_Variations": {
243245
"isChain": False,
244246
"do4MC": True,
245247
"do4Data": False,
246248
"import": "mkShapesRDF.processor.modules.Snapshot",
247-
"declare": "snapshot = lambda : Snapshot(['CleanJet_*', 'Jet_*', \
248-
'Lepton_*', 'Electron_*', 'Muon_*', 'NewMet*', 'baseW', 'genWeight', 'CUT'], \
249-
'RPLME_EOSPATH', 'RPLME_OUTPUTFILENAME', \
250-
includeVariations=False, splitVariations=False, storeNominals=True )",
249+
"declare": "snapshot = lambda : Snapshot( \
250+
tmpOutputFilename='output.root', \
251+
columns=['*'], \
252+
eosPath='RPLME_EOSPATH', outputFilename='RPLME_OUTPUTFILENAME', \
253+
includeVariations=True, splitVariations=True, storeNominals=False )",
251254
"module": "snapshot()",
252255
},
253256
"finalSnapshot_DATA": {
254257
"isChain": False,
255258
"do4MC": False,
256259
"do4Data": True,
257260
"import": "mkShapesRDF.processor.modules.Snapshot",
258-
"declare": "snapshot = lambda : Snapshot('output.root', ['CleanJet_*', 'Jet_*', 'Lepton_*', 'Electron_*', 'Muon_*',, 'CUT'])",
261+
"declare": "snapshot = lambda : Snapshot( \
262+
tmpOutputFilename='output.root', \
263+
columns=['*'], \
264+
eosPath='RPLME_EOSPATH', outputFilename='RPLME_OUTPUTFILENAME', \
265+
includeVariations=False, splitVariations=False, storeNominals=True )",
259266
"module": "snapshot()",
260267
},
261268
}

mkShapesRDF/processor/framework/mRDF.py

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def readRDF(self, *ar, **kw):
103103
self.cols = list(map(lambda k: str(k), self.df.GetColumnNames()))
104104
return self
105105

106-
def Define(self, a, b, includeVariations=True):
106+
def Define(self, a, b, excludeVariations=[]):
107107
r"""Define a new column, if the column already exists redefine it.
108108
109109
Parameters
@@ -114,8 +114,9 @@ def Define(self, a, b, includeVariations=True):
114114
b : str
115115
The expression to be evaluated to define the new column
116116
117-
includeVariations : bool, optional, default: True
118-
Whether to include variations or not
117+
excludeVariations : `list of str`, optional, default: []
118+
List of patterns of variations to exclude. If ``*`` is used, all variations will
119+
be excluded and the defined column will be nominal only.
119120
120121
Returns
121122
-------
@@ -124,8 +125,9 @@ def Define(self, a, b, includeVariations=True):
124125
125126
Notes
126127
-----
127-
If ``includeVariations`` is ``True``, the define expression (``b``) will be checked for variations.
128-
If variations of the define expression are found, they will be defined for the new column as well.
128+
If ``excludeVariations`` is ``[]``, the define expression (``b``) will be checked for all possible variations.
129+
If variations of the define expression are found, they will be defined for the new column as well
130+
(i.e. varied ``b`` will be defined as variations of ``a``).
129131
"""
130132

131133
c = self.Copy()
@@ -138,41 +140,40 @@ def Define(self, a, b, includeVariations=True):
138140
c.df = c.df.Redefine(colName, b)
139141
c.cols = list(set(c.cols + [colName]))
140142

141-
if includeVariations:
142-
# check variations
143-
depVars = ParseCpp.listOfVariables(ParseCpp.parse(b))
144-
variations = {}
145-
for variationName in c.variations.keys():
146-
s = list(
147-
filter(
148-
lambda k: k in depVars, c.variations[variationName]["variables"]
143+
# check variations
144+
depVars = ParseCpp.listOfVariables(ParseCpp.parse(b))
145+
variations = {}
146+
for variationName in c.variations.keys():
147+
if len([1 for x in excludeVariations if fnmatch(variationName, x)]) > 0:
148+
# if variationName matches a pattern of excludeVariations, skip it
149+
continue
150+
151+
s = list(
152+
filter(lambda k: k in depVars, c.variations[variationName]["variables"])
153+
)
154+
if len(s) > 0:
155+
# only register variations if they have an impact on "a" variable
156+
variations[variationName] = {
157+
"tags": c.variations[variationName]["tags"],
158+
"variables": s,
159+
}
160+
161+
for variationName in variations.keys():
162+
varied_bs = []
163+
for tag in variations[variationName]["tags"]:
164+
varied_b = ParseCpp.parse(b)
165+
for variable in variations[variationName]["variables"]:
166+
varied_b = ParseCpp.replace(
167+
varied_b,
168+
variable,
169+
mRDF.variationNaming(variationName, tag, variable),
149170
)
150-
)
151-
if len(s) > 0:
152-
# only register variations if they have an impact on "a" variable
153-
variations[variationName] = {
154-
"tags": c.variations[variationName]["tags"],
155-
"variables": s,
156-
}
157-
158-
for variationName in variations.keys():
159-
varied_bs = []
160-
for tag in variations[variationName]["tags"]:
161-
varied_b = ParseCpp.parse(b)
162-
for variable in variations[variationName]["variables"]:
163-
varied_b = ParseCpp.replace(
164-
varied_b,
165-
variable,
166-
mRDF.variationNaming(variationName, tag, variable),
167-
)
168-
varied_bs.append(ParseCpp.format(varied_b))
169-
_type = c.df.GetColumnType(colName)
170-
expression = (
171-
ParseCpp.RVecExpression(_type) + " {" + ", ".join(varied_bs) + "}"
172-
)
173-
c = c.Vary(
174-
a, expression, variations[variationName]["tags"], variationName
175-
)
171+
varied_bs.append(ParseCpp.format(varied_b))
172+
_type = c.df.GetColumnType(colName)
173+
expression = (
174+
ParseCpp.RVecExpression(_type) + " {" + ", ".join(varied_bs) + "}"
175+
)
176+
c = c.Vary(a, expression, variations[variationName]["tags"], variationName)
176177

177178
# move back nominal value to the right column name -> a
178179
if a not in (c.cols + c.cols_d):
@@ -244,14 +245,14 @@ def Vary(self, colName, expression, variationTags=["down", "up"], variationName=
244245

245246
# define a column that will contain the two variations in a vector of len 2
246247
c = c.Define(
247-
colName + "__" + variationName, expression, includeVariations=False
248+
colName + "__" + variationName, expression, excludeVariations=["*"]
248249
)
249250

250251
for i, variationTag in enumerate(variationTags):
251252
c = c.Define(
252253
mRDF.variationNaming(variationName, variationTag, colName),
253254
colName + "__" + variationName + "[" + str(i) + "]",
254-
includeVariations=False,
255+
excludeVariations=["*"],
255256
)
256257

257258
c = c.DropColumns(colName + "__" + variationName)

mkShapesRDF/processor/framework/processor.py

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,17 +110,19 @@ def getFiles_cfg(self, sampleName):
110110
"""
111111
if self.inputFolder == "":
112112
# if no inputFolder is given -> DAS
113-
return {
113+
d = {
114114
"process": self.Samples[sampleName]["nanoAOD"],
115115
"instance": self.Samples[sampleName].get("instance", ""),
116116
}
117117
else:
118-
return {
119-
"redirector": self.redirector,
118+
d = {
120119
"folder": self.inputFolder,
121120
"process": sampleName,
122121
"isLatino": self.isLatino,
123122
}
123+
if self.redirector != "":
124+
d["redirector"] = self.redirector
125+
return d
124126

125127
def addDeclareLines(self, step):
126128
"""
@@ -160,6 +162,8 @@ def run(self):
160162
self.fPy += "ROOT.EnableImplicitMT()\n"
161163

162164
self.fPy += "from mkShapesRDF.processor.framework.mRDF import mRDF\n"
165+
self.fPy += "import subprocess\n"
166+
self.fPy += "import sys\n"
163167

164168
if Productions[self.prodName]["isData"]:
165169
self.fPy += (
@@ -218,7 +222,29 @@ def run(self):
218222

219223
self.fPy += "sampleName = 'RPLME_SAMPLENAME'\n"
220224

221-
self.fPy += "files = RPLME_FILES\n"
225+
self.fPy += "_files = RPLME_FILES\n"
226+
self.fPy += dedent(
227+
"""
228+
files = []
229+
for f in _files:
230+
filename = f.split('/')[-1]
231+
filename = 'input__' + filename
232+
files.append(filename)
233+
proc = 0
234+
if "root://" in f:
235+
proc = subprocess.Popen(f"xrdcp {f} {filename}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
236+
else:
237+
proc = subprocess.Popen(f"cp {f} {filename}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
238+
239+
out, err = proc.communicate()
240+
print(out.decode('utf-8'))
241+
print(err.decode('utf-8'), file=sys.stderr)
242+
if proc.returncode != 0:
243+
print(f"Error copying file {f}", file=sys.stderr)
244+
sys.exit(1)\n
245+
"""
246+
)
247+
222248
self.fPy += f"ROOT.gInterpreter.Declare('#include \"{frameworkPath}/include/headers.hh\"')\n"
223249

224250
self.fPy += "df = mRDF()\n"
@@ -240,11 +266,15 @@ def run(self):
240266
if len(snapshots) != 0:
241267
ROOT.RDF.RunGraphs(snapshots)
242268
243-
import subprocess
244269
for destination in snapshot_destinations:
270+
copyFromInputFiles = destination[1]
245271
outputFilename = destination[0]
246-
outputFolderPath = destination[1]
247-
outputFilenameEOS = destination[2]
272+
273+
if copyFromInputFiles:
274+
Snapshot.CopyFromInputFiles(outputFilename, files)
275+
276+
outputFolderPath = destination[2]
277+
outputFilenameEOS = destination[3]
248278
249279
# Create output folder
250280
proc = subprocess.Popen(f"mkdir -p {outputFolderPath}", shell=True)
@@ -277,6 +307,14 @@ def sciNot(value):
277307
from tabulate import tabulate
278308
279309
print(tabulate(data, headers=["desc.", "value"]))
310+
311+
for f in files:
312+
print('Removing input file', f)
313+
proc = subprocess.Popen(f"rm {f}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
314+
out, err = proc.communicate()
315+
print(out.decode('utf-8'))
316+
print(err.decode('utf-8'), file=sys.stderr)
317+
280318
"""
281319
)
282320

@@ -305,6 +343,9 @@ def sciNot(value):
305343
for i, sampleName in enumerate(samplesToProcess)
306344
if i not in samplesNotToProcess
307345
]
346+
if len(samplesToProcess) == 0:
347+
print("No samples to process", file=sys.stderr)
348+
sys.exit(1)
308349

309350
for sampleName in samplesToProcess:
310351
files_cfg = self.getFiles_cfg(sampleName)

mkShapesRDF/processor/modules/JMECalculator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def __init__(
1212
JEC_era,
1313
JER_era,
1414
jet_object,
15-
met_collections=["PuppiMET", "MET"],
15+
met_collections=["PuppiMET", "MET", "RawMET"],
1616
do_Jets=True,
1717
do_MET=True,
1818
do_JER=True,

0 commit comments

Comments
 (0)