giorgiopizz
diff --git a/‎docs/processor/modules/index.rst‎
Lines changed: 2 additions & 1 deletion b/‎docs/processor/modules/index.rst‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎docs/processor/modules/snapshot.rst‎
Lines changed: 9 additions & 0 deletions b/‎docs/processor/modules/snapshot.rst‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎docs/processor/scripts.rst‎
Lines changed: 0 additions & 3 deletions b/‎docs/processor/scripts.rst‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎mkShapesRDF/processor/framework/Steps_cfg.py‎
Lines changed: 23 additions & 16 deletions b/‎mkShapesRDF/processor/framework/Steps_cfg.py‎
Lines changed: 23 additions & 16 deletions
diff --git a/‎mkShapesRDF/processor/framework/mRDF.py‎
Lines changed: 42 additions & 41 deletions b/‎mkShapesRDF/processor/framework/mRDF.py‎
Lines changed: 42 additions & 41 deletions
diff --git a/‎mkShapesRDF/processor/framework/processor.py‎
Lines changed: 48 additions & 7 deletions b/‎mkShapesRDF/processor/framework/processor.py‎
Lines changed: 48 additions & 7 deletions
diff --git a/‎mkShapesRDF/processor/modules/JMECalculator.py‎
Lines changed: 1 addition & 1 deletion b/‎mkShapesRDF/processor/modules/JMECalculator.py‎
Lines changed: 1 addition & 1 deletion
@@ -6,4 +6,5 @@ Modules for processor
    :maxdepth: 1
    :caption: Contents:
 
-   jme_calculator
+   jme_calculator
+   snapshot
@@ -0,0 +1,9 @@
+
+Snapshot module
+====================
+
+.. automodule:: mkShapesRDF.processor.modules.Snapshot
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :special-members: __init__
@@ -19,7 +19,6 @@ Basic arguments
    :module: mkShapesRDF.processor.scripts.mkPostProc
    :func: defaultParser
    :prog: mkPostProc
-   :nodefault:
 
 
 Operation mode 0 arguments
@@ -29,7 +28,6 @@ Operation mode 0 arguments
    :module: mkShapesRDF.processor.scripts.mkPostProc
    :func: operationMode0Parser
    :prog: mkPostProc -o 0 -p PROD -s STEP -sN SAMPLENAME
-   :nodefault:
 
 Operation mode 1 arguments
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -38,4 +36,3 @@ Operation mode 1 arguments
    :module: mkShapesRDF.processor.scripts.mkPostProc
    :func: operationMode1Parser
    :prog: mkPostProc -o 1 -p PROD -s STEP -sN SAMPLENAME
-   :nodefault:
@@ -19,14 +19,14 @@
             "finalSnapshot_DATA",
         ],
     },
-    "jmeCalculator_18_debug": {
+    "JES_18": {
         "isChain": True,
         "do4MC": True,
         "do4Data": False,
         "subTargets": [
-            "jmeCalculator_18UL",
+            "JES_modules_18UL",
             "l2Kin",
-            "finalSnapshot_debugJES",
+            "finalSnapshot_Variations",
         ],
     },
     "MCl1loose2018v9": {
@@ -54,7 +54,7 @@
         "do4Data": False,
         "subTargets": [
             "baseW",
-            "jmeCalculator_18UL",
+            "JES_modules_18UL",
             # "JERsMCUL",
             # # "FatJERsMCUL",
             "btagPerJet_DeepCSV_2018UL",
@@ -188,7 +188,7 @@
         "declare": "baseW = lambda : BaseW(sampleName, files, xs_db, RPLME_genEventSumw)",
         "module": "baseW()",
     },
-    "jmeCalculator_18UL": {
+    "JES_modules_18UL": {
         "isChain": False,
         "do4MC": True,
         "do4Data": False,
@@ -229,33 +229,40 @@
         "declare": 'btagPerJet_DeepJet_2018UL = lambda : btagSFProducerLatinos(2018, "deepJet", ["shape"], "shape", "RPLME_FW/processor/data/jsonpog-integration/POG/BTV/2018_UL/btagging.json.gz", ["jes","jesAbsolute","jesAbsolute_2018","jesBBEC1","jesBBEC1_2018","jesEC2","jesEC2_2018","jesFlavorQCD","jesHF","jesHF_2018","jesRelativeBal","jesRelativeSample_2018"])',
         "module": "btagPerJet_DeepJet_2018UL()",
     },
-    "finalSnapshot_debugJES": {
+    "finalSnapshot_MC": {
         "isChain": False,
         "do4MC": True,
-        "do4Data": True,
+        "do4Data": False,
         "import": "mkShapesRDF.processor.modules.Snapshot",
-        "declare": "snapshot = lambda : Snapshot(['*'], \
-            'RPLME_EOSPATH', 'RPLME_OUTPUTFILENAME', \
-            includeVariations=True, splitVariations=True, storeNominals=False )",
+        "declare": "snapshot = lambda : Snapshot( \
+                tmpOutputFilename='output.root', \
+                columns=['*'], \
+                eosPath='RPLME_EOSPATH', outputFilename='RPLME_OUTPUTFILENAME', \
+                includeVariations=False, splitVariations=False, storeNominals=True )",
         "module": "snapshot()",
     },
-    "finalSnapshot_MC": {
+    "finalSnapshot_Variations": {
         "isChain": False,
         "do4MC": True,
         "do4Data": False,
         "import": "mkShapesRDF.processor.modules.Snapshot",
-        "declare": "snapshot = lambda : Snapshot(['CleanJet_*', 'Jet_*', \
-                'Lepton_*', 'Electron_*', 'Muon_*', 'NewMet*', 'baseW', 'genWeight', 'CUT'], \
-                'RPLME_EOSPATH', 'RPLME_OUTPUTFILENAME', \
-                includeVariations=False, splitVariations=False, storeNominals=True )",
+        "declare": "snapshot = lambda : Snapshot( \
+                tmpOutputFilename='output.root', \
+                columns=['*'], \
+                eosPath='RPLME_EOSPATH', outputFilename='RPLME_OUTPUTFILENAME', \
+                includeVariations=True, splitVariations=True, storeNominals=False )",
         "module": "snapshot()",
     },
     "finalSnapshot_DATA": {
         "isChain": False,
         "do4MC": False,
         "do4Data": True,
         "import": "mkShapesRDF.processor.modules.Snapshot",
-        "declare": "snapshot = lambda : Snapshot('output.root', ['CleanJet_*', 'Jet_*', 'Lepton_*', 'Electron_*', 'Muon_*',, 'CUT'])",
+        "declare": "snapshot = lambda : Snapshot( \
+                tmpOutputFilename='output.root', \
+                columns=['*'], \
+                eosPath='RPLME_EOSPATH', outputFilename='RPLME_OUTPUTFILENAME', \
+                includeVariations=False, splitVariations=False, storeNominals=True )",
         "module": "snapshot()",
     },
 }
@@ -103,7 +103,7 @@ def readRDF(self, *ar, **kw):
         self.cols = list(map(lambda k: str(k), self.df.GetColumnNames()))
         return self
 
-    def Define(self, a, b, includeVariations=True):
+    def Define(self, a, b, excludeVariations=[]):
         r"""Define a new column, if the column already exists redefine it.
 
         Parameters
@@ -114,8 +114,9 @@ def Define(self, a, b, includeVariations=True):
         b : str
             The expression to be evaluated to define the new column
 
-        includeVariations : bool, optional, default: True
-            Whether to include variations or not
+        excludeVariations : `list of str`, optional, default: []
+            List of patterns of variations to exclude. If ``*`` is used, all variations will
+            be excluded and the defined column will be nominal only.
 
         Returns
         -------
@@ -124,8 +125,9 @@ def Define(self, a, b, includeVariations=True):
 
         Notes
         -----
-        If ``includeVariations`` is ``True``, the define expression (``b``) will be checked for variations.
-        If variations of the define expression are found, they will be defined for the new column as well.
+        If ``excludeVariations`` is ``[]``, the define expression (``b``) will be checked for all possible variations.
+        If variations of the define expression are found, they will be defined for the new column as well
+        (i.e. varied ``b`` will be defined as variations of ``a``).
         """
 
         c = self.Copy()
@@ -138,41 +140,40 @@ def Define(self, a, b, includeVariations=True):
             c.df = c.df.Redefine(colName, b)
         c.cols = list(set(c.cols + [colName]))
 
-        if includeVariations:
-            # check variations
-            depVars = ParseCpp.listOfVariables(ParseCpp.parse(b))
-            variations = {}
-            for variationName in c.variations.keys():
-                s = list(
-                    filter(
-                        lambda k: k in depVars, c.variations[variationName]["variables"]
+        # check variations
+        depVars = ParseCpp.listOfVariables(ParseCpp.parse(b))
+        variations = {}
+        for variationName in c.variations.keys():
+            if len([1 for x in excludeVariations if fnmatch(variationName, x)]) > 0:
+                # if variationName matches a pattern of excludeVariations, skip it
+                continue
+
+            s = list(
+                filter(lambda k: k in depVars, c.variations[variationName]["variables"])
+            )
+            if len(s) > 0:
+                # only register variations if they have an impact on "a" variable
+                variations[variationName] = {
+                    "tags": c.variations[variationName]["tags"],
+                    "variables": s,
+                }
+
+        for variationName in variations.keys():
+            varied_bs = []
+            for tag in variations[variationName]["tags"]:
+                varied_b = ParseCpp.parse(b)
+                for variable in variations[variationName]["variables"]:
+                    varied_b = ParseCpp.replace(
+                        varied_b,
+                        variable,
+                        mRDF.variationNaming(variationName, tag, variable),
                     )
-                )
-                if len(s) > 0:
-                    # only register variations if they have an impact on "a" variable
-                    variations[variationName] = {
-                        "tags": c.variations[variationName]["tags"],
-                        "variables": s,
-                    }
-
-            for variationName in variations.keys():
-                varied_bs = []
-                for tag in variations[variationName]["tags"]:
-                    varied_b = ParseCpp.parse(b)
-                    for variable in variations[variationName]["variables"]:
-                        varied_b = ParseCpp.replace(
-                            varied_b,
-                            variable,
-                            mRDF.variationNaming(variationName, tag, variable),
-                        )
-                    varied_bs.append(ParseCpp.format(varied_b))
-                _type = c.df.GetColumnType(colName)
-                expression = (
-                    ParseCpp.RVecExpression(_type) + " {" + ", ".join(varied_bs) + "}"
-                )
-                c = c.Vary(
-                    a, expression, variations[variationName]["tags"], variationName
-                )
+                varied_bs.append(ParseCpp.format(varied_b))
+            _type = c.df.GetColumnType(colName)
+            expression = (
+                ParseCpp.RVecExpression(_type) + " {" + ", ".join(varied_bs) + "}"
+            )
+            c = c.Vary(a, expression, variations[variationName]["tags"], variationName)
 
         # move back nominal value to the right column name -> a
         if a not in (c.cols + c.cols_d):
@@ -244,14 +245,14 @@ def Vary(self, colName, expression, variationTags=["down", "up"], variationName=
 
         # define a column that will contain the two variations in a vector of len 2
         c = c.Define(
-            colName + "__" + variationName, expression, includeVariations=False
+            colName + "__" + variationName, expression, excludeVariations=["*"]
         )
 
         for i, variationTag in enumerate(variationTags):
             c = c.Define(
                 mRDF.variationNaming(variationName, variationTag, colName),
                 colName + "__" + variationName + "[" + str(i) + "]",
-                includeVariations=False,
+                excludeVariations=["*"],
             )
 
         c = c.DropColumns(colName + "__" + variationName)
 
@@ -110,17 +110,19 @@ def getFiles_cfg(self, sampleName):
         """
         if self.inputFolder == "":
             # if no inputFolder is given -> DAS
-            return {
+            d = {
                 "process": self.Samples[sampleName]["nanoAOD"],
                 "instance": self.Samples[sampleName].get("instance", ""),
             }
         else:
-            return {
-                "redirector": self.redirector,
+            d = {
                 "folder": self.inputFolder,
                 "process": sampleName,
                 "isLatino": self.isLatino,
             }
+        if self.redirector != "":
+            d["redirector"] = self.redirector
+        return d
 
     def addDeclareLines(self, step):
         """
@@ -160,6 +162,8 @@ def run(self):
             self.fPy += "ROOT.EnableImplicitMT()\n"
 
         self.fPy += "from mkShapesRDF.processor.framework.mRDF import mRDF\n"
+        self.fPy += "import subprocess\n"
+        self.fPy += "import sys\n"
 
         if Productions[self.prodName]["isData"]:
             self.fPy += (
@@ -218,7 +222,29 @@ def run(self):
 
         self.fPy += "sampleName = 'RPLME_SAMPLENAME'\n"
 
-        self.fPy += "files = RPLME_FILES\n"
+        self.fPy += "_files = RPLME_FILES\n"
+        self.fPy += dedent(
+            """
+        files = []
+        for f in _files:
+            filename = f.split('/')[-1]
+            filename = 'input__' + filename
+            files.append(filename)
+            proc = 0
+            if "root://" in f:
+                proc = subprocess.Popen(f"xrdcp {f} {filename}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            else:
+                proc = subprocess.Popen(f"cp {f} {filename}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+            out, err = proc.communicate()
+            print(out.decode('utf-8'))
+            print(err.decode('utf-8'), file=sys.stderr)
+            if proc.returncode != 0:
+                print(f"Error copying file {f}", file=sys.stderr)
+                sys.exit(1)\n
+        """
+        )
+
         self.fPy += f"ROOT.gInterpreter.Declare('#include \"{frameworkPath}/include/headers.hh\"')\n"
 
         self.fPy += "df = mRDF()\n"
@@ -240,11 +266,15 @@ def run(self):
         if len(snapshots) != 0:
             ROOT.RDF.RunGraphs(snapshots)
 
-        import subprocess
         for destination in snapshot_destinations:
+            copyFromInputFiles = destination[1]
             outputFilename = destination[0]
-            outputFolderPath = destination[1]
-            outputFilenameEOS = destination[2]
+
+            if copyFromInputFiles:
+                Snapshot.CopyFromInputFiles(outputFilename, files)
+
+            outputFolderPath = destination[2]
+            outputFilenameEOS = destination[3]
 
             # Create output folder
             proc = subprocess.Popen(f"mkdir -p {outputFolderPath}", shell=True)
@@ -277,6 +307,14 @@ def sciNot(value):
         from tabulate import tabulate
 
         print(tabulate(data, headers=["desc.", "value"]))
+
+        for f in files:
+            print('Removing input file', f)
+            proc = subprocess.Popen(f"rm {f}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            out, err = proc.communicate()
+            print(out.decode('utf-8'))
+            print(err.decode('utf-8'), file=sys.stderr)
+
         """
         )
 
@@ -305,6 +343,9 @@ def sciNot(value):
             for i, sampleName in enumerate(samplesToProcess)
             if i not in samplesNotToProcess
         ]
+        if len(samplesToProcess) == 0:
+            print("No samples to process", file=sys.stderr)
+            sys.exit(1)
 
         for sampleName in samplesToProcess:
             files_cfg = self.getFiles_cfg(sampleName)
 
@@ -12,7 +12,7 @@ def __init__(
         JEC_era,
         JER_era,
         jet_object,
-        met_collections=["PuppiMET", "MET"],
+        met_collections=["PuppiMET", "MET", "RawMET"],
         do_Jets=True,
         do_MET=True,
         do_JER=True,