From cf731a464f63da54c06dc46e3b651ddf77ce1d39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:39:02 -0400
Subject: [PATCH 01/65] package: bump up version to 0.8.0

---
 madminer/__info__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/madminer/__info__.py b/madminer/__info__.py
index 26db542e5..a1f1f09e3 100644
--- a/madminer/__info__.py
+++ b/madminer/__info__.py
@@ -1,3 +1,3 @@
 __authors__ = ", ".join(["Johann Brehmer", "Felix Kling", "Irina Espejo", "Sinclert Perez", "Kyle Cranmer"])
 
-__version__ = "0.7.6"
+__version__ = "0.8.0"

From 358d39fc8c9f6799c6c53aef3c01d2801a01dd2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:02:45 -0400
Subject: [PATCH 02/65] package: update setup.py to Python3.6

---
 setup.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index 150c3bcb5..081a28cdf 100644
--- a/setup.py
+++ b/setup.py
@@ -8,20 +8,21 @@
 
 import os
 import sys
+from pathlib import Path
 from shutil import rmtree
 
 from setuptools import find_packages, setup, Command
 
 
-project_dir = os.path.abspath(os.path.dirname(__file__))
+project_dir = Path(__file__).parent
 
 # Import the README and use it as the long-description.
-with open(os.path.join(project_dir, 'README.md')) as f:
+with open(project_dir.joinpath('README.md')) as f:
     LONG_DESCRIPTION = '\n' + f.read()
 
 # Load the package's __version__.py module as a dictionary.
 info = {}
-with open(os.path.join(project_dir, 'madminer', '__info__.py')) as f:
+with open(project_dir.joinpath('madminer', '__info__.py')) as f:
     exec(f.read(), info)
 
 
@@ -30,17 +31,15 @@
 DESCRIPTION = 'Mining gold from MadGraph to improve limit setting in particle physics.'
 URL = 'https://github.com/diana-hep/madminer'
 EMAIL = 'johann.brehmer@nyu.edu'
-REQUIRES_PYTHON = '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4'
+REQUIRES_PYTHON = '>=3.6, <4'
 AUTHORS = info['__authors__']
 VERSION = info['__version__']
 REQUIRED = [
-    "future",
     "h5py",
     "matplotlib>=2.0.0",
     "numpy>=1.13.0",
     "scipy>=1.0.0",
     "scikit-hep>=0.5.0, <0.6.0",
-    "six",
     "torch>=1.0.0",
     "uproot",
 ]
@@ -129,6 +128,8 @@ def run(self):
         'Programming Language :: Python',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
     ],
     # $ setup.py publish support.
     cmdclass={

From 5f5e116b45814d1471030cd06532fa07a4591343 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 14:29:31 -0400
Subject: [PATCH 03/65] conda: remove Python2 deps.

---
 environment.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 4c31ec743..aef0ad51f 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,7 +10,6 @@ dependencies:
 - h5py
 - scipy>=1.0.0
 - scikit-learn>=0.19.0
-- six
 - pytorch>=1.0.0
 - pytest
 - pip

From 68aa96873bc0988eb4d2eada7e25783cf9b35ee5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:06:25 -0400
Subject: [PATCH 04/65] docker: remove Python2 commands

---
 Dockerfile | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 8f346ddde..637277b29 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,16 +4,10 @@ FROM rootproject/root-ubuntu
 USER root
 
 RUN apt-get update && apt-get install -y \
-    python-dev \
-    python-tk \
     python3-tk \
     python3-pip
 
-# Python2 pip is not longer shiped with Ubuntu (20.04+)
-RUN curl "https://bootstrap.pypa.io/get-pip.py" --output get-pip.py && \
-    python get-pip.py
-
-RUN pip install --upgrade --no-cache-dir pip && \
-    pip install --upgrade --no-cache-dir madminer
+RUN pip3 install --upgrade --no-cache-dir pip && \
+    pip3 install --upgrade --no-cache-dir madminer
 
 WORKDIR /home/

From 8d3c90a36ff6a06332fb55647f51ab1f331192bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:07:33 -0400
Subject: [PATCH 05/65] travis: update CI Python versions

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f19bbe291..ff96403fc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,9 +7,9 @@ services:
 - docker
 language: python
 python:
-- '2.7'
-- '3.5'
 - '3.6'
+- '3.7'
+- '3.8'
 install:
 - pip install -q --no-cache-dir -e .
 - travis_wait pip install -q --no-cache-dir -e .

From e6727900cdf888180040adab1d06cef53bdb3f6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:09:27 -0400
Subject: [PATCH 06/65] tests: remove empty module

---
 .travis.yml           | 1 -
 tests/test_imports.py | 2 --
 2 files changed, 3 deletions(-)
 delete mode 100644 tests/test_imports.py

diff --git a/.travis.yml b/.travis.yml
index ff96403fc..e4398f53b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,6 @@ install:
 - pip install -q --no-cache-dir -e .
 - travis_wait pip install -q --no-cache-dir -e .
 script:
-- pytest tests/test_imports.py
 - pytest -s tests/test_ratio_estimation.py
 - pytest -s tests/test_nuisance.py
 jobs:
diff --git a/tests/test_imports.py b/tests/test_imports.py
deleted file mode 100644
index 1d60b3ca2..000000000
--- a/tests/test_imports.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_imports():
-    assert True

From e1db7ca2d2b3534ad58c7f8367d6bd79e1a31c23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:23:05 -0400
Subject: [PATCH 07/65] tests: drop Python2 support

---
 tests/test_nuisance.py         | 2 --
 tests/test_ratio_estimation.py | 8 ++------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/tests/test_nuisance.py b/tests/test_nuisance.py
index 4ffc2ef49..3c9d6348e 100644
--- a/tests/test_nuisance.py
+++ b/tests/test_nuisance.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import os
 import numpy as np
 from collections import OrderedDict
diff --git a/tests/test_ratio_estimation.py b/tests/test_ratio_estimation.py
index 2037b1860..c6ed1c98c 100644
--- a/tests/test_ratio_estimation.py
+++ b/tests/test_ratio_estimation.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import os
 import numpy as np
 import logging
@@ -139,9 +137,9 @@ def test_ratio_estimation():
     for method, alpha in zip(methods, alphas):
         this_rmses = []
         for sample_size in sample_sizes:
-            print("Training method {} on {} samples".format(method, sample_size))
+            print(f"Training method {method} on {sample_size} samples")
             this_rmses.append(run_test(method, alpha, sample_size))
-            print("  -> MSE =", this_rmses[-1])
+            print(f"  -> MSE = {this_rmses[-1]}")
         rmses.append(this_rmses)
     rmses = np.asarray(rmses)
 
@@ -153,8 +151,6 @@ def test_ratio_estimation():
     for method, this_rmses in zip(methods, rmses):
         print(" {:>6s}  |  {:11.3f}  |  {:11.3f} ".format(method, this_rmses[0], this_rmses[1]))
 
-    print("")
-
     assert np.max(rmses[:, -1]) < 100.0
 
 

From 3756a8d2e6343a307cdb9c8c3a003d68ad33b8bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:25:13 -0400
Subject: [PATCH 08/65] tests: improve fmt

---
 tests/test_ratio_estimation.py | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/tests/test_ratio_estimation.py b/tests/test_ratio_estimation.py
index c6ed1c98c..3da03f5c5 100644
--- a/tests/test_ratio_estimation.py
+++ b/tests/test_ratio_estimation.py
@@ -1,24 +1,21 @@
 import os
 import numpy as np
 import logging
+
+from madminer import ParameterizedRatioEstimator
 from scipy.stats import norm
 
+
 # MadMiner output
 logging.basicConfig(
-    format="%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s", datefmt="%H:%M", level=logging.WARNING
+    format="%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s",
+    datefmt="%H:%M",
+    level=logging.WARNING,
 )
 
-# Output of all other modules (e.g. matplotlib)
-for key in logging.Logger.manager.loggerDict:
-    if "madminer" not in key:
-        logging.getLogger(key).setLevel(logging.WARNING)
-
-from madminer import ParameterizedRatioEstimator
-
 if not os.path.exists("tests/data"):
     os.makedirs("tests/data")
 
-
 # Simulator settings
 z_std = 2.0
 x_std = 1.0
@@ -32,21 +29,20 @@ def simulate(theta, theta0=None, theta1=None, theta_score=None, npoints=None):
     # Draw observable
     x = np.random.normal(loc=z, scale=x_std, size=None)
 
+    r_xz = None
+    t_xz = None
+
     # Calculate joint likelihood ratio and joint score
     if theta0 is not None and theta1 is not None:
         r_xz = norm(loc=theta0, scale=z_std).pdf(z) / norm(loc=theta1, scale=z_std).pdf(z)
-    else:
-        r_xz = None
 
     if theta_score is not None:
         t_xz = (x - theta_score) / z_std ** 2
-    else:
-        t_xz = None
 
     return x, r_xz, t_xz
 
 
-# True likeleihood ratio function
+# True likelihood ratio function
 def calculate_likelihood_ratio(x, theta0, theta1=0.0):
     combined_std = (z_std ** 2 + x_std ** 2) ** 0.5
     r_x = norm(loc=theta0, scale=combined_std).pdf(x) / norm(loc=theta1, scale=combined_std).pdf(x)
@@ -54,7 +50,7 @@ def calculate_likelihood_ratio(x, theta0, theta1=0.0):
 
 
 def generate_data(sample_sizes):
-    # Run simulator and generate etraining data
+    # Run simulator and generate training data
     n_param_points = max(sample_sizes) // 2  # number of parameter points to train
 
     theta0 = np.random.uniform(low=-4.0, high=4.0, size=n_param_points)  # numerator, uniform prior
@@ -116,13 +112,13 @@ def run_test(method, alpha, sample_size):
 
     # Evaluation
     log_r_tests_alices, _ = estimator.evaluate(
-        theta="tests/data/theta_grid.npy", x="tests/data/x_test.npy", evaluate_score=False
+        theta="tests/data/theta_grid.npy",
+        x="tests/data/x_test.npy",
+        evaluate_score=False,
     )
 
     # Calculate error
-    rmse = np.mean((log_r_test_true - log_r_tests_alices) ** 2)
-
-    return rmse
+    return np.mean((log_r_test_true - log_r_tests_alices) ** 2)
 
 
 def test_ratio_estimation():

From e357bad6b537c32eb19a3e746340242614d107fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:35:30 -0400
Subject: [PATCH 09/65] docs: drop Python2 support

---
 docs/conf.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index b1648860c..5c5d52b3d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -15,13 +15,13 @@
 # import sys
 # sys.path.insert(0, os.path.abspath('.'))
 
-import os
+from pathlib import Path
 
 # -- Project information -----------------------------------------------------
 
 info = {}
-project_folder = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-with open(os.path.join(project_folder, 'madminer', '__info__.py')) as f:
+project_folder = Path(__file__).parent.parent
+with open(project_folder.joinpath('madminer', '__info__.py')) as f:
     exec(f.read(), info)
 
 project = 'MadMiner'

From 006e3d04b26eee1a7a0ebed44d531651b9619439 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:36:42 -0400
Subject: [PATCH 10/65] docs: improve fmt

---
 docs/conf.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 5c5d52b3d..5f7a534d4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -40,8 +40,7 @@
 # needs_sphinx = '1.0'
 
 # Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.viewcode',
@@ -56,7 +55,6 @@
 # You can specify multiple suffix as a list of string:
 #
 source_suffix = ['.rst', '.md']
-#source_suffix = '.rst'
 
 # Markdown support
 from recommonmark.parser import CommonMarkParser
@@ -107,11 +105,11 @@
 #
 # The default sidebars (for documents that don't match any pattern) are
 # defined by theme itself.  Builtin themes are using these templates by
-# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
-# 'searchbox.html']``.
+# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html']``.
 #
-# html_sidebars = {"toc.rst"}
-html_sidebars = { '**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'] }
+html_sidebars = {
+    '**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'],
+}
 
 
 # -- Options for HTMLHelp output ---------------------------------------------
@@ -154,8 +152,7 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    (master_doc, 'madminer', 'MadMiner Documentation',
-     [authors], 1)
+    (master_doc, 'madminer', 'MadMiner Documentation', [authors], 1),
 ]
 
 

From 97753f4703203fa6028a4854bbd9a686db51f9d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:39:45 -0400
Subject: [PATCH 11/65] package: harmonize imports format

---
 madminer/__init__.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/madminer/__init__.py b/madminer/__init__.py
index c0e936818..e21362299 100644
--- a/madminer/__init__.py
+++ b/madminer/__init__.py
@@ -2,9 +2,20 @@
 from .analysis import DataAnalyzer
 from .core import MadMiner
 from .delphes import DelphesReader
-from .fisherinformation import FisherInformation, InformationGeometry, profile_information, project_information
+from .fisherinformation import (
+    FisherInformation,
+    InformationGeometry,
+    profile_information,
+    project_information,
+)
 from .lhe import LHEReader
-from .likelihood import HistoLikelihood, NeuralLikelihood, fix_params, project_log_likelihood, profile_log_likelihood
+from .likelihood import (
+    HistoLikelihood,
+    NeuralLikelihood,
+    fix_params,
+    project_log_likelihood,
+    profile_log_likelihood,
+)
 from .limits import AsymptoticLimits
 from .ml import (
     ParameterizedRatioEstimator,

From 946f16cad95c1adc2f75a90e1d25f2dd48d878df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:54:15 -0400
Subject: [PATCH 12/65] src: analyzer module f-strings

---
 madminer/analysis/dataanalyzer.py | 61 ++++++++++++++-----------------
 1 file changed, 27 insertions(+), 34 deletions(-)

diff --git a/madminer/analysis/dataanalyzer.py b/madminer/analysis/dataanalyzer.py
index 43a045c6d..974cd2e7d 100644
--- a/madminer/analysis/dataanalyzer.py
+++ b/madminer/analysis/dataanalyzer.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 import six
@@ -134,7 +132,7 @@ def event_loader(
             include_nuisance_parameters = self.include_nuisance_parameters
 
         sampling_benchmark = self._find_closest_benchmark(generated_close_to)
-        logger.debug("Sampling benchmark closest to %s: %s", generated_close_to, sampling_benchmark)
+        logger.debug(f"Sampling benchmark closest to {generated_close_to}: {sampling_benchmark}")
 
         if sampling_benchmark is None:
             sampling_factors = self._calculate_sampling_factors()
@@ -218,7 +216,7 @@ def weighted_events(
             x = x[idx]
             weights_benchmarks = weights_benchmarks[idx]
         elif n_draws is not None:
-            logger.warning("Requested %s events, but only %s available", n_draws, n_events)
+            logger.warning(f"Requested {n_draws} events, but only {n_events} available")
 
         # Process and return appropriate weights
         if theta is None:
@@ -312,7 +310,7 @@ def xsecs(
                 partition, test_split, validation_split
             )
         else:
-            raise ValueError("Events has to be either 'all', 'train', or 'test', but got {}!".format(partition))
+            raise ValueError(f"Invalid partition type: {partition}")
 
         # Theta matrices (translation of benchmarks to theta, at nominal nuisance params)
         if thetas is None:
@@ -423,7 +421,7 @@ def xsec_gradients(
             Calculated cross section gradients in pb with shape (n_gradients,).
         """
 
-        logger.debug("Calculating cross section gradients for thetas = %s and nus = %s", thetas, nus)
+        logger.debug(f"Calculating cross section gradients for thetas = {thetas} and nus = {nus}")
 
         # Inputs
         include_nuisance_benchmarks = nus is not None or gradients in ["all", "nu"]
@@ -431,7 +429,7 @@ def xsec_gradients(
             nus = [None for _ in thetas]
         assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!"
         if gradients not in ["all", "theta", "nu"]:
-            raise RuntimeError("Gradients has to be 'all', 'theta', or 'nu', but got {}".format(gradients))
+            raise RuntimeError(f"Invalid gradients type: {gradients}")
 
         # Which events to use
         if partition == "all":
@@ -442,7 +440,7 @@ def xsec_gradients(
                 partition, test_split, validation_split
             )
         else:
-            raise ValueError("Events has to be either 'all', 'train', or 'test', but got {}!".format(partition))
+            raise ValueError(f"Invalid partition type: {partition}")
 
         # Theta matrices (translation of benchmarks to theta, at nominal nuisance params)
         theta_matrices = np.asarray(
@@ -465,7 +463,7 @@ def xsec_gradients(
             )
         ):
             n_batch, _ = benchmark_weights.shape
-            logger.debug("Batch %s with %s events", i_batch + 1, n_batch)
+            logger.debug(f"Batch {i_batch+1} with {n_batch} events")
 
             if gradients in ["all", "theta"]:
                 nom_gradients = mdot(
@@ -512,31 +510,33 @@ def _check_n_events(self):
             )
 
     def _report_setup(self):
-        logger.info("Found %s parameters", self.n_parameters)
-        for i, (key, values) in enumerate(six.iteritems(self.parameters)):
-            logger.info("  %s: %s (%s)", i, key, " / ".join(str(x) for x in values))
+        logger.info(f"Found {self.n_parameters} parameters")
+        for i, (key, values) in enumerate(self.parameters.items()):
+            values_str = " / ".join(str(x) for x in values)
+            logger.info(f"  {i}: {key} ({values_str})")
 
         if self.nuisance_parameters is not None:
-            logger.info("Found %s nuisance parameters", self.n_nuisance_parameters)
-            for i, (key, values) in enumerate(six.iteritems(self.systematics)):
-                logger.info("  %s: %s (%s)", i, key, " / ".join(str(x) for x in values))
+            logger.info(f"Found {self.n_nuisance_parameters} nuisance parameters")
+            for i, (key, values) in enumerate(self.systematics.items()):
+                values_str = " / ".join(str(x) for x in values)
+                logger.info(f"  {i}: {key} ({values_str})")
         else:
             logger.info("Did not find nuisance parameters")
             self.include_nuisance_parameters = False
 
-        logger.info("Found %s benchmarks, of which %s physical", self.n_benchmarks, self.n_benchmarks_phys)
+        logger.info(f"Found {self.n_benchmarks} benchmarks")
         for (key, values), is_nuisance in zip(six.iteritems(self.benchmarks), self.benchmark_is_nuisance):
             if is_nuisance:
                 logger.debug("   %s: systematics", key)
             else:
                 logger.debug("   %s: %s", key, format_benchmark(values))
 
-        logger.info("Found %s observables", self.n_observables)
+        logger.info(f"Found {self.n_observables} observables")
         if self.observables is not None:
             for i, obs in enumerate(self.observables):
                 logger.debug("  %2.2s %s", i, obs)
 
-        logger.info("Found %s events", self.n_samples)
+        logger.info(f"Found {self.n_samples} events")
         if self.n_events_generated_per_benchmark is not None:
             for events, name in zip(self.n_events_generated_per_benchmark, six.iterkeys(self.benchmarks)):
                 if events > 0:
@@ -765,7 +765,7 @@ def _train_test_split(self, train, test_split):
                 end_event = int(round((1.0 - test_split) * self.n_samples, 0))
                 correction_factor = 1.0 / (1.0 - test_split)
                 if end_event < 0 or end_event > self.n_samples:
-                    raise ValueError("Irregular train / test split: sample {} / {}", end_event, self.n_samples)
+                    raise ValueError(f"Irregular split: sample {end_event} / {self.n_samples}")
 
         else:
             if test_split is None or test_split <= 0.0 or test_split >= 1.0:
@@ -775,7 +775,7 @@ def _train_test_split(self, train, test_split):
                 start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1
                 correction_factor = 1.0 / test_split
                 if start_event < 0 or start_event > self.n_samples:
-                    raise ValueError("Irregular train / test split: sample {} / {}", start_event, self.n_samples)
+                    raise ValueError(f"Irregular split: sample {start_event} / {self.n_samples}")
 
             end_event = None
 
@@ -826,9 +826,7 @@ def _train_validation_test_split(self, partition, test_split, validation_split):
                 correction_factor = 1.0 / train_split
 
                 if end_event < 0 or end_event > self.n_samples:
-                    raise ValueError(
-                        "Irregular train / validation / test split: sample {} / {}", end_event, self.n_samples
-                    )
+                    raise ValueError(f"Irregular split: sample {end_event} / {self.n_samples}")
 
         elif partition == "validation":
             if validation_split is None or validation_split <= 0.0 or validation_split >= 1.0:
@@ -842,13 +840,10 @@ def _train_validation_test_split(self, partition, test_split, validation_split):
                 correction_factor = 1.0 / validation_split
 
                 if start_event < 0 or start_event > self.n_samples:
-                    raise ValueError(
-                        "Irregular train / validation / test  split: sample {} / {}", start_event, self.n_samples
-                    )
+                    raise ValueError(f"Irregular split: sample {start_event} / {self.n_samples}")
+
                 if end_event < 0 or end_event > self.n_samples:
-                    raise ValueError(
-                        "Irregular train / validation / test split: sample {} / {}", end_event, self.n_samples
-                    )
+                    raise ValueError(f"Irregular split: sample {end_event} / {self.n_samples}")
 
         elif partition == "test":
             end_event = None
@@ -860,12 +855,10 @@ def _train_validation_test_split(self, partition, test_split, validation_split):
                 start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1
                 correction_factor = 1.0 / test_split
                 if start_event < 0 or start_event > self.n_samples:
-                    raise ValueError(
-                        "Irregular train / validation / test split: sample {} / {}", start_event, self.n_samples
-                    )
+                    raise ValueError(f"Irregular split: sample {start_event} / {self.n_samples}")
 
         else:
-            raise RuntimeError("Unknown partition {}, has to be 'train', 'validation', or 'test'.")
+            raise RuntimeError(f"Unknown partition {partition}")
 
         return start_event, end_event, correction_factor
 
@@ -950,7 +943,7 @@ def _get_dtheta_benchmark_matrix(self, theta, zero_pad=True):
 
     def _calculate_sampling_factors(self):
         events = np.asarray(self.n_events_generated_per_benchmark, dtype=np.float)
-        logger.debug("Events per benchmark: %s", events)
+        logger.debug(f"Events per benchmark: {events}")
         factors = events / np.sum(events)
         factors = np.hstack((factors, 1.0))  # background events
         return factors

From 04af5a8a1acfa213e0fa5d4e65a66a4b6c8d45c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 10:57:53 -0400
Subject: [PATCH 13/65] src: analyzer module improve fmt

---
 madminer/analysis/dataanalyzer.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/madminer/analysis/dataanalyzer.py b/madminer/analysis/dataanalyzer.py
index 974cd2e7d..e5c3e3434 100644
--- a/madminer/analysis/dataanalyzer.py
+++ b/madminer/analysis/dataanalyzer.py
@@ -113,7 +113,7 @@ def event_loader(
             at the closest benchmark point to a given parameter point.
 
         return_sampling_ids : bool, optional
-            If True, the iterator returns the sampling IDs in additioin to observables and weights.
+            If True, the iterator returns the sampling IDs in addition to observables and weights.
 
         Yields
         ------
@@ -206,11 +206,17 @@ def weighted_events(
         """
 
         x, weights_benchmarks = next(
-            self.event_loader(batch_size=None, start=start_event, end=end_event, generated_close_to=generated_close_to)
+            self.event_loader(
+                start=start_event,
+                end=end_event,
+                batch_size=None,
+                generated_close_to=generated_close_to,
+            )
         )
 
         # Pick events randomly
         n_events = len(x)
+
         if n_draws is not None and n_draws < n_events:
             idx = np.random.choice(n_events, n_draws, replace=False)
             x = x[idx]
@@ -232,7 +238,7 @@ def weighted_events(
         else:
             # TODO: nuisance params
             if nu is not None:
-                raise NotImplementedError
+                raise NotImplementedError()
             theta_matrix = self._get_theta_benchmark_matrix(theta)
             weights_theta = mdot(theta_matrix, weights_benchmarks)
             return x, weights_theta
@@ -399,11 +405,14 @@ def xsec_gradients(
              account. Otherwise, the list has to have the same number of elements as thetas, and each entry can specify
              nuisance parameters at nominal value (None) or a value of the nuisance parameters (ndarray).
 
+        partition : {"train", "test", "validation", "all"}, optional
+            Which events to use. Default: "all".
+
         test_split : float, optional
             Fraction of events reserved for testing. Default value: 0.2.
 
-        partition : {"train", "test", "validation", "all"}, optional
-            Which events to use. Default: "all".
+        validation_split : float, optional
+            Fraction of weighted events reserved for validation. Default value: 0.2.
 
         gradients : {"all", "theta", "nu"}, optional
             Which gradients to calculate. Default value: "all".
@@ -446,6 +455,7 @@ def xsec_gradients(
         theta_matrices = np.asarray(
             [self._get_theta_benchmark_matrix(theta) for theta in thetas]
         )  # shape (n_thetas, n_benchmarks)
+
         theta_gradient_matrices = np.asarray(
             [self._get_dtheta_benchmark_matrix(theta) for theta in thetas]
         )  # shape (n_thetas, n_gradients, n_benchmarks)
@@ -640,7 +650,6 @@ def _weights(self, thetas, nus, benchmark_weights, theta_matrices=None):
         n_events, _ = benchmark_weights.shape
 
         # Inputs
-        include_nuisance_benchmarks = nus is not None
         if nus is None:
             nus = [None for _ in thetas]
         assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!"
@@ -935,9 +944,8 @@ def _get_dtheta_benchmark_matrix(self, theta, zero_pad=True):
             if mode == "fd":
                 raise RuntimeError("Cannot calculate score for arbitrary parameter points without morphing setup")
 
-            dtheta_matrix = self.morpher.calculate_morphing_weight_gradient(
-                theta
-            )  # Shape (n_parameters, n_benchmarks_phys)
+            # Shape (n_parameters, n_benchmarks_phys)
+            dtheta_matrix = self.morpher.calculate_morphing_weight_gradient(theta)
 
         return dtheta_matrix
 

From 38c57914262193f580fc9f7d025e7a5d779faab1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 17 Sep 2020 11:36:55 -0400
Subject: [PATCH 14/65] src: analyzer module remove 'six'

---
 madminer/analysis/dataanalyzer.py | 40 +++++++++++++++----------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/madminer/analysis/dataanalyzer.py b/madminer/analysis/dataanalyzer.py
index e5c3e3434..36429c84b 100644
--- a/madminer/analysis/dataanalyzer.py
+++ b/madminer/analysis/dataanalyzer.py
@@ -1,6 +1,5 @@
 import logging
 import numpy as np
-import six
 
 from madminer.utils.interfaces.madminer_hdf5 import load_madminer_settings, madminer_event_loader
 from madminer.utils.morphing import PhysicsMorpher, NuisanceMorpher
@@ -227,7 +226,7 @@ def weighted_events(
         # Process and return appropriate weights
         if theta is None:
             return x, weights_benchmarks
-        elif isinstance(theta, six.string_types):
+        elif isinstance(theta, str):
             i_benchmark = list(self.benchmarks.keys()).index(theta)
             return x, weights_benchmarks[:, i_benchmark]
         elif derivative:
@@ -535,7 +534,7 @@ def _report_setup(self):
             self.include_nuisance_parameters = False
 
         logger.info(f"Found {self.n_benchmarks} benchmarks")
-        for (key, values), is_nuisance in zip(six.iteritems(self.benchmarks), self.benchmark_is_nuisance):
+        for (key, values), is_nuisance in zip(self.benchmarks.items(), self.benchmark_is_nuisance):
             if is_nuisance:
                 logger.debug("   %s: systematics", key)
             else:
@@ -548,7 +547,7 @@ def _report_setup(self):
 
         logger.info(f"Found {self.n_samples} events")
         if self.n_events_generated_per_benchmark is not None:
-            for events, name in zip(self.n_events_generated_per_benchmark, six.iterkeys(self.benchmarks)):
+            for events, name in zip(self.n_events_generated_per_benchmark, self.benchmarks.keys()):
                 if events > 0:
                     logger.info("  %s signal events sampled from benchmark %s", events, name)
             if self.n_events_backgrounds is not None and self.n_events_backgrounds > 0:
@@ -582,17 +581,19 @@ def _finite_differences_theta_gradient_matrices(self):
         matrix = np.zeros(
             (self.n_benchmarks, self.n_parameters, self.n_benchmarks)
         )  # (n_thetas, n_gradients, n_benchmarks)
-        benchmark_names = list(six.iterkeys(self.benchmarks))
 
-        # We'll generally try to find the tupels p, i, j, k such that matrix[i, p, j] = - 1 / eps and matrix[i, p, i] = 1 / eps
+        benchmark_names = list(self.benchmarks.keys())
 
-        for i, benchmark in enumerate(six.iterkeys(self.benchmarks)):
-            # For the FD-shited benchmarks, we assume that the gradients are the same as at the original point, and will just copy the matrix later
+        # We'll generally try to find the tuples p, i, j, k such that
+        # matrix[i, p, j] = - 1 / eps and matrix[i, p, i] = 1 / eps
+        for i, benchmark in enumerate(self.benchmarks.keys()):
+            # For the FD-shifted benchmarks, we assume that the gradients are
+            # the same as at the original point, and will just copy the matrix later
             copy_to = []
             if benchmark not in self.finite_difference_benchmarks:
                 continue
 
-            for p, param in enumerate(six.iterkeys(self.parameters)):
+            for p, param in enumerate(self.parameters.keys()):
                 shifted_benchmark = self.finite_difference_benchmarks[benchmark][param]
                 j = benchmark_names.index(shifted_benchmark)
                 copy_to.append(j)
@@ -872,7 +873,7 @@ def _train_validation_test_split(self, partition, test_split, validation_split):
         return start_event, end_event, correction_factor
 
     def _get_theta_value(self, theta):
-        if isinstance(theta, six.string_types):
+        if isinstance(theta, str):
             benchmark = self.benchmarks[theta]
             theta_value = np.array([benchmark[key] for key in benchmark])
         elif isinstance(theta, int):
@@ -897,7 +898,7 @@ def _get_theta_benchmark_matrix(self, theta, zero_pad=True):
             theta_matrix = np.zeros(self.n_benchmarks)
             theta_matrix[: unpadded_theta_matrix.shape[0]] = unpadded_theta_matrix
 
-        elif isinstance(theta, six.string_types):
+        elif isinstance(theta, str):
             i_benchmark = list(self.benchmarks).index(theta)
             theta_matrix = self._get_theta_benchmark_matrix(i_benchmark)
 
@@ -921,18 +922,18 @@ def _get_dtheta_benchmark_matrix(self, theta, zero_pad=True):
             dtheta_matrix = np.zeros((unpadded_theta_matrix.shape[0], self.n_benchmarks))
             dtheta_matrix[:, : unpadded_theta_matrix.shape[1]] = unpadded_theta_matrix
 
-        elif isinstance(theta, six.string_types) and mode == "morphing":
+        elif isinstance(theta, str) and mode == "morphing":
             benchmark = self.benchmarks[theta]
-            benchmark = np.array([value for _, value in six.iteritems(benchmark)])
+            benchmark = np.array([value for _, value in benchmark.items()])
             dtheta_matrix = self._get_dtheta_benchmark_matrix(benchmark)
 
         elif isinstance(theta, int) and mode == "morphing":
             benchmark = self.benchmarks[list(self.benchmarks.keys())[theta]]
-            benchmark = np.array([value for _, value in six.iteritems(benchmark)])
+            benchmark = np.array([value for _, value in benchmark.items()])
             dtheta_matrix = self._get_dtheta_benchmark_matrix(benchmark)
 
-        elif isinstance(theta, six.string_types):  # finite differences
-            benchmark_id = list(six.iterkeys(self.benchmarks)).index(theta)
+        elif isinstance(theta, str):
+            benchmark_id = list(self.benchmarks.keys()).index(theta)
             dtheta_matrix = self._get_dtheta_benchmark_matrix(benchmark_id)
 
         elif isinstance(theta, int):  # finite differences
@@ -971,7 +972,6 @@ def _find_closest_benchmark(self, theta):
         return closest_idx
 
     def _benchmark_array(self):
-        benchmarks_array = []
-        for benchmark in six.itervalues(self.benchmarks):
-            benchmarks_array.append(list(six.itervalues(benchmark)))
-        return np.asarray(benchmarks_array)
+        # np.asarray cannot unpack dict views (it would build an object array of
+        # views), so materialize every benchmark's parameter values as a list.
+        return np.asarray([list(b.values()) for b in self.benchmarks.values()])

From 19f031ffa8da1cc0c2442ba92428d3da796aa54c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 13:50:07 -0400
Subject: [PATCH 15/65] src: core module f-strings

---
 madminer/core/madminer.py | 104 +++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 51 deletions(-)

diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py
index 4ba3aa051..bdf40788e 100644
--- a/madminer/core/madminer.py
+++ b/madminer/core/madminer.py
@@ -95,17 +95,17 @@ def add_parameter(
 
         # Default names
         if parameter_name is None:
-            parameter_name = "parameter_" + str(len(self.parameters))
+            parameter_name = f"parameter_{len(self.parameters)}"
 
         # Check and sanitize input
-        assert isinstance(parameter_name, six.string_types), "Parameter name is not a string: {}".format(parameter_name)
-        assert isinstance(lha_block, six.string_types), "LHA block is not a string: {}".format(lha_block)
-        assert isinstance(lha_id, int), "LHA id is not an integer: {}".format(lha_id)
+        assert isinstance(parameter_name, six.string_types), f"Parameter name is not a string: {parameter_name}"
+        assert isinstance(lha_block, six.string_types), f"LHA block is not a string: {lha_block}"
+        assert isinstance(lha_id, int), f"LHA id is not an integer: {lha_id}"
 
         parameter_name = parameter_name.replace(" ", "_")
         parameter_name = parameter_name.replace("-", "_")
 
-        assert parameter_name not in self.parameters, "Parameter name exists already: {}".format(parameter_name)
+        assert parameter_name not in self.parameters, f"Parameter name exists already: {parameter_name}"
 
         if isinstance(morphing_max_power, int):
             morphing_max_power = (morphing_max_power,)
@@ -171,11 +171,11 @@ def set_parameters(self, parameters=None):
                 elif len(values) == 2:
                     self.add_parameter(lha_block=values[0], lha_id=values[1], parameter_name=key)
                 else:
-                    raise ValueError("Parameter properties has unexpected length: {0}".format(values))
+                    raise ValueError(f"Parameter properties has unexpected length: {values}")
 
         else:
             for values in parameters:
-                assert len(values) == 2, "Parameter list entry does not have length 2: {0}".format(values)
+                assert len(values) == 2, f"Parameter list entry does not have length 2: {values}"
                 self.add_parameter(values[0], values[1])
 
         # After manually adding parameters, the morphing information is not accurate anymore
@@ -218,18 +218,18 @@ def add_benchmark(self, parameter_values, benchmark_name=None, verbose=True):
 
         # Default names
         if benchmark_name is None:
-            benchmark_name = "benchmark_" + str(len(self.benchmarks))
+            benchmark_name = f"benchmark_{len(self.benchmarks)}"
 
         # Check input
         if not isinstance(parameter_values, dict):
-            raise RuntimeError("Parameter values are not a dict: {}".format(parameter_values))
+            raise RuntimeError(f"Parameter values are not a dict: {parameter_values}")
 
         for key, value in six.iteritems(parameter_values):
             if key not in self.parameters:
-                raise RuntimeError("Unknown parameter: {0}".format(key))
+                raise RuntimeError(f"Unknown parameter: {key}")
 
         if benchmark_name in self.benchmarks:
-            raise RuntimeError("Benchmark name {} exists already".format(benchmark_name))
+            raise RuntimeError(f"Benchmark {benchmark_name} exists already")
 
         # Add benchmark
         self.benchmarks[benchmark_name] = parameter_values
@@ -451,9 +451,9 @@ def add_systematics(
         # Default name
         if systematic_name is None:
             i = 0
-            while "{}_{}".format(effect, i) in list(six.iterkeys(self.systematics)):
+            while f"{effect}_{i}" in list(six.iterkeys(self.systematics)):
                 i += 1
-            systematic_name = "{}_{}".format(type, i)
+            systematic_name = f"{type}_{i}"
         systematic_name = systematic_name.replace(" ", "_")
         systematic_name = systematic_name.replace("-", "_")
 
@@ -466,7 +466,7 @@ def add_systematics(
         elif effect == "norm":
             self.systematics[systematic_name] = ("norm", norm_variation)
         else:
-            raise ValueError("Unknown systematic type {}, has to be one of 'norm', 'scale', or 'pdf'!".format(type))
+            raise ValueError(f"Unknown systematic type: {effect}")
 
     def load(self, filename, disable_morphing=False):
         """
@@ -948,15 +948,19 @@ def run_multiple(
         # Gives 'python2_override' full power if 'initial_command' is empty.
         # (Reference: https://github.com/diana-hep/madminer/issues/422)
         if python2_override and initial_command is None and not python_executable:
-            logger.warning("The keyword python2_override is discouraged. Instead, consider using python_executable.")
+            logger.warning(
+                "The keyword python2_override is discouraged. "
+                "Instead, consider using python_executable."
+            )
+
             logger.info("Adding Python2.7 bin folder to PATH")
             binary_path = os.popen("command -v python2.7").read().strip()
             binary_folder = os.path.dirname(os.path.realpath(binary_path))
-            initial_command = "export PATH={}:$PATH".format(binary_folder)
-            logger.info("Using Python executable %s", binary_path)
+            initial_command = f"export PATH={binary_folder}:$PATH"
+            logger.info(f"Using Python executable {binary_path}")
 
         # Generate process folder
-        log_file_generate = log_directory + "/generate.log"
+        log_file_generate = f"{log_directory}/generate.log"
 
         generate_mg_process(
             mg_directory,
@@ -973,9 +977,9 @@ def run_multiple(
         # Make MadMiner folders
         create_missing_folders(
             [
-                mg_process_directory + "/madminer",
-                mg_process_directory + "/madminer/cards",
-                mg_process_directory + "/madminer/scripts",
+                f"{mg_process_directory}/madminer",
+                f"{mg_process_directory}/madminer/cards",
+                f"{mg_process_directory}/madminer/scripts",
             ]
         )
 
@@ -995,20 +999,20 @@ def run_multiple(
             for sample_benchmark in sample_benchmarks:
 
                 # Files
-                script_file = "madminer/scripts/run_{}.sh".format(i)
-                log_file_run = "run_{}.log".format(i)
-                mg_commands_filename = "madminer/cards/mg_commands_{}.dat".format(i)
-                param_card_file = "madminer/cards/param_card_{}.dat".format(i)
-                reweight_card_file = "madminer/cards/reweight_card_{}.dat".format(i)
+                script_file = f"madminer/scripts/run_{i}.sh"
+                log_file_run = f"run_{i}.log"
+                mg_commands_filename = f"madminer/cards/mg_commands_{i}.dat"
+                param_card_file = f"madminer/cards/param_card_{i}.dat"
+                reweight_card_file = f"madminer/cards/reweight_card_{i}.dat"
                 new_pythia8_card_file = None
                 if pythia8_card_file is not None:
-                    new_pythia8_card_file = "madminer/cards/pythia8_card_{}.dat".format(i)
+                    new_pythia8_card_file = f"madminer/cards/pythia8_card_{i}.dat"
                 new_run_card_file = None
                 if run_card_file is not None:
-                    new_run_card_file = "madminer/cards/run_card_{}.dat".format(i)
+                    new_run_card_file = f"madminer/cards/run_card_{i}.dat"
                 new_configuration_file = None
                 if configuration_file is not None:
-                    new_configuration_file = "madminer/cards/me5_configuration_{}.txt".format(i)
+                    new_configuration_file = f"madminer/cards/me5_configuration_{i}.txt"
 
                 logger.info("Run %s", i)
                 logger.info("  Sampling from benchmark: %s", sample_benchmark)
@@ -1034,26 +1038,26 @@ def run_multiple(
                     param_card_template_file,
                     mg_process_directory,
                     sample_benchmark=sample_benchmark,
-                    param_card_filename=mg_process_directory + "/" + param_card_file,
-                    reweight_card_filename=mg_process_directory + "/" + reweight_card_file,
+                    param_card_filename=f"{mg_process_directory}/{param_card_file}",
+                    reweight_card_filename=f"{mg_process_directory}/{reweight_card_file}",
                 )
 
                 # Create run card
                 if run_card_file is not None:
                     export_run_card(
                         template_filename=run_card_file,
-                        run_card_filename=mg_process_directory + "/" + new_run_card_file,
+                        run_card_filename=f"{mg_process_directory}/{new_run_card_file}",
                         systematics=systematics_used,
                         order=order,
                     )
 
                 # Copy Pythia card
                 if pythia8_card_file is not None:
-                    copy_file(pythia8_card_file, mg_process_directory + "/" + new_pythia8_card_file)
+                    copy_file(pythia8_card_file, f"{mg_process_directory}/{new_pythia8_card_file}")
 
                 # Copy Configuration card
                 if configuration_file is not None:
-                    copy_file(configuration_file, mg_process_directory + "/" + new_configuration_file)
+                    copy_file(configuration_file, f"{mg_process_directory}/{new_configuration_file}")
 
                 # Run MG and Pythia
                 if only_prepare_script:
@@ -1079,15 +1083,15 @@ def run_multiple(
                     run_mg(
                         mg_directory,
                         mg_process_directory,
-                        mg_process_directory + "/" + mg_commands_filename,
-                        mg_process_directory + "/" + new_run_card_file,
-                        mg_process_directory + "/" + param_card_file,
-                        mg_process_directory + "/" + reweight_card_file,
-                        None if new_pythia8_card_file is None else mg_process_directory + "/" + new_pythia8_card_file,
-                        None if new_configuration_file is None else mg_process_directory + "/" + new_configuration_file,
+                        f"{mg_process_directory}/{mg_commands_filename}",
+                        f"{mg_process_directory}/{new_run_card_file}",
+                        f"{mg_process_directory}/{param_card_file}",
+                        f"{mg_process_directory}/{reweight_card_file}",
+                        None if new_pythia8_card_file is None else f"{mg_process_directory}/{new_pythia8_card_file}",
+                        None if new_configuration_file is None else f"{mg_process_directory}/{new_configuration_file}",
                         is_background=is_background,
                         initial_command=initial_command,
-                        log_file=log_directory + "/" + log_file_run,
+                        log_file=f"{log_directory}/{log_file_run}",
                         explicit_python_call=python2_override or (python_executable is not None),
                         python_executable=python_executable,
                         order=order,
@@ -1099,7 +1103,7 @@ def run_multiple(
 
         # Master shell script
         if only_prepare_script:
-            master_script_filename = "{}/madminer/run.sh".format(mg_process_directory)
+            master_script_filename = f"{mg_process_directory}/madminer/run.sh"
             create_master_script(log_directory, master_script_filename, mg_directory, mg_process_directory, mg_scripts)
 
             logger.info(
@@ -1108,9 +1112,7 @@ def run_multiple(
             )
 
         else:
-            expected_event_files = [
-                mg_process_directory + "/Events/run_{:02d}".format(i + 1) for i in range(n_runs_total)
-            ]
+            expected_event_files = [f"{mg_process_directory}/Events/run_{(i+1):02d}" for i in range(n_runs_total)]
             expected_event_files = "\n".join(expected_event_files)
             logger.info(
                 "Finished running MadGraph! Please check that events were succesfully generated in the following "
@@ -1184,9 +1186,9 @@ def reweight_existing_sample(
         # Make MadMiner folders
         create_missing_folders(
             [
-                mg_process_directory + "/madminer",
-                mg_process_directory + "/madminer/cards",
-                mg_process_directory + "/madminer/scripts",
+                f"{mg_process_directory}/madminer",
+                f"{mg_process_directory}/madminer/cards",
+                f"{mg_process_directory}/madminer/scripts",
             ]
         )
 
@@ -1212,7 +1214,7 @@ def reweight_existing_sample(
             param_card_template_file,
             mg_process_directory,
             sample_benchmark=sample_benchmark,
-            reweight_card_filename=mg_process_directory + "/" + reweight_card_file,
+            reweight_card_filename=f"{mg_process_directory}/{reweight_card_file}",
             include_param_card=False,
             benchmarks=missing_benchmarks,
         )
@@ -1235,9 +1237,9 @@ def reweight_existing_sample(
             run_mg_reweighting(
                 mg_process_directory,
                 run_name=run_name,
-                reweight_card_file=mg_process_directory + "/" + reweight_card_file,
+                reweight_card_file=f"{mg_process_directory}/{reweight_card_file}",
                 initial_command=initial_command,
-                log_file=log_directory + "/" + log_file_run,
+                log_file=f"{log_directory}/{log_file_run}",
             )
             logger.info(
                 "Finished running reweighting! Please check that events were succesfully reweighted in the following "

From 2999b874dca88d09bd189787ab4057a9a3b59951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 13:54:40 -0400
Subject: [PATCH 16/65] src: core module improve fmt

---
 madminer/core/madminer.py | 40 ++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py
index bdf40788e..6354e91f0 100644
--- a/madminer/core/madminer.py
+++ b/madminer/core/madminer.py
@@ -111,7 +111,13 @@ def add_parameter(
             morphing_max_power = (morphing_max_power,)
 
         # Add parameter
-        self.parameters[parameter_name] = (lha_block, lha_id, morphing_max_power, parameter_range, param_card_transform)
+        self.parameters[parameter_name] = (
+            lha_block,
+            lha_id,
+            morphing_max_power,
+            parameter_range,
+            param_card_transform,
+        )
 
         # After manually adding parameters, the morphing information is not accurate anymore
         self.morpher = None
@@ -165,11 +171,15 @@ def set_parameters(self, parameters=None):
                         lha_block=values[0],
                         lha_id=values[1],
                         parameter_name=key,
-                        parameter_range=[values[3], values[4]],
+                        parameter_range=(values[3], values[4]),
                         morphing_max_power=values[2],
                     )
                 elif len(values) == 2:
-                    self.add_parameter(lha_block=values[0], lha_id=values[1], parameter_name=key)
+                    self.add_parameter(
+                        lha_block=values[0],
+                        lha_id=values[1],
+                        parameter_name=key,
+                    )
                 else:
                     raise ValueError(f"Parameter properties has unexpected length: {values}")
 
@@ -284,7 +294,12 @@ def set_benchmarks(self, benchmarks=None, verbose=True):
             self.export_morphing = False
 
     def set_morphing(
-        self, max_overall_power=4, n_bases=1, include_existing_benchmarks=True, n_trials=100, n_test_thetas=100
+        self,
+        max_overall_power=4,
+        n_bases=1,
+        include_existing_benchmarks=True,
+        n_trials=100,
+        n_test_thetas=100,
     ):
         """
         Sets up the morphing environment.
@@ -353,7 +368,10 @@ def set_morphing(
         else:
             n_predefined_benchmarks = 0
             basis = morpher.optimize_basis(
-                n_bases=n_bases, fixed_benchmarks_from_madminer=None, n_trials=n_trials, n_test_thetas=n_test_thetas
+                n_bases=n_bases,
+                fixed_benchmarks_from_madminer=None,
+                n_trials=n_trials,
+                n_test_thetas=n_test_thetas,
             )
 
             basis.update(self.benchmarks)
@@ -424,7 +442,7 @@ def add_systematics(
 
         scale : {"mu", "mur", "muf"}, optional
             If type is "scale", this sets whether only the regularization scale ("mur"), only the factorization scale
-            ("muf"), or both simulatenously ("mu") are varied. Default value:
+            ("muf"), or both simultaneously ("mu") are varied. Default value:
             "mu".
 
         norm_variation : float, optional
@@ -1104,8 +1122,13 @@ def run_multiple(
         # Master shell script
         if only_prepare_script:
             master_script_filename = f"{mg_process_directory}/madminer/run.sh"
-            create_master_script(log_directory, master_script_filename, mg_directory, mg_process_directory, mg_scripts)
-
+            create_master_script(
+                log_directory,
+                master_script_filename,
+                mg_directory,
+                mg_process_directory,
+                mg_scripts,
+            )
             logger.info(
                 "To generate events, please run:\n\n %s [MG_directory] [MG_process_directory] [log_dir]\n\n",
                 master_script_filename,
@@ -1129,7 +1152,6 @@ def reweight_existing_sample(
         reweight_benchmarks=None,
         only_prepare_script=False,
         log_directory=None,
-        temp_directory=None,
         initial_command=None,
     ):
         """

From c0f58963f73147e0ab60738f55aaf9a7782b1ad5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 14:05:16 -0400
Subject: [PATCH 17/65] src: core module remove 'six'

---
 madminer/core/madminer.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py
index 6354e91f0..9cafddfc0 100644
--- a/madminer/core/madminer.py
+++ b/madminer/core/madminer.py
@@ -1,10 +1,7 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-import six
-
 import os
 import logging
-from collections import OrderedDict
 import tempfile
+from collections import OrderedDict
 
 from madminer.utils.morphing import PhysicsMorpher
 from madminer.utils.interfaces.madminer_hdf5 import save_madminer_settings, load_madminer_settings
@@ -98,8 +95,8 @@ def add_parameter(
             parameter_name = f"parameter_{len(self.parameters)}"
 
         # Check and sanitize input
-        assert isinstance(parameter_name, six.string_types), f"Parameter name is not a string: {parameter_name}"
-        assert isinstance(lha_block, six.string_types), f"LHA block is not a string: {lha_block}"
+        assert isinstance(parameter_name, str), f"Parameter name is not a string: {parameter_name}"
+        assert isinstance(lha_block, str), f"LHA block is not a string: {lha_block}"
         assert isinstance(lha_id, int), f"LHA id is not an integer: {lha_id}"
 
         parameter_name = parameter_name.replace(" ", "_")
@@ -165,7 +162,7 @@ def set_parameters(self, parameters=None):
         self.parameters = OrderedDict()
 
         if isinstance(parameters, dict):
-            for key, values in six.iteritems(parameters):
+            for key, values in parameters.items():
                 if len(values) == 5:
                     self.add_parameter(
                         lha_block=values[0],
@@ -234,7 +231,7 @@ def add_benchmark(self, parameter_values, benchmark_name=None, verbose=True):
         if not isinstance(parameter_values, dict):
             raise RuntimeError(f"Parameter values are not a dict: {parameter_values}")
 
-        for key, value in six.iteritems(parameter_values):
+        for key, value in parameter_values.items():
             if key not in self.parameters:
                 raise RuntimeError(f"Unknown parameter: {key}")
 
@@ -281,7 +278,7 @@ def set_benchmarks(self, benchmarks=None, verbose=True):
         self.default_benchmark = None
 
         if isinstance(benchmarks, dict):
-            for name, values in six.iteritems(benchmarks):
+            for name, values in benchmarks.items():
                 self.add_benchmark(values, name, verbose=verbose)
         else:
             for values in benchmarks:
@@ -469,7 +466,7 @@ def add_systematics(
         # Default name
         if systematic_name is None:
             i = 0
-            while f"{effect}_{i}" in list(six.iterkeys(self.systematics)):
+            while f"{effect}_{i}" in list(self.systematics.keys()):
                 i += 1
             systematic_name = f"{type}_{i}"
         systematic_name = systematic_name.replace(" ", "_")
@@ -524,7 +521,7 @@ def load(self, filename, disable_morphing=False):
         ) = load_madminer_settings(filename, include_nuisance_benchmarks=False)
 
         logger.info("Found %s parameters:", len(self.parameters))
-        for key, values in six.iteritems(self.parameters):
+        for key, values in self.parameters.items():
             logger.info(
                 "   %s (LHA: %s %s, maximal power in squared ME: %s, range: %s)",
                 key,
@@ -535,7 +532,7 @@ def load(self, filename, disable_morphing=False):
             )
 
         logger.info("Found %s benchmarks:", len(self.benchmarks))
-        for key, values in six.iteritems(self.benchmarks):
+        for key, values in self.benchmarks.items():
             logger.info("   %s: %s", key, format_benchmark(values))
 
             if self.default_benchmark is None:
@@ -562,7 +559,7 @@ def load(self, filename, disable_morphing=False):
         else:
             logger.info("Found systematics setup with %s nuisance parameter groups", len(self.systematics))
 
-            for key, value in six.iteritems(self.systematics):
+            for key, value in self.systematics.items():
                 logger.debug("  %s: %s", key, " / ".join(str(x) for x in value))
 
     def save(self, filename):
@@ -1271,7 +1268,7 @@ def reweight_existing_sample(
             )
 
     def _check_pdf_or_scale_variation(self, systematics):
-        for value in six.itervalues(systematics):
+        for value in systematics.values():
             if value[0] in ["pdf", "scale"]:
                 return True
         return False

From 3ed585f84d6494601ed0ece3e12ba85cc593ab2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 14:27:45 -0400
Subject: [PATCH 18/65] src: delphes module f-strings

---
 madminer/delphes/delphes_reader.py | 55 +++++++++++++-----------------
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py
index 1f0c8c23d..b64a8b7b0 100644
--- a/madminer/delphes/delphes_reader.py
+++ b/madminer/delphes/delphes_reader.py
@@ -450,25 +450,25 @@ def add_default_observables(
             [n_leptons_max, n_photons_max, n_jets_max], ["l", "a", "j"], [False, False, include_charge]
         ):
             if include_numbers:
-                self.add_observable("n_{}s".format(symbol), "len({})".format(symbol), required=True)
+                self.add_observable(f"n_{symbol}s", f"len({symbol})", required=True)
 
             for i in range(n):
                 self.add_observable(
-                    "e_{}{}".format(symbol, i + 1), "{}[{}].e".format(symbol, i), required=False, default=0.0
+                    f"e_{symbol}{i+1}", f"{symbol}[{i}].e", required=False, default=0.0
                 )
                 self.add_observable(
-                    "pt_{}{}".format(symbol, i + 1), "{}[{}].pt".format(symbol, i), required=False, default=0.0
+                    f"pt_{symbol}{i+1}", f"{symbol}[{i}].pt", required=False, default=0.0
                 )
                 self.add_observable(
-                    "eta_{}{}".format(symbol, i + 1), "{}[{}].eta".format(symbol, i), required=False, default=0.0
+                    f"eta_{symbol}{i+1}", f"{symbol}[{i}].eta", required=False, default=0.0
                 )
                 self.add_observable(
-                    "phi_{}{}".format(symbol, i + 1), "{}[{}].phi()".format(symbol, i), required=False, default=0.0
+                    f"phi_{symbol}{i+1}", f"{symbol}[{i}].phi()", required=False, default=0.0
                 )
                 if include_this_charge and symbol == "l":
                     self.add_observable(
-                        "charge_{}{}".format(symbol, i + 1),
-                        "{}[{}].charge".format(symbol, i),
+                        f"charge_{symbol}{i+1}",
+                        f"{symbol}[{i}].charge",
                         required=False,
                         default=0.0,
                     )
@@ -638,9 +638,8 @@ def analyse_delphes_samples(
             # Following results: check consistency with previous results
             if len(self.observations) != len(this_observations):
                 raise ValueError(
-                    "Number of observations in different Delphes files incompatible: {} vs {}".format(
-                        len(self.observations), len(this_observations)
-                    )
+                    f"Number of observations in different Delphes files incompatible: "
+                    f"{len(self.observations)} vs {len(this_observations)}"
                 )
 
             # Merge weights with previous
@@ -664,7 +663,7 @@ def analyse_delphes_samples(
 
             # Merge observations with previous (should always be the same observables)
             for key in self.observations:
-                assert key in this_observations, "Observable {} not found in Delphes sample!".format(key)
+                assert key in this_observations, f"Observable {key} not found in Delphes sample!"
                 self.observations[key] = np.hstack([self.observations[key], this_observations[key]])
 
             self.events_sampling_benchmark_ids = np.hstack(
@@ -725,11 +724,9 @@ def _analyse_delphes_sample(
                     and (systematics_name, benchmark0, benchmark1) != self.nuisance_parameters[nuisance_parameter_name]
                 ):
                     raise RuntimeError(
-                        "Inconsistent information for same nuisance parameter {}. Old: {}. New: {}.".format(
-                            nuisance_parameter_name,
-                            self.nuisance_parameters[nuisance_parameter_name],
-                            (systematics_name, benchmark0, benchmark1),
-                        )
+                        f"Inconsistent information for same nuisance parameter {nuisance_parameter_name}. "
+                        f"Old: {self.nuisance_parameters[nuisance_parameter_name]}. "
+                        f"New: {(systematics_name, benchmark0, benchmark1)}."
                     )
                 self.nuisance_parameters[nuisance_parameter_name] = (systematics_name, benchmark0, benchmark1)
 
@@ -796,8 +793,8 @@ def _analyse_delphes_sample(
         if k_factor is not None:
             for key in this_weights:
                 this_weights[key] = k_factor * this_weights[key]
-        # Background scenario: we only have one set of weights, but these should be true for all benchmarks
 
+        # Background scenario: we only have one set of weights, but these should be true for all benchmarks
         if is_background:
             logger.debug("Sample is background")
             benchmarks_weight = list(six.itervalues(this_weights))[0]
@@ -847,18 +844,15 @@ def _check_sample_observations(self, this_observations):
 
             if this_n_events != n_events:
                 raise RuntimeError(
-                    "Mismatching number of events in Delphes observations for {}: {} vs {}".format(
-                        key, n_events, this_n_events
-                    )
+                    f"Mismatching number of events in Delphes observations for {key}:"
+                    f"{n_events} vs {this_n_events}"
                 )
 
             if not np.issubdtype(obs.dtype, np.number):
                 logger.warning(
-                    "Observations for observable %s have non-numeric dtype %s. This usually means something "
-                    "is wrong in the definition of the observable. Data: %s",
-                    key,
-                    obs.dtype,
-                    obs,
+                    f"Observations for observable {key} have non-numeric dtype {obs.dtype}."
+                    f"This usually means something is wrong in the observable definition."
+                    f"Data: {obs}"
                 )
         return n_events
 
@@ -873,16 +867,15 @@ def _check_sample_weights(self, n_events, this_weights):
 
             if this_n_events != n_events:
                 raise RuntimeError(
-                    "Mismatching number of events in weights {}: {} vs {}".format(key, n_events, this_n_events)
+                    f"Mismatching number of events in weights {key}:"
+                    f"{n_events} vs {this_n_events}"
                 )
 
             if not np.issubdtype(weights.dtype, np.number):
                 logger.warning(
-                    "Weights %s have non-numeric dtype %s. This usually means something "
-                    "is wrong in the definition of the observable. Data: %s",
-                    key,
-                    weights.dtype,
-                    weights,
+                    f"Weights {key} have non-numeric dtype {weights.dtype}."
+                    f"This usually means something is wrong in the observable definition."
+                    f"Data: {weights}"
                 )
         return n_events
 

From f9273776f39d6356691dacb4a3c98d7686011606 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 14:42:14 -0400
Subject: [PATCH 19/65] src: delphes module improve fmt

---
 madminer/delphes/delphes_reader.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py
index b64a8b7b0..9aec9b449 100644
--- a/madminer/delphes/delphes_reader.py
+++ b/madminer/delphes/delphes_reader.py
@@ -241,8 +241,13 @@ def run_delphes(self, delphes_directory, delphes_card, initial_command=None, log
                 logger.info("Running Delphes on HepMC sample at %s", hepmc_filename)
 
             delphes_sample_filename = run_delphes(
-                delphes_directory, delphes_card, hepmc_filename, initial_command=initial_command, log_file=log_file
+                delphes_directory=delphes_directory,
+                delphes_card_filename=delphes_card,
+                hepmc_sample_filename=hepmc_filename,
+                initial_command=initial_command,
+                log_file=log_file,
             )
+
             self.delphes_sample_filenames[i] = delphes_sample_filename
 
     def set_acceptance(
@@ -527,7 +532,11 @@ def reset_cuts(self):
         self.cuts_default_pass = []
 
     def analyse_delphes_samples(
-        self, generator_truth=False, delete_delphes_files=False, reference_benchmark=None, parse_lhe_events_as_xml=True
+        self,
+        generator_truth=False,
+        delete_delphes_files=False,
+        reference_benchmark=None,
+        parse_lhe_events_as_xml=True,
     ):
         """
         Main function that parses the Delphes samples (ROOT files), checks acceptance and cuts, and extracts
@@ -674,6 +683,7 @@ def analyse_delphes_samples(
         for name, n_events in zip(self.benchmark_names_phys, self.signal_events_per_benchmark):
             if n_events > 0:
                 logger.info("  %s from %s", n_events, name)
+
         if self.background_events > 0:
             logger.info("  %s from backgrounds", self.background_events)
 
@@ -710,6 +720,7 @@ def _analyse_delphes_sample(
         logger.debug("Extracting nuisance parameter definitions from LHE file")
         systematics_dict = extract_nuisance_parameters_from_lhe_file(lhe_file, systematics_used)
         logger.debug("systematics_dict: %s", systematics_dict)
+
         # systematics_dict has structure
         # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}}
 
@@ -769,11 +780,11 @@ def _analyse_delphes_sample(
             _, this_weights = parse_lhe_file(
                 filename=lhe_file_for_weights,
                 sampling_benchmark=sampling_benchmark,
-                benchmark_names=self.benchmark_names_phys,
                 observables=OrderedDict(),
+                benchmark_names=self.benchmark_names_phys,
+                is_background=is_background,
                 parse_events_as_xml=parse_lhe_events_as_xml,
                 systematics_dict=systematics_dict,
-                is_background=is_background,
             )
 
             logger.debug("Found weights %s in LHE file", list(this_weights.keys()))

From b46ded2618ae3919c2f638e0ccac39dec7306029 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 14:46:58 -0400
Subject: [PATCH 20/65] src: delphes module remove 'six'

---
 madminer/delphes/delphes_reader.py | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py
index 9aec9b449..f2720d295 100644
--- a/madminer/delphes/delphes_reader.py
+++ b/madminer/delphes/delphes_reader.py
@@ -1,10 +1,7 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
-from collections import OrderedDict
-import numpy as np
 import logging
+import numpy as np
 import os
+from collections import OrderedDict
 
 from madminer.utils.interfaces.madminer_hdf5 import (
     save_events_to_madminer_file,
@@ -725,10 +722,8 @@ def _analyse_delphes_sample(
         # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}}
 
         # Store nuisance parameters
-        for systematics_name, nuisance_info in six.iteritems(systematics_dict):
-            for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in six.iteritems(
-                nuisance_info
-            ):
+        for systematics_name, nuisance_info in systematics_dict.items():
+            for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in nuisance_info.items():
                 if (
                     self.nuisance_parameters is not None
                     and nuisance_parameter_name in self.nuisance_parameters
@@ -791,7 +786,7 @@ def _analyse_delphes_sample(
 
             # Apply cuts
             logger.debug("Applying Delphes-based cuts to LHE weights")
-            for key, weights in six.iteritems(this_weights):
+            for key, weights in this_weights.items():
                 this_weights[key] = weights[cut_filter]
 
         if this_weights is None:
@@ -808,7 +803,7 @@ def _analyse_delphes_sample(
         # Background scenario: we only have one set of weights, but these should be true for all benchmarks
         if is_background:
             logger.debug("Sample is background")
-            benchmarks_weight = list(six.itervalues(this_weights))[0]
+            benchmarks_weight = list(this_weights.values())[0]
 
             for benchmark_name in self.benchmark_names_phys:
                 this_weights[benchmark_name] = benchmarks_weight
@@ -847,7 +842,7 @@ def _check_sample_observations(self, this_observations):
         """ Sanity checks """
         # Check number of events in observables
         n_events = None
-        for key, obs in six.iteritems(this_observations):
+        for key, obs in this_observations.items():
             this_n_events = len(obs)
             if n_events is None:
                 n_events = this_n_events
@@ -870,7 +865,7 @@ def _check_sample_observations(self, this_observations):
     def _check_sample_weights(self, n_events, this_weights):
         """ Sanity checks """
         # Check number of events in weights
-        for key, weights in six.iteritems(this_weights):
+        for key, weights in this_weights.items():
             this_n_events = len(weights)
             if n_events is None:
                 n_events = this_n_events

From 321955995b5c228ff851f386face2cca849214e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 14:58:31 -0400
Subject: [PATCH 21/65] src: delphes module combine funcs

---
 madminer/delphes/delphes_reader.py | 114 +++++++++++------------------
 1 file changed, 44 insertions(+), 70 deletions(-)

diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py
index f2720d295..9f5502cc9 100644
--- a/madminer/delphes/delphes_reader.py
+++ b/madminer/delphes/delphes_reader.py
@@ -100,6 +100,48 @@ def __init__(self, filename):
         # Initialize nuisance parameters
         self.nuisance_parameters = OrderedDict()
 
+    @staticmethod
+    def _check_python_syntax(expression):
+        """
+        Evaluates a Python expression to check for syntax errors
+
+        Parameters
+        ----------
+        expression : str
+            Python expression to be evaluated. SyntaxError is re-raised as ValueError; NameError is ignored
+
+        Returns
+        -------
+            None
+        """
+
+        try:
+            eval(expression)  # NOTE(review): eval() executes the expression, it does not merely parse it
+        except SyntaxError:
+            raise ValueError("The provided Python expression is invalid")
+        except NameError:
+            pass  # only the syntax is being checked, so unresolved names are tolerated
+
+    @staticmethod
+    def _check_sample_elements(this_elements, n_events=None):
+        """ Sanity checks: every entry must hold n_events events (else RuntimeError); warns on non-numeric dtypes """
+
+        # Check number of events in each entry (this helper is called for both observables and weights)
+        for key, elems in this_elements.items():
+            this_n_events = len(elems)
+
+            if n_events is None:  # no reference count given yet: adopt the length of the first entry
+                n_events = this_n_events
+                logger.debug(f"Found {n_events} events")
+
+            if this_n_events != n_events:
+                raise RuntimeError(f"Mismatching number of events for {key}: "f"{n_events} vs {this_n_events}")
+
+            if not np.issubdtype(elems.dtype, np.number):
+                logger.warning(f"For key {key} have non-numeric dtype {elems.dtype}.")
+
+        return n_events
+
     def add_sample(
         self,
         hepmc_filename,
@@ -767,7 +809,7 @@ def _analyse_delphes_sample(
             logger.debug("Did not extract weights from Delphes file")
 
         # Sanity checks
-        n_events = self._check_sample_observations(this_observations)
+        n_events = self._check_sample_elements(this_observations, None)
 
         # Find weights in LHE file
         if lhe_file_for_weights is not None:
@@ -793,7 +835,7 @@ def _analyse_delphes_sample(
             raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.")
 
         # Sanity checks
-        n_events = self._check_sample_weights(n_events, this_weights)
+        n_events = self._check_sample_elements(this_weights, n_events)
 
         # k factors
         if k_factor is not None:
@@ -817,74 +859,6 @@ def _analyse_delphes_sample(
 
         return this_observations, this_weights, n_events
 
-    def _check_python_syntax(self, expression):
-        """
-        Evaluates a Python expression to check for syntax errors
-
-        Parameters
-        ----------
-        expression : str
-            Python expression to be evaluated. The evaluation raises either SyntaxError or NameError
-
-        Returns
-        -------
-            None
-        """
-
-        try:
-            eval(expression)
-        except SyntaxError:
-            raise ValueError("The provided Python expression is invalid")
-        except NameError:
-            pass
-
-    def _check_sample_observations(self, this_observations):
-        """ Sanity checks """
-        # Check number of events in observables
-        n_events = None
-        for key, obs in this_observations.items():
-            this_n_events = len(obs)
-            if n_events is None:
-                n_events = this_n_events
-                logger.debug("Found %s events", n_events)
-
-            if this_n_events != n_events:
-                raise RuntimeError(
-                    f"Mismatching number of events in Delphes observations for {key}:"
-                    f"{n_events} vs {this_n_events}"
-                )
-
-            if not np.issubdtype(obs.dtype, np.number):
-                logger.warning(
-                    f"Observations for observable {key} have non-numeric dtype {obs.dtype}."
-                    f"This usually means something is wrong in the observable definition."
-                    f"Data: {obs}"
-                )
-        return n_events
-
-    def _check_sample_weights(self, n_events, this_weights):
-        """ Sanity checks """
-        # Check number of events in weights
-        for key, weights in this_weights.items():
-            this_n_events = len(weights)
-            if n_events is None:
-                n_events = this_n_events
-                logger.debug("Found %s events", n_events)
-
-            if this_n_events != n_events:
-                raise RuntimeError(
-                    f"Mismatching number of events in weights {key}:"
-                    f"{n_events} vs {this_n_events}"
-                )
-
-            if not np.issubdtype(weights.dtype, np.number):
-                logger.warning(
-                    f"Weights {key} have non-numeric dtype {weights.dtype}."
-                    f"This usually means something is wrong in the observable definition."
-                    f"Data: {weights}"
-                )
-        return n_events
-
     def save(self, filename_out, shuffle=True):
         """
         Saves the observable definitions, observable values, and event weights in a MadMiner file. The parameter,

From f34ab126ab2b6f530e712d378b885c8d972b2608 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 15:05:20 -0400
Subject: [PATCH 22/65] src: fisher module f-strings

---
 madminer/fisherinformation/geometry.py    |  3 ++-
 madminer/fisherinformation/information.py | 14 +++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/madminer/fisherinformation/geometry.py b/madminer/fisherinformation/geometry.py
index 218c798bb..0f670b88d 100755
--- a/madminer/fisherinformation/geometry.py
+++ b/madminer/fisherinformation/geometry.py
@@ -359,7 +359,8 @@ def distance_contours(
                 dth0 = np.array([np.cos(angle), np.sin(angle)])
             else:
                 dth0 = np.array([random.uniform(-1, 1) for _ in range(self.dimension)])
-            logger.debug("Calculate Trajectory Number %s with dtheta0=%s", i, dth0)
+
+            logger.debug(f"Calculate Trajectory Number {i} with dtheta0={dth0}")
             ths, ds = self.find_trajectory(theta0, dth0, limits, stepsize)
             for th in ths:
                 thetas.append(th)
diff --git a/madminer/fisherinformation/information.py b/madminer/fisherinformation/information.py
index 1a32c690b..e28328281 100755
--- a/madminer/fisherinformation/information.py
+++ b/madminer/fisherinformation/information.py
@@ -200,8 +200,8 @@ def full_information(
         """
 
         # Check input
-        if mode not in ["score", "information", "modified_score"]:
-            raise ValueError("Unknown mode {}, has to be 'score', 'modified_score', or 'information'!".format(mode))
+        if mode not in {"score", "information", "modified_score"}:
+            raise ValueError(f"Unknown mode {mode}")
 
         # Load Estimator model
         if os.path.isdir(model_file) and os.path.exists(model_file + "/ensemble.json"):
@@ -671,8 +671,8 @@ def histo_information_2d(
             i_bins1 = np.searchsorted(bin1_boundaries, histo1_observables)
             i_bins2 = np.searchsorted(bin2_boundaries, histo2_observables)
 
-            assert ((0 <= i_bins1) & (i_bins1 < n_bins1_total)).all(), "Wrong bin {}".format(i_bins1)
-            assert ((0 <= i_bins2) & (i_bins2 < n_bins1_total)).all(), "Wrong bin {}".format(i_bins2)
+            assert ((0 <= i_bins1) & (i_bins1 < n_bins1_total)).all(), f"Wrong bin {i_bins1}"
+            assert ((0 <= i_bins2) & (i_bins2 < n_bins2_total)).all(), f"Wrong bin {i_bins2}"
 
             # Add up
             for i in range(n_bins1_total):
@@ -823,7 +823,7 @@ def histogram_of_information(
 
                 # Find bins
                 bins = np.searchsorted(bin_boundaries, histo_observables)
-                assert ((0 <= bins) & (bins < n_bins_total)).all(), "Wrong bin {}".format(bins)
+                assert ((0 <= bins) & (bins < n_bins_total)).all(), f"Wrong bin {bins}"
 
                 # Add up
                 for i in range(n_bins_total):
@@ -834,7 +834,7 @@ def histogram_of_information(
         # ML case
         else:
             # Load SALLY model
-            if os.path.isdir(model_file) and os.path.exists(model_file + "/ensemble.json"):
+            if os.path.isdir(model_file) and os.path.exists(f"{model_file}/ensemble.json"):
                 model_is_ensemble = True
                 model = Ensemble()
                 model.load(model_file)
@@ -946,7 +946,7 @@ def histogram_of_information(
 
                 # Find bins
                 bins = np.searchsorted(bin_boundaries, histo_observables)
-                assert ((0 <= bins) & (bins < n_bins_total)).all(), "Wrong bin {}".format(bins)
+                assert ((0 <= bins) & (bins < n_bins_total)).all(), f"Wrong bin {bins}"
 
                 # Add up
                 for i in range(n_bins_total):

From fca8db549d0a03414232cbaadb86acff5e20b1ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 15:17:59 -0400
Subject: [PATCH 23/65] src: fisher module improve fmt

---
 madminer/fisherinformation/geometry.py    |  11 +--
 madminer/fisherinformation/information.py | 102 ++++++++++++++++------
 madminer/fisherinformation/manipulate.py  |   4 +-
 3 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/madminer/fisherinformation/geometry.py b/madminer/fisherinformation/geometry.py
index 0f670b88d..b477ba2bc 100755
--- a/madminer/fisherinformation/geometry.py
+++ b/madminer/fisherinformation/geometry.py
@@ -20,7 +20,7 @@ class InformationGeometry:
     one of the following functions
 
     * `InformationGeometry.information_from_formula()` defines the Fisher Information
-    explicitly as function of the theory paramters `theta`.
+    explicitly as function of the theory parameters `theta`.
     * `InformationGeometry.information_from_grid()` loads a grid of Fisher Informations
     which is then interpolated.
 
@@ -38,7 +38,7 @@ def __init__(self):
     def information_from_formula(self, formula, dimension):
         """
         Explicitly defines the Fisher Information as function of the theory parameter `theta`
-        through a formula that can be avaulated using `eval()`.
+        through a formula that can be evaluated using `eval()`.
 
         Parameters
         ----------
@@ -300,7 +300,7 @@ def distance_contours(
 
         grid_ranges : list of (tuple of float)
             Specifies the boundaries of the parameter grid in which the trajectory
-            is evaulated. It should be `[[min, max], [min, max], ..., [min, max]`,
+            is evaluated. It should be `[[min, max], [min, max], ..., [min, max]]`,
             where the list goes over all parameters and `min` and `max` are float.
 
         grid_resolutions : list of int
@@ -342,12 +342,13 @@ def distance_contours(
         """
 
         # automatic setting of stepsize and ntrajectories
-        if stepsize == None:
+        if stepsize is None:
             stepsize = min([(limit[1] - limit[0]) / 20.0 for limit in grid_ranges])
-        if ntrajectories == None:
+        if ntrajectories is None:
             ntrajectories = 20 * self.dimension
         if self.dimension is not 2:
             continous_sampling = False
+
         limits = (1.0 + 2.0 * stepsize) * np.array(grid_ranges)
 
         # determine trajectories
diff --git a/madminer/fisherinformation/information.py b/madminer/fisherinformation/information.py
index e28328281..f1b4d45bc 100755
--- a/madminer/fisherinformation/information.py
+++ b/madminer/fisherinformation/information.py
@@ -16,7 +16,7 @@ class FisherInformation(DataAnalyzer):
     """
     Functions to calculate expected Fisher information matrices.
 
-    After inializing a `FisherInformation` instance with the filename of a MadMiner file, different information matrices
+    After initializing a `FisherInformation` instance with the filename of a MadMiner file, different information matrices
     can be calculated:
 
     * `FisherInformation.truth_information()` calculates the full truth-level Fisher information.
@@ -51,7 +51,12 @@ def __init__(self, filename, include_nuisance_parameters=True):
         super(FisherInformation, self).__init__(filename, False, include_nuisance_parameters)
 
     def truth_information(
-        self, theta, luminosity=300000.0, cuts=None, efficiency_functions=None, include_nuisance_parameters=True
+        self,
+        theta,
+        luminosity=300000.0,
+        cuts=None,
+        efficiency_functions=None,
+        include_nuisance_parameters=True,
     ):
         """
         Calculates the full Fisher information at parton / truth level. This is the information in an idealized
@@ -299,7 +304,9 @@ def full_information(
 
             for i_batch, (observations, weights_benchmarks) in enumerate(
                 self.event_loader(
-                    batch_size=batch_size, start=start_event, include_nuisance_parameters=include_nuisance_parameters
+                    batch_size=batch_size,
+                    start=start_event,
+                    include_nuisance_parameters=include_nuisance_parameters,
                 )
             ):
                 if (i_batch + 1) % n_batches_verbose == 0:
@@ -329,6 +336,7 @@ def full_information(
                             n_events=luminosity * total_xsec * np.sum(weights_theta) / total_sum_weights_theta,
                         )
                     this_covariance = None
+
                 # Sum up results
                 if fisher_info_kin is None:
                     fisher_info_kin = this_fisher_info
@@ -371,7 +379,12 @@ def full_information(
         return fisher_info_rate + fisher_info_kin, rate_covariance
 
     def rate_information(
-        self, theta, luminosity, cuts=None, efficiency_functions=None, include_nuisance_parameters=True
+        self,
+        theta,
+        luminosity,
+        cuts=None,
+        efficiency_functions=None,
+        include_nuisance_parameters=True,
     ):
         """
         Calculates the Fisher information in a measurement of the total cross section (without any kinematic
@@ -527,7 +540,7 @@ def histo_information(
 
             # Find bins
             i_bins = np.searchsorted(bin_boundaries, histo_observables)
-            assert ((0 <= i_bins) & (i_bins < n_bins_total)).all(), "Wrong bin {}".format(i_bins)
+            assert ((0 <= i_bins) & (i_bins < n_bins_total)).all(), f"Wrong bin {i_bins}"
 
             # Add up
             for i in range(n_bins_total):
@@ -637,10 +650,23 @@ def histo_information_2d(
 
         # Binning
         bin1_boundaries, n_bins1_total = self._calculate_binning(
-            bins1, cuts, efficiency_functions, histrange1, n_events_dynamic_binning, observable1, theta
+            bins1,
+            cuts,
+            efficiency_functions,
+            histrange1,
+            n_events_dynamic_binning,
+            observable1,
+            theta,
         )
+
         bin2_boundaries, n_bins2_total = self._calculate_binning(
-            bins2, cuts, efficiency_functions, histrange2, n_events_dynamic_binning, observable2, theta
+            bins2,
+            cuts,
+            efficiency_functions,
+            histrange2,
+            n_events_dynamic_binning,
+            observable2,
+            theta,
         )
 
         # Loop over batches
@@ -879,11 +905,6 @@ def histogram_of_information(
             else:
                 start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1
 
-            if start_event > 0:
-                total_sum_weights_theta = self._calculate_xsec(theta=theta, start_event=start_event)
-            else:
-                total_sum_weights_theta = total_xsec
-
             # Number of batches
             n_batches = int(np.ceil((self.n_samples - start_event) / batch_size))
             n_batches_verbose = max(int(round(n_batches / 10, 0)), 1)
@@ -978,36 +999,36 @@ def histogram_of_sigma_dsigma(self, theta, observable, nbins, histrange, cuts=No
         Parameters
         ----------
         theta : ndarray
-        Parameter point `theta` at which the Fisher information matrix `I_ij(theta)` is evaluated.
+            Parameter point `theta` at which the Fisher information matrix `I_ij(theta)` is evaluated.
 
         observable : str
-        Expression for the observable to be sliced. The str will be parsed by Python's `eval()` function
-        and can use the names of the observables in the MadMiner files.
+            Expression for the observable to be sliced. The str will be parsed by Python's `eval()` function
+            and can use the names of the observables in the MadMiner files.
 
         nbins : int
-        Number of bins in the slicing, excluding overflow bins.
+            Number of bins in the slicing, excluding overflow bins.
 
         histrange : tuple of float
-        Minimum and maximum value of the slicing in the form `(min, max)`. Overflow bins are always added.
+            Minimum and maximum value of the slicing in the form `(min, max)`. Overflow bins are always added.
 
         cuts : None or list of str, optional
-        Cuts. Each entry is a parseable Python expression that returns a bool (True if the event should pass a cut,
-        False otherwise). Default value: None.
+            Cuts. Each entry is a parseable Python expression that returns a bool (True if the event should pass a cut,
+            False otherwise). Default value: None.
 
         efficiency_functions : list of str or None
-        Efficiencies. Each entry is a parseable Python expression that returns a float for the efficiency of one
-        component. Default value: None.
+            Efficiencies. Each entry is a parseable Python expression that returns a float for the efficiency of one
+            component. Default value: None.
 
         Returns
         -------
         bin_boundaries : ndarray
-        Observable slice boundaries.
+            Observable slice boundaries.
 
         sigma_bins : ndarray
-        Cross section in pb in each of the slices.
+            Cross section in pb in each of the slices.
 
         dsigma_bins : ndarray
-        Cross section in pb in each of the slices.
+            Cross section in pb in each of the slices.
         """
 
         # Input
@@ -1071,11 +1092,20 @@ def histogram_of_sigma_dsigma(self, theta, observable, nbins, histrange, cuts=No
     def nuisance_constraint_information(self):
         """ Builds the Fisher information term representing the Gaussian constraints on the nuisance parameters """
 
-        diagonal = np.array([0.0 for _ in range(self.n_parameters)] + [1.0 for _ in range(self.n_nuisance_parameters)])
-        return np.diag(diagonal)
+        return np.diag(
+            np.array(
+                [0.0 for _ in range(self.n_parameters)] +
+                [1.0 for _ in range(self.n_nuisance_parameters)]
+            )
+        )
 
     def _check_binning_stats(
-        self, weights_benchmarks, weights_benchmark_uncertainties, theta, report=5, n_bins_last_axis=None
+        self,
+        weights_benchmarks,
+        weights_benchmark_uncertainties,
+        theta,
+        report=5,
+        n_bins_last_axis=None,
     ):
         theta_matrix = self._get_theta_benchmark_matrix(theta, zero_pad=False)  # (n_benchmarks_phys,)
         sigma = mdot(theta_matrix, weights_benchmarks)  # Shape (n_bins,)
@@ -1098,7 +1128,14 @@ def _check_binning_stats(
             )
 
     def _calculate_binning(
-        self, bins, cuts, efficiency_functions, histrange, n_events_dynamic_binning, observable, theta
+        self,
+        bins,
+        cuts,
+        efficiency_functions,
+        histrange,
+        n_events_dynamic_binning,
+        observable,
+        theta,
     ):
         dynamic_binning = histrange is None and isinstance(bins, int)
         if dynamic_binning:
@@ -1113,6 +1150,7 @@ def _calculate_binning(
         else:
             bin_boundaries = bins
             n_bins_total = len(bins) + 1
+
         return bin_boundaries, n_bins_total
 
     def _calculate_fisher_information(
@@ -1485,7 +1523,13 @@ def _calculate_xsec(
         return xsec
 
     def _calculate_dynamic_binning(
-        self, observable, theta, n_bins, n_events=None, cuts=None, efficiency_functions=None
+        self,
+        observable,
+        theta,
+        n_bins,
+        n_events=None,
+        cuts=None,
+        efficiency_functions=None,
     ):
 
         if cuts is None:
diff --git a/madminer/fisherinformation/manipulate.py b/madminer/fisherinformation/manipulate.py
index d7922f2f8..2c1ed8b0d 100755
--- a/madminer/fisherinformation/manipulate.py
+++ b/madminer/fisherinformation/manipulate.py
@@ -18,7 +18,7 @@ def project_information(fisher_information, remaining_components, covariance=Non
         Original n x n Fisher information.
 
     remaining_components : list of int
-        List with m entries, each an int with 0 <= remaining_compoinents[i] < n. Denotes which parameters are kept, and
+        List with m entries, each an int with 0 <= remaining_components[i] < n. Denotes which parameters are kept, and
         their new order. All other parameters or projected out.
 
     covariance : ndarray or None, optional
@@ -74,7 +74,7 @@ def profile_information(
         Original n x n Fisher information.
 
     remaining_components : list of int
-        List with m entries, each an int with 0 <= remaining_compoinents[i] < n. Denotes which parameters are kept, and
+        List with m entries, each an int with 0 <= remaining_components[i] < n. Denotes which parameters are kept, and
         their new order. All other parameters or profiled out.
 
     covariance : ndarray or None, optional

From 6fb6279a51e5755c9b7c4c7e14f310dd0ac7b72a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 15:19:45 -0400
Subject: [PATCH 24/65] src: fisher module remove 'future'

---
 madminer/fisherinformation/geometry.py    | 3 +--
 madminer/fisherinformation/information.py | 2 --
 madminer/fisherinformation/manipulate.py  | 3 +--
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/madminer/fisherinformation/geometry.py b/madminer/fisherinformation/geometry.py
index b477ba2bc..ad41512d9 100755
--- a/madminer/fisherinformation/geometry.py
+++ b/madminer/fisherinformation/geometry.py
@@ -1,8 +1,7 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 import random
+
 from scipy.interpolate import griddata, LinearNDInterpolator, CloughTocher2DInterpolator
 from scipy.stats import chi2
 
diff --git a/madminer/fisherinformation/information.py b/madminer/fisherinformation/information.py
index f1b4d45bc..4e33e27fe 100755
--- a/madminer/fisherinformation/information.py
+++ b/madminer/fisherinformation/information.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 import os
diff --git a/madminer/fisherinformation/manipulate.py b/madminer/fisherinformation/manipulate.py
index 2c1ed8b0d..9cf331e41 100755
--- a/madminer/fisherinformation/manipulate.py
+++ b/madminer/fisherinformation/manipulate.py
@@ -1,7 +1,6 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
+
 from ..utils.various import separate_information_blocks
 
 logger = logging.getLogger(__name__)

From aa0104fdf6bbeea0fc62b8460c0e9adb9f057ae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 15:58:40 -0400
Subject: [PATCH 25/65] src: LHE module f-strings

---
 madminer/lhe/lhe_reader.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py
index 48daa9548..154c6187e 100644
--- a/madminer/lhe/lhe_reader.py
+++ b/madminer/lhe/lhe_reader.py
@@ -406,25 +406,25 @@ def add_default_observables(
             [n_leptons_max, n_photons_max, n_jets_max], ["l", "a", "j"], [False, False, include_charge]
         ):
             if include_numbers:
-                self.add_observable("n_{}s".format(symbol), "len({})".format(symbol), required=True)
+                self.add_observable(f"n_{symbol}s", f"len({symbol})", required=True)
 
             for i in range(n):
                 self.add_observable(
-                    "e_{}{}".format(symbol, i + 1), "{}[{}].e".format(symbol, i), required=False, default=0.0
+                    f"e_{symbol}{i+1}", f"{symbol}[{i}].e", required=False, default=0.0
                 )
                 self.add_observable(
-                    "pt_{}{}".format(symbol, i + 1), "{}[{}].pt".format(symbol, i), required=False, default=0.0
+                    f"pt_{symbol}{i+1}", f"{symbol}[{i}].pt", required=False, default=0.0
                 )
                 self.add_observable(
-                    "eta_{}{}".format(symbol, i + 1), "{}[{}].eta".format(symbol, i), required=False, default=0.0
+                    f"eta_{symbol}{i+1}", f"{symbol}[{i}].eta", required=False, default=0.0
                 )
                 self.add_observable(
-                    "phi_{}{}".format(symbol, i + 1), "{}[{}].phi()".format(symbol, i), required=False, default=0.0
+                    f"phi_{symbol}{i+1}", f"{symbol}[{i}].phi()", required=False, default=0.0
                 )
                 if include_this_charge and symbol == "l":
                     self.add_observable(
-                        "charge_{}{}".format(symbol, i + 1),
-                        "{}[{}].charge".format(symbol, i),
+                        f"charge_{symbol}{i+1}",
+                        f"{symbol}[{i}].charge",
                         required=False,
                         default=0.0,
                     )
@@ -605,9 +605,8 @@ def analyse_samples(self, reference_benchmark=None, parse_events_as_xml=True):
             # Following results: check consistency with previous results
             if len(self.observations) != len(this_observations):
                 raise ValueError(
-                    "Number of observations in different Delphes files incompatible: {} vs {}".format(
-                        len(self.observations), len(this_observations)
-                    )
+                    f"Number of observations in different Delphes files incompatible: "
+                    f"{len(self.observations)} vs {len(this_observations)}"
                 )
 
             # Merge weights with previous
@@ -631,7 +630,7 @@ def analyse_samples(self, reference_benchmark=None, parse_events_as_xml=True):
 
             # Merge observations with previous (should always be the same observables)
             for key in self.observations:
-                assert key in this_observations, "Observable {} not found in Delphes sample!".format(key)
+                assert key in this_observations, f"Observable {key} not found in Delphes sample!"
                 self.observations[key] = np.hstack([self.observations[key], this_observations[key]])
 
             self.events_sampling_benchmark_ids = np.hstack(
@@ -680,11 +679,9 @@ def _parse_sample(
                     and (systematics_name, benchmark0, benchmark1) != self.nuisance_parameters[nuisance_parameter_name]
                 ):
                     raise RuntimeError(
-                        "Inconsistent information for same nuisance parameter {}. Old: {}. New: {}.".format(
-                            nuisance_parameter_name,
-                            self.nuisance_parameters[nuisance_parameter_name],
-                            (systematics_name, benchmark0, benchmark1),
-                        )
+                        f"Inconsistent information for same nuisance parameter {nuisance_parameter_name}. "
+                        f"Old: {self.nuisance_parameters[nuisance_parameter_name]}. "
+                        f"New: {(systematics_name, benchmark0, benchmark1)}."
                     )
                 self.nuisance_parameters[nuisance_parameter_name] = (systematics_name, benchmark0, benchmark1)
 

From 1c664c3c111f0f0fae5f359ce6bde535e852ce3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:04:09 -0400
Subject: [PATCH 26/65] src: LHE module improve fmt

---
 madminer/lhe/lhe_reader.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py
index 154c6187e..bea20ad1e 100644
--- a/madminer/lhe/lhe_reader.py
+++ b/madminer/lhe/lhe_reader.py
@@ -111,7 +111,14 @@ def __init__(self, filename):
         # Initialize nuisance parameters
         self.nuisance_parameters = OrderedDict()
 
-    def add_sample(self, lhe_filename, sampled_from_benchmark, is_background=False, k_factor=1.0, systematics=None):
+    def add_sample(
+        self,
+        lhe_filename,
+        sampled_from_benchmark,
+        is_background=False,
+        k_factor=1.0,
+        systematics=None,
+    ):
         """
         Adds an LHE sample of simulated events.
 
@@ -665,6 +672,7 @@ def _parse_sample(
         logger.debug("Extracting nuisance parameter definitions from LHE file")
         systematics_dict = extract_nuisance_parameters_from_lhe_file(lhe_file, systematics_used)
         logger.debug("systematics_dict: %s", systematics_dict)
+
         # systematics_dict has structure
         # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}}
 

From dfe9ad316a32370af260b4999df4de7a9a424929 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:06:32 -0400
Subject: [PATCH 27/65] src: LHE module remove 'six'

---
 madminer/lhe/lhe_reader.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py
index bea20ad1e..2ba34cc34 100644
--- a/madminer/lhe/lhe_reader.py
+++ b/madminer/lhe/lhe_reader.py
@@ -1,9 +1,6 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
-from collections import OrderedDict
-import numpy as np
 import logging
+import numpy as np
+from collections import OrderedDict
 
 from madminer.utils.interfaces.madminer_hdf5 import (
     save_events_to_madminer_file,
@@ -677,10 +674,8 @@ def _parse_sample(
         # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}}
 
         # Store nuisance parameters
-        for systematics_name, nuisance_info in six.iteritems(systematics_dict):
-            for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in six.iteritems(
-                nuisance_info
-            ):
+        for systematics_name, nuisance_info in systematics_dict.items():
+            for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in nuisance_info.items():
                 if (
                     self.nuisance_parameters is not None
                     and nuisance_parameter_name in self.nuisance_parameters
@@ -739,7 +734,7 @@ def _check_sample_observations_and_weights(this_observations, this_weights):
 
         # Check number of events in observables, and their dtype
         n_events = None
-        for key, obs in six.iteritems(this_observations):
+        for key, obs in this_observations.items():
             this_n_events = len(obs)
             if n_events is None:
                 n_events = this_n_events
@@ -761,7 +756,7 @@ def _check_sample_observations_and_weights(this_observations, this_weights):
                     obs,
                 )
         # Check number of events in weights, and thier dtype
-        for key, weights in six.iteritems(this_weights):
+        for key, weights in this_weights.items():
             this_n_events = len(weights)
             if n_events is None:
                 n_events = this_n_events

From c100335f57900186229562f2459b3b358f383828 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:08:48 -0400
Subject: [PATCH 28/65] src: LHE module combine funcs

---
 madminer/lhe/lhe_reader.py | 74 ++++++++++++--------------------------
 1 file changed, 23 insertions(+), 51 deletions(-)

diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py
index 2ba34cc34..d8c091ce7 100644
--- a/madminer/lhe/lhe_reader.py
+++ b/madminer/lhe/lhe_reader.py
@@ -108,6 +108,27 @@ def __init__(self, filename):
         # Initialize nuisance parameters
         self.nuisance_parameters = OrderedDict()
 
+    @staticmethod
+    def _check_sample_elements(this_elements, n_events=None):
+        """
+        Sanity check: every entry of `this_elements` must hold the same number of events.
+
+        `n_events` is the expected count; if None, it is taken from the first entry.
+        Returns the (possibly updated) count and raises RuntimeError on a mismatch.
+        """
+        for key, elems in this_elements.items():
+            this_n_events = len(elems)
+            if n_events is None:
+                n_events = this_n_events
+                logger.debug("Found %s events", n_events)
+            if this_n_events != n_events:
+                raise RuntimeError(f"Mismatching number of events for {key}: {n_events} vs {this_n_events}")
+
+            # Only warn: a non-numeric dtype usually points to a wrongly defined observable
+            if not np.issubdtype(elems.dtype, np.number):
+                logger.warning("For key %s have non-numeric dtype %s.", key, elems.dtype)
+        return n_events
+
     def add_sample(
         self,
         lhe_filename,
@@ -716,8 +737,8 @@ def _parse_sample(
             return None, None
         logger.debug("Found weights %s in LHE file", list(this_weights.keys()))
 
-        # Sanity checks
-        n_events = self._check_sample_observations_and_weights(this_observations, this_weights)
+        n_events = self._check_sample_elements(this_observations, None)
+        n_events = self._check_sample_elements(this_weights, n_events)  # must match observation count
 
         # Rescale nuisance parameters to reference benchmark
         reference_weights = this_weights[reference_benchmark]
@@ -728,55 +749,6 @@ def _parse_sample(
 
         return this_observations, this_weights, n_events
 
-    @staticmethod
-    def _check_sample_observations_and_weights(this_observations, this_weights):
-        """ Sanity checks """
-
-        # Check number of events in observables, and their dtype
-        n_events = None
-        for key, obs in this_observations.items():
-            this_n_events = len(obs)
-            if n_events is None:
-                n_events = this_n_events
-                logger.debug("Found %s events", n_events)
-
-            if this_n_events != n_events:
-                raise RuntimeError(
-                    "Mismatching number of events in LHE observations for {}: {} vs {}".format(
-                        key, n_events, this_n_events
-                    )
-                )
-
-            if not np.issubdtype(obs.dtype, np.number):
-                logger.warning(
-                    "Observations for observable %s have non-numeric dtype %s. This usually means something "
-                    "is wrong in the definition of the observable. Data: %s",
-                    key,
-                    obs.dtype,
-                    obs,
-                )
-        # Check number of events in weights, and thier dtype
-        for key, weights in this_weights.items():
-            this_n_events = len(weights)
-            if n_events is None:
-                n_events = this_n_events
-                logger.debug("Found %s events", n_events)
-
-            if this_n_events != n_events:
-                raise RuntimeError(
-                    "Mismatching number of events in weights {}: {} vs {}".format(key, n_events, this_n_events)
-                )
-
-            if not np.issubdtype(weights.dtype, np.number):
-                logger.warning(
-                    "Weights %s have non-numeric dtype %s. This usually means something "
-                    "is wrong in the definition of the observable. Data: %s",
-                    key,
-                    weights.dtype,
-                    weights,
-                )
-        return n_events
-
     def save(self, filename_out, shuffle=True):
         """
         Saves the observable definitions, observable values, and event weights in a MadMiner file. The parameter,

From 54b42d8cf635ffc7db0e482506ea1db31ea79e3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:24:56 -0400
Subject: [PATCH 29/65] src: likelihood module f-strings

---
 madminer/likelihood/histo.py      | 9 +++------
 madminer/likelihood/manipulate.py | 7 +++----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/madminer/likelihood/histo.py b/madminer/likelihood/histo.py
index 32cf8b0b8..fc3676e3c 100644
--- a/madminer/likelihood/histo.py
+++ b/madminer/likelihood/histo.py
@@ -114,13 +114,10 @@ def create_negative_log_likelihood(
         if n_observed is None:
             n_observed = len(x_observed)
 
-        supported_modes = ["sampled", "weighted", "histo"]
-        if mode not in supported_modes:
-            raise ValueError("Mode %s unknown. Choose one of the following methods: %s", mode, supported_modes)
+        if mode not in {"sampled", "weighted", "histo"}:
+            raise ValueError(f"Mode {mode} unknown.")
         if mode == "histo" and self.n_nuisance_parameters > 0:
-            raise ValueError(
-                "Mode histo is currently not supported in the presence of nuisance parameters. Please use mode weighted or sampled."
-            )
+            raise ValueError("Mode histo is currently not supported in the presence of nuisance parameters")
 
         # Load model - nothing interesting
         if score_components != []:
diff --git a/madminer/likelihood/manipulate.py b/madminer/likelihood/manipulate.py
index 2804392ed..b9c8fd4a6 100644
--- a/madminer/likelihood/manipulate.py
+++ b/madminer/likelihood/manipulate.py
@@ -49,7 +49,7 @@ def constrained_nll(params):
         if len(theta) != len(fixed_components):
             raise ValueError("Length of fixed_components and theta should be the same")
         if len(params) + len(fixed_components) != n_dimension:
-            raise ValueError("Length of params should be %s", n_dimension - len(fixed_components))
+            raise ValueError(f"Length of params should be {n_dimension-len(fixed_components)}")
 
         # Initialize full paramaters
         params_full = np.zeros(n_dimension)
@@ -275,9 +275,8 @@ def profile_log_likelihood(
         dof = m_paramaters
 
     # Method
-    supported_methods = ["TNC", " L-BFGS-B"]
-    if method not in supported_methods:
-        raise ValueError("Method %s unknown. Choose one of the following methods: %s", method, supported_methods)
+    if method not in {"TNC", "L-BFGS-B"}:
+        raise ValueError(f"Method {method} unknown.")
 
     # Initial guess for theta
     if theta_start is None:

From c47843a726522382ba362bab29ea7c3172f39255 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:32:34 -0400
Subject: [PATCH 30/65] src: likelihood module improve fmt

---
 madminer/likelihood/base.py       | 29 ++++++++++++++++++-------
 madminer/likelihood/histo.py      | 35 +++++++++++++++----------------
 madminer/likelihood/manipulate.py |  8 +++----
 madminer/likelihood/neural.py     |  1 +
 4 files changed, 43 insertions(+), 30 deletions(-)

diff --git a/madminer/likelihood/base.py b/madminer/likelihood/base.py
index 3a53b6580..4275195c4 100644
--- a/madminer/likelihood/base.py
+++ b/madminer/likelihood/base.py
@@ -11,13 +11,20 @@
 
 
 class BaseLikelihood(DataAnalyzer):
+
     def create_negative_log_likelihood(self, *args, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def create_expected_negative_log_likelihood(self, *args, **kwargs):
-        raise NotImplementedError
-
-    def _asimov_data(self, theta, test_split=0.2, sample_only_from_closest_benchmark=True, n_asimov=None):
+        raise NotImplementedError()
+
+    def _asimov_data(
+        self,
+        theta,
+        test_split=0.2,
+        sample_only_from_closest_benchmark=True,
+        n_asimov=None,
+    ):
 
         # get data
         start_event, end_event, correction_factor = self._train_test_split(False, test_split)
@@ -39,13 +46,19 @@ def _asimov_data(self, theta, test_split=0.2, sample_only_from_closest_benchmark
         return x, weights_theta
 
     def _log_likelihood(self, *args, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def _log_likelihood_kinematic(self, *args, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def _log_likelihood_poisson(
-        self, n_observed, theta, nu, luminosity=300000.0, weights_benchmarks=None, total_weights=None
+        self,
+        n_observed,
+        theta,
+        nu,
+        luminosity=300000.0,
+        weights_benchmarks=None,
+        total_weights=None,
     ):
         if total_weights is not None and nu is None:
             # `histo` mode: Efficient morphing of whole cross section for the case without nuisance parameters
@@ -72,8 +85,8 @@ def _log_likelihood_poisson(
         if xsec < 0:
             logger.warning("Total cross section is negative (%s pb) at theta=%s)", xsec, theta)
             n_predicted = 10 ** -5
-        n_observed_rounded = int(np.round(n_observed, 0))
 
+        n_observed_rounded = int(np.round(n_observed, 0))
         log_likelihood = poisson.logpmf(k=n_observed_rounded, mu=n_predicted)
 
         logger.debug(
diff --git a/madminer/likelihood/histo.py b/madminer/likelihood/histo.py
index fc3676e3c..1e1822d70 100644
--- a/madminer/likelihood/histo.py
+++ b/madminer/likelihood/histo.py
@@ -15,6 +15,7 @@
 
 
 class HistoLikelihood(BaseLikelihood):
+
     def create_negative_log_likelihood(
         self,
         x_observed,
@@ -33,7 +34,7 @@ def create_negative_log_likelihood(
     ):
         """
         Returns a function which calculates the negative log likelihood for a given
-        parameter point, evaulated with a dataset (x_observed,n_observed,x_observed_weights).
+        parameter point, evaluated with a dataset (x_observed,n_observed,x_observed_weights).
 
         Parameters
         ----------
@@ -64,7 +65,7 @@ def create_negative_log_likelihood(
             Integrated luminosity in pb^{-1} assumed in the analysis. Default value: 300000.
 
         mode : {"weighted" , "sampled", "histo"} , optional
-            If "sampled", for each evaulation of the likelihood function, a separate
+            If "sampled", for each evaluation of the likelihood function, a separate
             set of events are sampled and histogram is created to construct the
             likelihood function. If "weighted", first a set of weighted events is
             sampled which is then used to create histograms. Default value: "sampled"
@@ -120,7 +121,7 @@ def create_negative_log_likelihood(
             raise ValueError("Mode histo is currently not supported in the presence of nuisance parameters")
 
         # Load model - nothing interesting
-        if score_components != []:
+        if score_components:
             assert all([isinstance(score_component, int) for score_component in score_components])
             if model_file is None:
                 raise ValueError("You need to provide a model_file!")
@@ -131,7 +132,7 @@ def create_negative_log_likelihood(
         # Create summary function
         logger.info("Setting up standard summary statistics")
         summary_function = None
-        if observables != []:
+        if observables:
             summary_function = self._make_summary_statistic_function(observables=observables, model=model)
 
         # Weighted sampled
@@ -234,15 +235,15 @@ def create_expected_negative_log_likelihood(
     ):
         """
         Returns a function which calculates the expected negative log likelihood for a given
-        parameter point, evaulated with test data sampled according to theta_true.
+        parameter point, evaluated with test data sampled according to theta_true.
 
         Parameters
         ----------
         theta_true : ndarray
-            Specifies the physical paramaters according to which the test data is sampled.
+            Specifies the physical parameters according to which the test data is sampled.
 
         nu_true : ndarray
-            Specifies the nuisance paramaters according to which the test data is sampled.
+            Specifies the nuisance parameters according to which the test data is sampled.
 
         observables : list of str or None , optional
             Kinematic variables used in the histograms. The names are the same as
@@ -263,7 +264,7 @@ def create_expected_negative_log_likelihood(
             file are used. Default value: None.
 
         mode : {"weighted" , "sampled"} , optional
-            If "sampled", for each evaulation of the likelihood function, a separate
+            If "sampled", for each evaluation of the likelihood function, a separate
             set of events are sampled and histogram is created to construct the
             likelihood function. If "weighted", first a set of weighted events is
             sampled which is then used to create histograms. Default value: "sampled"
@@ -417,9 +418,7 @@ def _log_likelihood_kinematic(
             histo = self._histogram_morphing(theta, benchmark_histograms, hist_bins, bin_centers)
 
         # calculate log-likelihood from histogram
-        log_p = histo.log_likelihood(summary_stats)
-
-        return log_p
+        return histo.log_likelihood(summary_stats)
 
     def _make_summary_statistic_function(self, observables=None, model=None):
         """
@@ -434,7 +433,7 @@ def summary_function(xs):
             if not "score" in x_indices and not "function" in x_indices:
                 return xs[:, x_indices]
 
-            # evaulate some observables using eval() - more slow
+            # evaluate some observables using eval() - more slow
             data_events = []
             for x in xs:
                 data_event = []
@@ -476,6 +475,7 @@ def _find_x_indices(self, observables):
                     x_indices.append(x_names.index(obs))
                 except:
                     x_indices.append("function")
+
         logger.debug("Using x indices %s", x_indices)
         return x_indices
 
@@ -495,9 +495,7 @@ def _make_histo_data_sampled(self, summary_function, theta, n_histo_toys=1000):
             )
 
         # Calculate summary stats
-        data = summary_function(x)
-
-        return data
+        return summary_function(x)
 
     def _make_histo_data_weighted(self, summary_function, n_toys, test_split=None):
         """
@@ -530,6 +528,7 @@ def _find_bins(self, hist_bins, n_summary_stats):
         elif isinstance(hist_bins, int):
             # hist_bins = tuple([hist_bins] * n_summary_stats)
             hist_bins = [hist_bins for _ in range(n_summary_stats)]
+
         return hist_bins
 
     def _fixed_adaptive_binning(
@@ -549,7 +548,9 @@ def _fixed_adaptive_binning(
         # Get weighted data
         if data is None:
             data, weights_benchmarks = self._make_histo_data_weighted(
-                summary_function=summary_function, n_toys=n_toys, test_split=test_split
+                summary_function=summary_function,
+                n_toys=n_toys,
+                test_split=test_split,
             )
 
         # Calculate weights for thetas
@@ -584,8 +585,6 @@ def _histogram_morphing(self, theta, histogram_benchmarks, hist_bins, bin_center
         """
         Low-level function that morphes histograms
         """
-        # get binning
-        hist_nbins = [len(bins) - 1 for bins in hist_bins]
 
         # get array of flattened histograms
         flattened_histo_weights = []
diff --git a/madminer/likelihood/manipulate.py b/madminer/likelihood/manipulate.py
index b9c8fd4a6..d9a1f81f3 100644
--- a/madminer/likelihood/manipulate.py
+++ b/madminer/likelihood/manipulate.py
@@ -27,7 +27,7 @@ def fix_params(negative_log_likelihood, theta, fixed_components=None):
     fixed_components : list of int or None, optional.
         m-dimensional vector of coordinate indices provided in theta.
         `fixed_components=[0,1]` will fix the 1st and 2nd
-        component of the paramater point. If None, uses [0, ..., m-1].
+        component of the parameter point. If None, uses [0, ..., m-1].
 
     Returns
     -------
@@ -51,7 +51,7 @@ def constrained_nll(params):
         if len(params) + len(fixed_components) != n_dimension:
             raise ValueError(f"Length of params should be {n_dimension-len(fixed_components)}")
 
-        # Initialize full paramaters
+        # Initialize full parameters
         params_full = np.zeros(n_dimension)
 
         # fill fixed components
@@ -83,7 +83,7 @@ def project_log_likelihood(
     """
     Takes a likelihood function depending on N parameters, and evaluates
     for a set of M-dimensional parameter points (either grid or explicitly specified)
-    while the remaining N-M paramters are set to zero.
+    while the remaining N-M parameters are set to zero.
 
     Parameters
     ----------
@@ -199,7 +199,7 @@ def profile_log_likelihood(
     """
     Takes a likelihood function depending on N parameters, and evaluates
     for a set of M-dimensional parameter points (either grid or explicitly specified)
-    while the remaining N-M paramters are profiled over.
+    while the remaining N-M parameters are profiled over.
 
     Parameters
     ----------
diff --git a/madminer/likelihood/neural.py b/madminer/likelihood/neural.py
index 49aa89691..e07b5c66b 100644
--- a/madminer/likelihood/neural.py
+++ b/madminer/likelihood/neural.py
@@ -11,6 +11,7 @@
 
 
 class NeuralLikelihood(BaseLikelihood):
+
     def create_negative_log_likelihood(
         self,
         model_file,

From 4deeb3801a3aab0ca42660eb6ffb3bfdad30d9f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:35:25 -0400
Subject: [PATCH 31/65] src: likelihood module remove 'future'

---
 madminer/likelihood/base.py       | 2 --
 madminer/likelihood/histo.py      | 8 +++-----
 madminer/likelihood/manipulate.py | 2 --
 madminer/likelihood/neural.py     | 6 ++----
 4 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/madminer/likelihood/base.py b/madminer/likelihood/base.py
index 4275195c4..42b518ab1 100644
--- a/madminer/likelihood/base.py
+++ b/madminer/likelihood/base.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 from scipy.stats import poisson, norm
diff --git a/madminer/likelihood/histo.py b/madminer/likelihood/histo.py
index 1e1822d70..9c37c6fea 100644
--- a/madminer/likelihood/histo.py
+++ b/madminer/likelihood/histo.py
@@ -1,15 +1,13 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 from itertools import product
 
-from ..utils.various import mdot, less_logging, math_commands
+from .base import BaseLikelihood
+from .. import sampling
 from ..ml import ScoreEstimator, Ensemble, load_estimator
 from ..utils.histo import Histo
+from ..utils.various import mdot, less_logging, math_commands
 from ..sampling import SampleAugmenter
-from .. import sampling
-from .base import BaseLikelihood
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/likelihood/manipulate.py b/madminer/likelihood/manipulate.py
index d9a1f81f3..aeb8ae252 100644
--- a/madminer/likelihood/manipulate.py
+++ b/madminer/likelihood/manipulate.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 import time
diff --git a/madminer/likelihood/neural.py b/madminer/likelihood/neural.py
index e07b5c66b..fcfa08893 100644
--- a/madminer/likelihood/neural.py
+++ b/madminer/likelihood/neural.py
@@ -1,11 +1,9 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 
-from ..utils.various import less_logging
-from ..ml import ParameterizedRatioEstimator, Ensemble, LikelihoodEstimator, load_estimator
 from .base import BaseLikelihood
+from ..ml import ParameterizedRatioEstimator, Ensemble, LikelihoodEstimator, load_estimator
+from ..utils.various import less_logging
 
 logger = logging.getLogger(__name__)
 

From 4117194e56898e01bf6f80580272ed4d2aa2d011 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:41:40 -0400
Subject: [PATCH 32/65] src: limits module f-strings

---
 madminer/limits/asymptotic_limits.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py
index 21ac66eb0..72829f9f5 100644
--- a/madminer/limits/asymptotic_limits.py
+++ b/madminer/limits/asymptotic_limits.py
@@ -690,7 +690,7 @@ def _analyse(
             log_r_kin = n_events * np.sum(log_r_kin * obs_weights[np.newaxis, :], axis=1)
 
         else:
-            raise ValueError("Unknown mode {}, has to be 'ml' or 'histo' or 'xsec'".format(mode))
+            raise ValueError(f"Unknown mode {mode}")
 
         # xsec part
         if include_xsec:
@@ -783,7 +783,7 @@ def summary_function(x):
                     return score
 
         else:
-            raise RuntimeError("Unknown mode {}, has to be 'observables' or 'sally'".format(mode))
+            raise RuntimeError(f"Unknown mode {mode}")
 
         return summary_function
 
@@ -855,7 +855,7 @@ def processor(scores, theta):
                 return postprocessing(h)
 
         else:
-            raise RuntimeError("Unknown score processing mode {} for summary stats dimension {}".format(mode, dim))
+            raise RuntimeError(f"Unknown score processing mode {mode} for summary stats dimension {dim}")
 
         return processor
 
@@ -1052,7 +1052,7 @@ def _find_x_indices(self, observables):
             try:
                 x_indices.append(x_names.index(obs))
             except ValueError:
-                raise RuntimeError("Unknown observable {}, has to be one of {}".format(obs, x_names))
+                raise RuntimeError(f"Unknown observable {obs}")
         logger.debug("Using x indices %s", x_indices)
         return x_indices
 

From 237d42589ad1726f10248c151381e4d5e67f43bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:47:18 -0400
Subject: [PATCH 33/65] src: limits module improve fmt

---
 madminer/limits/asymptotic_limits.py | 29 +++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py
index 72829f9f5..426ceee7f 100644
--- a/madminer/limits/asymptotic_limits.py
+++ b/madminer/limits/asymptotic_limits.py
@@ -84,7 +84,7 @@ def observed_limits(
         `x_observed` specifies the observed data as an array of observables, using the same observables and their order
         as used throughout the MadMiner workflow.
 
-        The p-values with frequentist hypothesis tests using the likelihood ratio as test statistic. The asymptotic
+        The p-values with frequentist hypothesis tests using the likelihood ratio as test statistic. The asymptotic
         approximation is used, see https://arxiv.org/abs/1007.1727.
 
         Depending on the keyword `mode`, the likelihood ratio is calculated with one of several different methods:
@@ -243,7 +243,8 @@ def observed_limits(
         """
         if n_observed is None:
             n_observed = len(x_observed)
-        results = self._analyse(
+
+        return self._analyse(
             n_observed,
             x_observed,
             grid_ranges,
@@ -269,7 +270,6 @@ def observed_limits(
             n_binning_toys=n_binning_toys,
             thetas_eval=thetas_eval,
         )
-        return results
 
     def expected_limits(
         self,
@@ -472,9 +472,11 @@ def expected_limits(
             test_split=test_split,
             n_asimov=n_asimov,
         )
+
         n_observed = luminosity * self._calculate_xsecs([theta_true])[0]
         logger.info("Expected events: %s", n_observed)
-        results = self._analyse(
+
+        return self._analyse(
             n_observed,
             x_asimov,
             grid_ranges,
@@ -501,7 +503,6 @@ def expected_limits(
             n_binning_toys=n_binning_toys,
             thetas_eval=thetas_eval,
         )
-        return results
 
     def asymptotic_p_value(self, log_likelihood_ratio, dof=None):
         """
@@ -525,9 +526,9 @@ def asymptotic_p_value(self, log_likelihood_ratio, dof=None):
         """
         if dof is None:
             dof = self.n_parameters
+
         q = -2.0 * log_likelihood_ratio
-        p_value = chi2.sf(x=q, df=dof)
-        return p_value
+        return chi2.sf(x=q, df=dof)
 
     def _analyse(
         self,
@@ -716,6 +717,7 @@ def _analyse(
             histo_data = (histos, processed_summary_stats, obs_weights)
         elif return_histos:
             histo_data = histos
+
         return theta_grid, p_values, i_ml, log_r_kin, log_p_xsec, histo_data
 
     def _find_bins(self, mode, hist_bins, summary_stats):
@@ -724,6 +726,7 @@ def _find_bins(self, mode, hist_bins, summary_stats):
             n_summary_stats += 1
         elif mode == "sallino":
             n_summary_stats = 1
+
         # Bin numbers
         if hist_bins is None:
             if mode == "adaptive-sally" and n_summary_stats == 2:
@@ -749,13 +752,13 @@ def _find_bins(self, mode, hist_bins, summary_stats):
         else:
             n_bins_each = [n_bins if isinstance(n_bins, int) else len(n_bins) - 1 for n_bins in hist_bins]
             total_n_bins = np.prod(n_bins_each)
+
         return hist_bins, n_bins_each, n_summary_stats, total_n_bins
 
     def _make_summary_statistic_function(self, mode, model=None, observables=None):
         if mode == "observables":
             assert observables is not None
             x_indices = self._find_x_indices(observables)
-
             logger.debug("Preparing observables %s as summary statistic function", x_indices)
 
             def summary_function(x):
@@ -1000,17 +1003,21 @@ def _make_histos(
 
     def _fixed_adaptive_binning(self, n_toys, processor, summary_function, test_split, thetas_binning, x_bins):
         summary_stats, all_weights = self._make_weighted_histo_data(
-            summary_function, thetas_binning, n_toys, test_split=test_split
+            summary_function=summary_function,
+            thetas=thetas_binning,
+            n_toys=n_toys,
+            test_split=test_split,
         )
+
         all_weights = np.asarray(all_weights)
         weights = np.mean(all_weights, axis=0)
         if processor is None:
             data = summary_stats
         else:
             data = processor(summary_stats, thetas_binning)
+
         histo = Histo(data, weights, x_bins, epsilon=1.0e-12)
-        x_bins = histo.edges
-        return x_bins
+        return histo.edges
 
     def _make_weighted_histo_data(self, summary_function, thetas, n_toys, test_split=0.2):
         # Get weighted events

From 314714a4200932fb0a3d3f579a670b8ea4076980 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 21 Sep 2020 16:48:35 -0400
Subject: [PATCH 34/65] src: limits module remove 'future'

---
 madminer/limits/asymptotic_limits.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py
index 426ceee7f..b3c46da91 100644
--- a/madminer/limits/asymptotic_limits.py
+++ b/madminer/limits/asymptotic_limits.py
@@ -1,15 +1,13 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 from scipy.stats import chi2, poisson
 
+from madminer import sampling
 from madminer.analysis import DataAnalyzer
-from madminer.utils.various import mdot, less_logging
 from madminer.ml import ParameterizedRatioEstimator, Ensemble, ScoreEstimator, LikelihoodEstimator, load_estimator
 from madminer.utils.histo import Histo
+from madminer.utils.various import mdot, less_logging
 from madminer.sampling import SampleAugmenter
-from madminer import sampling
 
 logger = logging.getLogger(__name__)
 

From 8b9d2e0857016d5ce3428d33822ea1dade915915 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 10:15:20 -0400
Subject: [PATCH 35/65] src: ML module f-strings

---
 madminer/ml/double_parameterized_ratio.py | 14 +++++--------
 madminer/ml/ensemble.py                   | 14 ++++++-------
 madminer/ml/likelihood.py                 | 12 ++++-------
 madminer/ml/lookup.py                     |  4 ++--
 madminer/ml/parameterized_ratio.py        | 20 ++++++++----------
 madminer/ml/score.py                      | 25 +++++++++--------------
 6 files changed, 36 insertions(+), 53 deletions(-)

diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py
index 2d99b6227..ec3cf197e 100644
--- a/madminer/ml/double_parameterized_ratio.py
+++ b/madminer/ml/double_parameterized_ratio.py
@@ -309,13 +309,9 @@ def train(
             self.n_parameters = n_parameters
 
         if n_parameters != self.n_parameters:
-            raise RuntimeError(
-                "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
-            )
+            raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}")
         if n_observables != self.n_observables:
-            raise RuntimeError(
-                "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
-            )
+            raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}")
 
         # Data
         data = self._package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1)
@@ -499,9 +495,9 @@ def _create_model(self):
     @staticmethod
     def _check_required_data(method, r_xz, t_xz0, t_xz1):
         if method in ["cascal", "alices", "rascal"] and (t_xz0 is None or t_xz1 is None):
-            raise RuntimeError("Method {} requires joint score information".format(method))
+            raise RuntimeError(f"Method {method} requires joint score information")
         if method in ["rolr", "alice", "alices", "rascal"] and r_xz is None:
-            raise RuntimeError("Method {} requires joint likelihood ratio information".format(method))
+            raise RuntimeError(f"Method {method} requires joint likelihood ratio information")
 
     @staticmethod
     def _package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1):
@@ -527,4 +523,4 @@ def _unwrap_settings(self, settings):
 
         estimator_type = str(settings["estimator_type"])
         if estimator_type != "double_parameterized_ratio":
-            raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
+            raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.")
diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py
index 3ad09d94a..c30f11b8a 100644
--- a/madminer/ml/ensemble.py
+++ b/madminer/ml/ensemble.py
@@ -144,7 +144,7 @@ def train_all(self, **kwargs):
             if not isinstance(value, list):
                 kwargs[key] = [value for _ in range(self.n_estimators)]
 
-            assert len(kwargs[key]) == self.n_estimators, "Keyword {} has wrong length {}".format(key, len(value))
+            assert len(kwargs[key]) == self.n_estimators, f"Keyword {key} has wrong length {len(value)}"
 
         for i, estimator in enumerate(self.estimators):
             kwargs_this_estimator = {}
@@ -407,7 +407,7 @@ def calculate_fisher_information(
 
         # Check input
         if mode not in ["score", "information"]:
-            raise ValueError("Unknown mode {}, has to be 'score' or 'information'!".format(mode))
+            raise ValueError(f"Unknown mode {mode}!")
 
         # Calculate estimator_weights of each estimator in vote
         if estimator_weights is None:
@@ -604,12 +604,12 @@ def save(self, folder, save_model=False):
         logger.debug("Saving ensemble setup to %s/ensemble.json", folder)
         settings = {"estimator_type": self.estimator_type, "n_estimators": self.n_estimators}
 
-        with open(folder + "/ensemble.json", "w") as f:
+        with open(f"{folder}/ensemble.json", "w") as f:
             json.dump(settings, f)
 
         # Save estimators
         for i, estimator in enumerate(self.estimators):
-            estimator.save(folder + "/estimator_" + str(i), save_model=save_model)
+            estimator.save(f"{folder}/estimator_{i}", save_model=save_model)
 
     def load(self, folder):
         """
@@ -627,7 +627,7 @@ def load(self, folder):
         """
         # Load ensemble settings
         logger.debug("Loading ensemble setup from %s/ensemble.json", folder)
-        with open(folder + "/ensemble.json", "r") as f:
+        with open(f"{folder}/ensemble.json", "r") as f:
             settings = json.load(f)
 
         self.n_estimators = int(settings["n_estimators"])
@@ -644,7 +644,7 @@ def load(self, folder):
         self.estimators = []
         for i in range(self.n_estimators):
             estimator = self._get_estimator_class(estimator_type)()
-            estimator.load(folder + "/estimator_" + str(i))
+            estimator.load(f"{folder}/estimator_{i}")
             self.estimators.append(estimator)
         self._check_consistency()
 
@@ -725,4 +725,4 @@ def _get_estimator_class(estimator_type):
         elif estimator_type == "likelihood":
             return LikelihoodEstimator
         else:
-            raise RuntimeError("Unknown estimator type {}!".format(estimator_type))
+            raise RuntimeError(f"Unknown estimator type {estimator_type}!")
diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py
index dd81547fd..0567323b2 100644
--- a/madminer/ml/likelihood.py
+++ b/madminer/ml/likelihood.py
@@ -271,13 +271,9 @@ def train(
             self.n_parameters = n_parameters
 
         if n_parameters != self.n_parameters:
-            raise RuntimeError(
-                "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
-            )
+            raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}")
         if n_observables != self.n_observables:
-            raise RuntimeError(
-                "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
-            )
+            raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}")
 
         # Data
         data = self._package_training_data(method, x, theta, t_xz)
@@ -515,7 +511,7 @@ def _create_model(self):
     @staticmethod
     def _check_required_data(method, t_xz):
         if method == ["scandal"] and t_xz is None:
-            raise RuntimeError("Method {} requires joint score information".format(method))
+            raise RuntimeError(f"Method {method} requires joint score information")
 
     @staticmethod
     def _package_training_data(method, x, theta, t_xz):
@@ -539,7 +535,7 @@ def _unwrap_settings(self, settings):
 
         estimator_type = str(settings["estimator_type"])
         if estimator_type != "likelihood":
-            raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
+            raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.")
 
         self.n_components = int(settings["n_components"])
         self.n_mades = int(settings["n_mades"])
diff --git a/madminer/ml/lookup.py b/madminer/ml/lookup.py
index 061a5afce..4d16cc44c 100644
--- a/madminer/ml/lookup.py
+++ b/madminer/ml/lookup.py
@@ -16,7 +16,7 @@ def load_estimator(filename):
         model.load(filename)
 
     else:
-        with open(filename + "_settings.json", "r") as f:
+        with open(f"{filename}_settings.json", "r") as f:
             settings = json.load(f)
         try:
             estimator_type = settings["estimator_type"]
@@ -32,7 +32,7 @@ def load_estimator(filename):
         elif estimator_type == "likelihood":
             model = LikelihoodEstimator()
         else:
-            raise RuntimeError("Unknown estimator type {}!".format(estimator_type))
+            raise RuntimeError(f"Unknown estimator type {estimator_type}!")
 
         model.load(filename)
 
diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py
index 99844a844..63a994f1a 100644
--- a/madminer/ml/parameterized_ratio.py
+++ b/madminer/ml/parameterized_ratio.py
@@ -283,13 +283,9 @@ def train(
             self.n_parameters = n_parameters
 
         if n_parameters != self.n_parameters:
-            raise RuntimeError(
-                "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
-            )
+            raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}")
         if n_observables != self.n_observables:
-            raise RuntimeError(
-                "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
-            )
+            raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}")
 
         # Data
         data = self._package_training_data(method, x, theta, y, r_xz, t_xz)
@@ -503,7 +499,7 @@ def evaluate_score(self, x, theta, nuisance_mode="keep"):
         if nuisance_mode == "keep":
             logger.debug("Keeping nuisance parameter in score")
         else:
-            raise ValueError("Unknown nuisance_mode {}".format(nuisance_mode))
+            raise ValueError(f"Unknown nuisance_mode {nuisance_mode}")
 
         _, all_t_hat = self.evaluate_log_likelihood_ratio(x, theta, test_all_combinations=False, evaluate_score=True)
         return all_t_hat
@@ -528,10 +524,10 @@ def _create_model(self):
 
     @staticmethod
     def _check_required_data(method, r_xz, t_xz):
-        if method in ["cascal", "alices", "rascal"] and t_xz is None:
-            raise RuntimeError("Method {} requires joint score information".format(method))
-        if method in ["rolr", "alice", "alices", "rascal"] and r_xz is None:
-            raise RuntimeError("Method {} requires joint likelihood ratio information".format(method))
+        if method in {"cascal", "alices", "rascal"} and t_xz is None:
+            raise RuntimeError(f"Method {method} requires joint score information")
+        if method in {"rolr", "alice", "alices", "rascal"} and r_xz is None:
+            raise RuntimeError(f"Method {method} requires joint likelihood ratio information")
 
     @staticmethod
     def _package_training_data(method, x, theta, y, r_xz, t_xz):
@@ -555,4 +551,4 @@ def _unwrap_settings(self, settings):
 
         estimator_type = str(settings["estimator_type"])
         if estimator_type != "parameterized_ratio":
-            raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
+            raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.")
diff --git a/madminer/ml/score.py b/madminer/ml/score.py
index 19de558ee..40e5300e8 100644
--- a/madminer/ml/score.py
+++ b/madminer/ml/score.py
@@ -215,13 +215,9 @@ def train(
             self.n_parameters = n_parameters
 
         if n_parameters != self.n_parameters:
-            raise RuntimeError(
-                "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
-            )
+            raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}")
         if n_observables != self.n_observables:
-            raise RuntimeError(
-                "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
-            )
+            raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}")
 
         # Data
         data = self._package_training_data(x, t_xz)
@@ -284,9 +280,8 @@ def set_nuisance(self, fisher_information, parameters_of_interest):
         """
         if fisher_information.shape != (self.n_parameters, self.n_parameters):
             raise ValueError(
-                "Fisher information has wrong shape {}, expected {}".format(
-                    fisher_information.shape, (self.n_parameters, self.n_parameters)
-                )
+                f"Fisher information has wrong shape {fisher_information.shape}. "
+                f"Expected {(self.n_parameters, self.n_parameters)}"
             )
 
         n_parameters_of_interest = len(parameters_of_interest)
@@ -386,7 +381,7 @@ def evaluate_score(self, x, theta=None, nuisance_mode="auto"):
             t_hat = np.einsum("ij,xj->xi", self.nuisance_profile_matrix, t_hat)
 
         else:
-            raise ValueError("Unknown nuisance_mode {}".format(nuisance_mode))
+            raise ValueError(f"Unknown nuisance_mode {nuisance_mode}")
 
         return t_hat
 
@@ -416,16 +411,16 @@ def save(self, filename, save_model=False):
                 filename,
                 filename,
             )
-            np.save(filename + "_nuisance_profile_matrix.npy", self.nuisance_profile_matrix)
-            np.save(filename + "_nuisance_project_matrix.npy", self.nuisance_project_matrix)
+            np.save(f"{filename}_nuisance_profile_matrix.npy", self.nuisance_profile_matrix)
+            np.save(f"{filename}_nuisance_project_matrix.npy", self.nuisance_project_matrix)
 
     def load(self, filename):
         super(ScoreEstimator, self).load(filename)
 
         # Load scaling
         try:
-            self.nuisance_profile_matrix = np.load(filename + "_nuisance_profile_matrix.npy")
-            self.nuisance_project_matrix = np.load(filename + "_nuisance_project_matrix.npy")
+            self.nuisance_profile_matrix = np.load(f"{filename}_nuisance_profile_matrix.npy")
+            self.nuisance_project_matrix = np.load(f"{filename}_nuisance_project_matrix.npy")
             logger.debug(
                 "  Found nuisance profiling / projection matrices:\nProfiling:\n%s\nProjection:\n%s",
                 self.nuisance_profile_matrix,
@@ -463,7 +458,7 @@ def _unwrap_settings(self, settings):
 
         estimator_type = str(settings["estimator_type"])
         if estimator_type != "score":
-            raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
+            raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.")
 
         try:
             self.nuisance_mode_default = str(settings["nuisance_mode_default"])

From 2454945656c4696757ccfbdd38b2b7cc37dab72d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 10:31:39 -0400
Subject: [PATCH 36/65] src: ML module improve fmt

---
 madminer/ml/base.py                       | 15 ++++++---------
 madminer/ml/double_parameterized_ratio.py | 13 +++++++++++--
 madminer/ml/ensemble.py                   |  2 +-
 madminer/ml/likelihood.py                 |  4 ++--
 madminer/ml/parameterized_ratio.py        | 14 ++++++--------
 madminer/ml/score.py                      | 14 ++++++--------
 6 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/madminer/ml/base.py b/madminer/ml/base.py
index 7f84a4d97..bd79c44ea 100644
--- a/madminer/ml/base.py
+++ b/madminer/ml/base.py
@@ -46,34 +46,31 @@ def __init__(self, features=None, n_hidden=(100,), activation="tanh", dropout_pr
         self.x_scaling_stds = None
 
     def train(self, *args, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def evaluate_log_likelihood(self, *args, **kwargs):
         """
         Log likelihood estimation. Signature depends on the type of estimator. The first returned value is the log
         likelihood with shape `(n_thetas, n_x)`.
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def evaluate_log_likelihood_ratio(self, *args, **kwargs):
         """
         Log likelihood ratio estimation. Signature depends on the type of estimator. The first returned value is the log
         likelihood ratio with shape `(n_thetas, n_x)` or `(n_x)`.
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def evaluate_score(self, *args, **kwargs):
         """
         Score estimation. Signature depends on the type of estimator. The only returned value is the score with shape
         `(n_x)`.
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def evaluate(self, *args, **kwargs):
-        raise NotImplementedError
-
-    def calculate_fisher_information(self, *args, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def save(self, filename, save_model=False):
 
@@ -237,7 +234,7 @@ def _unwrap_settings(self, settings):
             )
 
     def _create_model(self):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def calculate_fisher_information(self, x, theta=None, weights=None, n_events=1, sum_events=True):
         """
diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py
index ec3cf197e..5ac36e090 100644
--- a/madminer/ml/double_parameterized_ratio.py
+++ b/madminer/ml/double_parameterized_ratio.py
@@ -90,7 +90,7 @@ def train(
             Observations, or filename of a pickled numpy array.
 
         y : ndarray or str
-            Class labels (0 = numeerator, 1 = denominator), or filename of a pickled numpy array.
+            Class labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array.
 
         theta0 : ndarray or str
             Numerator parameter point, or filename of a pickled numpy array.
@@ -259,6 +259,7 @@ def train(
             assert x_val.shape[1] == n_observables
             assert theta0_val.shape[1] == n_parameters
             assert theta1_val.shape[1] == n_parameters
+
             if r_xz is not None:
                 assert r_xz_val is not None, "When providing r_xz and sep. validation data, also provide r_xz_val"
             if t_xz0 is not None:
@@ -315,9 +316,17 @@ def train(
 
         # Data
         data = self._package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1)
+
         if external_validation:
             data_val = self._package_training_data(
-                method, x_val, theta0_val, theta1_val, y_val, r_xz_val, t_xz0_val, t_xz1_val
+                method,
+                x_val,
+                theta0_val,
+                theta1_val,
+                y_val,
+                r_xz_val,
+                t_xz0_val,
+                t_xz1_val,
             )
         else:
             data_val = None
diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py
index c30f11b8a..0269912f1 100644
--- a/madminer/ml/ensemble.py
+++ b/madminer/ml/ensemble.py
@@ -679,7 +679,7 @@ def _check_consistency(self):
                     " or local score estimators. Found types " + ", ".join(all_types) + "."
                 )
 
-        # Check consistency of parameter and observable numnbers
+        # Check consistency of parameter and observable numbers
         self.n_parameters = None
         self.n_observables = None
 
diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py
index 0567323b2..4d9b7a35d 100644
--- a/madminer/ml/likelihood.py
+++ b/madminer/ml/likelihood.py
@@ -45,7 +45,7 @@ class LikelihoodEstimator(ConditionalEstimator):
     activation : {'tanh', 'sigmoid', 'relu'}, optional
         Activation function. Default value: 'tanh'.
 
-    batch_norm : None or floar, optional
+    batch_norm : None or float, optional
         If not None, batch normalization is used, where this value sets the alpha parameter in the calculation
         of the running average of the mean and variance. Default value: None.
 
@@ -365,7 +365,7 @@ def evaluate_log_likelihood(self, x, theta, test_all_combinations=True, evaluate
         # Scale observables
         x = self._transform_inputs(x)
 
-        # Restrict featuers
+        # Restrict features
         if self.features is not None:
             x = x[:, self.features]
 
diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py
index 63a994f1a..f63f9f183 100644
--- a/madminer/ml/parameterized_ratio.py
+++ b/madminer/ml/parameterized_ratio.py
@@ -88,7 +88,7 @@ def train(
             Observations, or filename of a pickled numpy array.
 
         y : ndarray or str
-            Class labels (0 = numeerator, 1 = denominator), or filename of a pickled numpy array.
+            Class labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array.
 
         theta : ndarray or str
             Numerator parameter point, or filename of a pickled numpy array.
@@ -330,7 +330,7 @@ def train(
 
     def evaluate_log_likelihood_ratio(self, x, theta, test_all_combinations=True, evaluate_score=False):
         """
-        Evaluates the log likelihood ratio for given observations x betwen the given parameter point theta and the
+        Evaluates the log likelihood ratio for given observations x between the given parameter point theta and the
         reference hypothesis.
 
         Parameters
@@ -421,7 +421,7 @@ def evaluate_log_likelihood_ratio(self, x, theta, test_all_combinations=True, ev
 
     def evaluate_log_likelihood_ratio_torch(self, x, theta, test_all_combinations=True):
         """
-        Evaluates the log likelihood ratio for given observations x betwen the given parameter point theta and the
+        Evaluates the log likelihood ratio for given observations x between the given parameter point theta and the
         reference hypothesis.
 
         Parameters
@@ -474,7 +474,7 @@ def evaluate_log_likelihood(self, *args, **kwargs):
 
     def evaluate_score(self, x, theta, nuisance_mode="keep"):
         """
-        Evaluates the scores for given observations x betwen at a given parameter point theta.
+        Evaluates the scores for given observations x at a given parameter point theta.
 
         Parameters
         ----------
@@ -505,10 +505,8 @@ def evaluate_score(self, x, theta, nuisance_mode="keep"):
         return all_t_hat
 
     def calculate_fisher_information(self, x, theta, weights=None, n_events=1, sum_events=True):
-        fisher_information = super(ParameterizedRatioEstimator, self).calculate_fisher_information(
-            x, theta, weights, n_events, sum_events
-        )
-        return fisher_information
+        return super(ParameterizedRatioEstimator, self) \
+            .calculate_fisher_information(x, theta, weights, n_events, sum_events)
 
     def evaluate(self, *args, **kwargs):
         return self.evaluate_log_likelihood_ratio(*args, **kwargs)
diff --git a/madminer/ml/score.py b/madminer/ml/score.py
index 40e5300e8..03142db16 100644
--- a/madminer/ml/score.py
+++ b/madminer/ml/score.py
@@ -267,10 +267,10 @@ def set_nuisance(self, fisher_information, parameters_of_interest):
         Parameters
         ----------
         fisher_information : ndarray
-            Fisher informatioin with shape `(n_parameters, n_parameters)`.
+            Fisher information with shape `(n_parameters, n_parameters)`.
 
         parameters_of_interest : list of int
-            List of int, with 0 <= remaining_compoinents[i] < n_parameters. Denotes which parameters are kept in the
+            List of int, with 0 <= remaining_components[i] < n_parameters. Denotes which parameters are kept in the
             profiling, and their new order.
 
         Returns
@@ -350,7 +350,7 @@ def evaluate_score(self, x, theta=None, nuisance_mode="auto"):
         # Scale observables
         x = self._transform_inputs(x)
 
-        # Restrict featuers
+        # Restrict features
         if self.features is not None:
             x = x[:, self.features]
 
@@ -358,7 +358,7 @@ def evaluate_score(self, x, theta=None, nuisance_mode="auto"):
         logger.debug("Starting score evaluation")
         t_hat = evaluate_local_score_model(model=self.model, xs=x)
 
-        # Treatment of nuisance paramters
+        # Treatment of nuisance parameters
         if nuisance_mode == "keep":
             logger.debug("Keeping nuisance parameter in score")
 
@@ -395,10 +395,8 @@ def evaluate(self, *args, **kwargs):
         return self.evaluate_score(*args, **kwargs)
 
     def calculate_fisher_information(self, x, theta=None, weights=None, n_events=1, sum_events=True):
-        fisher_information = super(ScoreEstimator, self).calculate_fisher_information(
-            x, theta, weights, n_events, sum_events
-        )
-        return fisher_information
+        return super(ScoreEstimator, self) \
+            .calculate_fisher_information(x, theta, weights, n_events, sum_events)
 
     def save(self, filename, save_model=False):
         super(ScoreEstimator, self).save(filename, save_model)

From 8a104cca7c660b58361fd4daf62b2336053a735f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 10:37:17 -0400
Subject: [PATCH 37/65] src: ML module remove 'six'

---
 madminer/ml/base.py                       |  6 ++----
 madminer/ml/double_parameterized_ratio.py |  8 +++-----
 madminer/ml/ensemble.py                   | 15 ++++++---------
 madminer/ml/likelihood.py                 |  8 +++-----
 madminer/ml/lookup.py                     |  4 +---
 madminer/ml/morphing_aware.py             |  6 ++----
 madminer/ml/parameterized_ratio.py        | 10 ++++------
 madminer/ml/score.py                      | 11 ++++-------
 8 files changed, 25 insertions(+), 43 deletions(-)

diff --git a/madminer/ml/base.py b/madminer/ml/base.py
index bd79c44ea..56c646861 100644
--- a/madminer/ml/base.py
+++ b/madminer/ml/base.py
@@ -1,9 +1,7 @@
-from __future__ import absolute_import, division, print_function
-
-import logging
-import os
 import json
+import logging
 import numpy as np
+import os
 import torch
 
 from ..utils.various import create_missing_folders, load_and_check
diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py
index 5ac36e090..bcb4f6151 100644
--- a/madminer/ml/double_parameterized_ratio.py
+++ b/madminer/ml/double_parameterized_ratio.py
@@ -1,15 +1,13 @@
-from __future__ import absolute_import, division, print_function
-
 import logging
 import numpy as np
 from collections import OrderedDict
 
-from ..utils.ml.models.ratio import DenseDoublyParameterizedRatioModel
+from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork
 from ..utils.ml.eval import evaluate_ratio_model
+from ..utils.ml.models.ratio import DenseDoublyParameterizedRatioModel
+from ..utils.ml.trainer import DoubleParameterizedRatioTrainer
 from ..utils.ml.utils import get_optimizer, get_loss
 from ..utils.various import load_and_check, shuffle, restrict_samplesize
-from ..utils.ml.trainer import DoubleParameterizedRatioTrainer
-from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork
 
 try:
     FileNotFoundError
diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py
index 0269912f1..eb997fa9c 100644
--- a/madminer/ml/ensemble.py
+++ b/madminer/ml/ensemble.py
@@ -1,11 +1,8 @@
-from __future__ import absolute_import, division, print_function
-
-import six
-import logging
 import json
+import logging
 import numpy as np
-
 from madminer.utils.various import create_missing_folders, load_and_check
+
 from .base import Estimator
 from .double_parameterized_ratio import DoubleParameterizedRatioEstimator
 from .likelihood import LikelihoodEstimator
@@ -140,7 +137,7 @@ def train_all(self, **kwargs):
         """
         logger.info("Training %s estimators in ensemble", self.n_estimators)
 
-        for key, value in six.iteritems(kwargs):
+        for key, value in kwargs.items():
             if not isinstance(value, list):
                 kwargs[key] = [value for _ in range(self.n_estimators)]
 
@@ -148,7 +145,7 @@ def train_all(self, **kwargs):
 
         for i, estimator in enumerate(self.estimators):
             kwargs_this_estimator = {}
-            for key, value in six.iteritems(kwargs):
+            for key, value in kwargs.items():
                 kwargs_this_estimator[key] = value[i]
 
             logger.info("Training estimator %s / %s in ensemble", i + 1, self.n_estimators)
@@ -448,7 +445,7 @@ def calculate_fisher_information(
         # "modified_score" mode:
         elif mode == "modified_score":
             # Load training data
-            if isinstance(x, six.string_types):
+            if isinstance(x, str):
                 x = load_and_check(x)
             n_samples = x.shape[0]
 
@@ -511,7 +508,7 @@ def calculate_fisher_information(
         # "score" mode:
         elif mode == "score":
             # Load training data
-            if isinstance(x, six.string_types):
+            if isinstance(x, str):
                 x = load_and_check(x)
             n_samples = x.shape[0]
 
diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py
index 4d9b7a35d..4a405f96c 100644
--- a/madminer/ml/likelihood.py
+++ b/madminer/ml/likelihood.py
@@ -1,16 +1,14 @@
-from __future__ import absolute_import, division, print_function
-
 import logging
 import numpy as np
 from collections import OrderedDict
 
+from .base import ConditionalEstimator
+from ..utils.ml.eval import evaluate_flow_model
 from ..utils.ml.models.maf import ConditionalMaskedAutoregressiveFlow
 from ..utils.ml.models.maf_mog import ConditionalMixtureMaskedAutoregressiveFlow
-from ..utils.ml.eval import evaluate_flow_model
+from ..utils.ml.trainer import FlowTrainer
 from ..utils.ml.utils import get_optimizer, get_loss
 from ..utils.various import load_and_check, shuffle, restrict_samplesize
-from ..utils.ml.trainer import FlowTrainer
-from .base import ConditionalEstimator
 
 try:
     FileNotFoundError
diff --git a/madminer/ml/lookup.py b/madminer/ml/lookup.py
index 4d16cc44c..8a965775d 100644
--- a/madminer/ml/lookup.py
+++ b/madminer/ml/lookup.py
@@ -1,7 +1,5 @@
-from __future__ import absolute_import, division, print_function
-
-import os
 import json
+import os
 
 from .ensemble import Ensemble
 from .double_parameterized_ratio import DoubleParameterizedRatioEstimator
diff --git a/madminer/ml/morphing_aware.py b/madminer/ml/morphing_aware.py
index 76eaa0d17..5c33780fa 100644
--- a/madminer/ml/morphing_aware.py
+++ b/madminer/ml/morphing_aware.py
@@ -1,12 +1,10 @@
-from __future__ import absolute_import, division, print_function
-
 import logging
 import numpy as np
 
-from ..utils.ml.models.ratio import DenseMorphingAwareRatioModel, DenseQuadraticMorphingAwareRatioModel
+from .parameterized_ratio import ParameterizedRatioEstimator
 from ..utils.interfaces.madminer_hdf5 import load_madminer_settings
+from ..utils.ml.models.ratio import DenseMorphingAwareRatioModel, DenseQuadraticMorphingAwareRatioModel
 from ..utils.morphing import PhysicsMorpher
-from .parameterized_ratio import ParameterizedRatioEstimator
 
 try:
     FileNotFoundError
diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py
index f63f9f183..a78f80bd1 100644
--- a/madminer/ml/parameterized_ratio.py
+++ b/madminer/ml/parameterized_ratio.py
@@ -1,16 +1,14 @@
-from __future__ import absolute_import, division, print_function
-
 import logging
 import numpy as np
-from collections import OrderedDict
 import torch
+from collections import OrderedDict
 
-from ..utils.ml.models.ratio import DenseSingleParameterizedRatioModel
+from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork
 from ..utils.ml.eval import evaluate_ratio_model
+from ..utils.ml.models.ratio import DenseSingleParameterizedRatioModel
+from ..utils.ml.trainer import SingleParameterizedRatioTrainer
 from ..utils.ml.utils import get_optimizer, get_loss
 from ..utils.various import load_and_check, shuffle, restrict_samplesize
-from ..utils.ml.trainer import SingleParameterizedRatioTrainer
-from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork
 
 try:
     FileNotFoundError
diff --git a/madminer/ml/score.py b/madminer/ml/score.py
index 03142db16..e420c4948 100644
--- a/madminer/ml/score.py
+++ b/madminer/ml/score.py
@@ -1,16 +1,13 @@
-from __future__ import absolute_import, division, print_function
-
 import logging
 import numpy as np
 from collections import OrderedDict
 
-from ..utils.ml.models.score import DenseLocalScoreModel
+from .base import Estimator, TheresAGoodReasonThisDoesntWork
 from ..utils.ml.eval import evaluate_local_score_model
-from ..utils.ml.utils import get_optimizer, get_loss
-from ..utils.various import load_and_check, shuffle, restrict_samplesize
-from ..utils.various import separate_information_blocks
+from ..utils.ml.models.score import DenseLocalScoreModel
 from ..utils.ml.trainer import LocalScoreTrainer
-from .base import Estimator, TheresAGoodReasonThisDoesntWork
+from ..utils.ml.utils import get_optimizer, get_loss
+from ..utils.various import load_and_check, shuffle, restrict_samplesize, separate_information_blocks
 
 try:
     FileNotFoundError

From 3b53a5e763f1fc2a145ad2e783ee754666b447ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 10:40:51 -0400
Subject: [PATCH 38/65] src: ML module remove legacy exceptions

---
 madminer/ml/base.py                       | 4 ----
 madminer/ml/double_parameterized_ratio.py | 4 ----
 madminer/ml/ensemble.py                   | 5 -----
 madminer/ml/likelihood.py                 | 4 ----
 madminer/ml/morphing_aware.py             | 4 ----
 madminer/ml/parameterized_ratio.py        | 4 ----
 madminer/ml/score.py                      | 4 ----
 7 files changed, 29 deletions(-)

diff --git a/madminer/ml/base.py b/madminer/ml/base.py
index 56c646861..feb5698ce 100644
--- a/madminer/ml/base.py
+++ b/madminer/ml/base.py
@@ -6,10 +6,6 @@
 
 from ..utils.various import create_missing_folders, load_and_check
 
-try:
-    FileNotFoundError
-except NameError:
-    FileNotFoundError = IOError
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py
index bcb4f6151..d0415d67b 100644
--- a/madminer/ml/double_parameterized_ratio.py
+++ b/madminer/ml/double_parameterized_ratio.py
@@ -9,10 +9,6 @@
 from ..utils.ml.utils import get_optimizer, get_loss
 from ..utils.various import load_and_check, shuffle, restrict_samplesize
 
-try:
-    FileNotFoundError
-except NameError:
-    FileNotFoundError = IOError
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py
index eb997fa9c..415ac51db 100644
--- a/madminer/ml/ensemble.py
+++ b/madminer/ml/ensemble.py
@@ -10,11 +10,6 @@
 from .score import ScoreEstimator
 
 
-try:
-    FileNotFoundError
-except NameError:
-    FileNotFoundError = IOError
-
 logger = logging.getLogger(__name__)
 
 
diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py
index 4a405f96c..59e4dcf0d 100644
--- a/madminer/ml/likelihood.py
+++ b/madminer/ml/likelihood.py
@@ -10,10 +10,6 @@
 from ..utils.ml.utils import get_optimizer, get_loss
 from ..utils.various import load_and_check, shuffle, restrict_samplesize
 
-try:
-    FileNotFoundError
-except NameError:
-    FileNotFoundError = IOError
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/ml/morphing_aware.py b/madminer/ml/morphing_aware.py
index 5c33780fa..4cb947a12 100644
--- a/madminer/ml/morphing_aware.py
+++ b/madminer/ml/morphing_aware.py
@@ -6,10 +6,6 @@
 from ..utils.ml.models.ratio import DenseMorphingAwareRatioModel, DenseQuadraticMorphingAwareRatioModel
 from ..utils.morphing import PhysicsMorpher
 
-try:
-    FileNotFoundError
-except NameError:
-    FileNotFoundError = IOError
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py
index a78f80bd1..a189a9512 100644
--- a/madminer/ml/parameterized_ratio.py
+++ b/madminer/ml/parameterized_ratio.py
@@ -10,10 +10,6 @@
 from ..utils.ml.utils import get_optimizer, get_loss
 from ..utils.various import load_and_check, shuffle, restrict_samplesize
 
-try:
-    FileNotFoundError
-except NameError:
-    FileNotFoundError = IOError
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/ml/score.py b/madminer/ml/score.py
index e420c4948..e199c7031 100644
--- a/madminer/ml/score.py
+++ b/madminer/ml/score.py
@@ -9,10 +9,6 @@
 from ..utils.ml.utils import get_optimizer, get_loss
 from ..utils.various import load_and_check, shuffle, restrict_samplesize, separate_information_blocks
 
-try:
-    FileNotFoundError
-except NameError:
-    FileNotFoundError = IOError
 
 logger = logging.getLogger(__name__)
 

From 70030fd8ab97c8db3b65a7bdf799efdcb1f76ddb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 10:59:48 -0400
Subject: [PATCH 39/65] src: plotting module f-strings

---
 madminer/plotting/distributions.py     |  8 ++++----
 madminer/plotting/fisherinformation.py |  5 +++--
 madminer/plotting/limits.py            | 12 ++++++------
 madminer/plotting/uncertainties.py     |  2 +-
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/madminer/plotting/distributions.py b/madminer/plotting/distributions.py
index 7bc64f001..68584dcfc 100644
--- a/madminer/plotting/distributions.py
+++ b/madminer/plotting/distributions.py
@@ -148,7 +148,7 @@ def plot_distributions(
     n_parameter_points = len(parameter_points)
 
     if colors is None:
-        colors = ["C" + str(i) for i in range(10)] * (n_parameter_points // 10 + 1)
+        colors = [f"C{i}" for i in range(10)] * (n_parameter_points // 10 + 1)
     elif not isinstance(colors, list):
         colors = [colors for _ in range(n_parameter_points)]
 
@@ -421,11 +421,11 @@ def plot_histograms(
     # Basic setup
     n_histos = len(histos)
     dim = len(histos[0].edges)
-    assert dim in [1, 2], "Only 1- or 2-dimensional histograms are supported, but found {} dimensions".format(dim)
+    assert dim in [1, 2], f"Only 1- or 2-dimensional histograms are supported, but found {dim} dimensions"
 
     # Defaults
     if colors is None:
-        colors = ["C" + str(i) for i in range(10)] * (n_histos // 10 + 1)
+        colors = [f"C{i}" for i in range(10)] * (n_histos // 10 + 1)
     elif not isinstance(colors, list):
         colors = [colors for _ in range(n_histos)]
     if linestyles is None:
@@ -435,7 +435,7 @@ def plot_histograms(
     if not isinstance(linewidths, list):
         linewidths = [linewidths for _ in range(n_histos)]
     if histo_labels is None:
-        histo_labels = ["Histogram {}".format(i + 1) for i in range(n_histos)]
+        histo_labels = [f"Histogram {i+1}" for i in range(n_histos)]
 
     # 1D plot
     if dim == 1:
diff --git a/madminer/plotting/fisherinformation.py b/madminer/plotting/fisherinformation.py
index 705e32729..eaf8ed91e 100644
--- a/madminer/plotting/fisherinformation.py
+++ b/madminer/plotting/fisherinformation.py
@@ -105,7 +105,8 @@ def plot_fisher_information_contours_2d(
 
     if fisher_information_matrices.shape != (n_matrices, 2, 2):
         raise RuntimeError(
-            "Fisher information matrices have shape {}, not (n, 2,2)!".format(fisher_information_matrices.shape)
+            f"Fisher information matrices have shape {fisher_information_matrices.shape}. "
+            f"Not (n, 2,2)!"
         )
 
     if fisher_information_covariances is None:
@@ -301,7 +302,7 @@ def plot_fisherinfo_barplot(
         bar_colors_light = bar_colors
 
     if eigenvalue_colors is None:
-        eigenvalue_colors = ["C{}".format(str(i)) for i in range(10)]
+        eigenvalue_colors = [f"C{i}" for i in range(10)]
     eigenvalue_linewidth = 1.5
 
     # Upper plot
diff --git a/madminer/plotting/limits.py b/madminer/plotting/limits.py
index 4acc2797f..6d78d7524 100644
--- a/madminer/plotting/limits.py
+++ b/madminer/plotting/limits.py
@@ -134,20 +134,20 @@ def plot_pvalue_limits(
             cmap="Greys_r",
         )
         cbar = fig.colorbar(pcm, ax=ax, extend="both")
-        cbar.set_label("Expected p-value ({})".format(labels[show_index]))
+        cbar.set_label(f"Expected p-value ({labels[show_index]})")
     for ipanel in range(len(p_values)):
         ax.contour(
             xcenters,
             ycenters,
             p_values[ipanel].reshape((grid_resolutions[0], grid_resolutions[1])).T,
             levels=levels,
-            colors="C{}".format(ipanel),
+            colors=f"C{ipanel}",
         )
         ax.scatter(
             theta_grid[best_fits[ipanel]][0],
             theta_grid[best_fits[ipanel]][1],
             s=80.0,
-            color="C{}".format(ipanel),
+            color=f"C{ipanel}",
             marker="*",
             label=labels[ipanel],
         )
@@ -167,19 +167,19 @@ def plot_pvalue_limits(
                 cmap="Greys_r",
             )
             cbar = fig.colorbar(pcm, ax=ax, extend="both")
-            cbar.set_label("Expected p-value ({})".format(labels[ipanel]))
+            cbar.set_label(f"Expected p-value ({labels[ipanel]})")
             ax.contour(
                 xcenters,
                 ycenters,
                 p_values[ipanel].reshape((grid_resolutions[0], grid_resolutions[1])).T,
                 levels=levels,
-                colors="C{}".format(ipanel),
+                colors=f"C{ipanel}",
             )
             ax.scatter(
                 theta_grid[best_fits[ipanel]][0],
                 theta_grid[best_fits[ipanel]][1],
                 s=80.0,
-                color="C{}".format(ipanel),
+                color=f"C{ipanel}",
                 marker="*",
                 label=labels[ipanel],
             )
diff --git a/madminer/plotting/uncertainties.py b/madminer/plotting/uncertainties.py
index bb66e9a8d..247ff78f3 100644
--- a/madminer/plotting/uncertainties.py
+++ b/madminer/plotting/uncertainties.py
@@ -307,7 +307,7 @@ def plot_systematics(
 
     # Colors
     if bandcolors is None:
-        bandcolors = ["C{}".format(i) for i in range(10)]
+        bandcolors = [f"C{i}" for i in range(10)]
 
     # Load data
     sa = SampleAugmenter(filename, include_nuisance_parameters=True)

From a7173779f0255ee1ff46bdffc70d966d0b7e9557 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 11:04:27 -0400
Subject: [PATCH 40/65] src: plotting module improve fmt

---
 madminer/plotting/__init__.py          | 2 +-
 madminer/plotting/distributions.py     | 2 +-
 madminer/plotting/fisherinformation.py | 7 ++++++-
 madminer/plotting/uncertainties.py     | 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/madminer/plotting/__init__.py b/madminer/plotting/__init__.py
index 6d52e9102..cef287c8e 100644
--- a/madminer/plotting/__init__.py
+++ b/madminer/plotting/__init__.py
@@ -1,9 +1,9 @@
 from .distributions import plot_distributions, plot_histograms
 from .morphing import (
+    plot_1d_morphing_basis,
     plot_2d_morphing_basis,
     plot_nd_morphing_basis_scatter,
     plot_nd_morphing_basis_slices,
-    plot_1d_morphing_basis,
 )
 from .fisherinformation import (
     plot_fisherinfo_barplot,
diff --git a/madminer/plotting/distributions.py b/madminer/plotting/distributions.py
index 68584dcfc..cda4e5b89 100644
--- a/madminer/plotting/distributions.py
+++ b/madminer/plotting/distributions.py
@@ -69,7 +69,7 @@ def plot_distributions(
         Whether the distribution is normalized to the total cross section. Default value: False.
 
     log : bool, optional
-        Whether to draw the y axes on a logarithmic scale. Defaul value: False.
+        Whether to draw the y axes on a logarithmic scale. Default value: False.
 
     observable_labels : None or list of (str or None), optional
         x-axis labels naming the observables. If None, the observable names from the MadMiner file are used. Default
diff --git a/madminer/plotting/fisherinformation.py b/madminer/plotting/fisherinformation.py
index eaf8ed91e..73f6091e4 100644
--- a/madminer/plotting/fisherinformation.py
+++ b/madminer/plotting/fisherinformation.py
@@ -92,6 +92,7 @@ def plot_fisher_information_contours_2d(
 
     ax: axes or None, optional
         Predefined axes as part of figure instead of standalone figure. Default: None
+
     Returns
     -------
     figure : Figure
@@ -220,7 +221,11 @@ def plot_fisher_information_contours_2d(
 
 
 def plot_fisherinfo_barplot(
-    fisher_information_matrices, labels, determinant_indices=None, eigenvalue_colors=None, bar_colors=None
+    fisher_information_matrices,
+    labels,
+    determinant_indices=None,
+    eigenvalue_colors=None,
+    bar_colors=None,
 ):
     """
 
diff --git a/madminer/plotting/uncertainties.py b/madminer/plotting/uncertainties.py
index 247ff78f3..f1a8f31b5 100644
--- a/madminer/plotting/uncertainties.py
+++ b/madminer/plotting/uncertainties.py
@@ -295,7 +295,7 @@ def plot_systematics(
     bandcolors : None or list of str, optional
         Error band colors. Default value: None.
 
-    ratio_range : tuple of two floar
+    ratio_range : tuple of two float
         y-axis range for the plots of the ratio to the central prediction. Default value: (0.8, 1.2).
 
     Returns

From d3f1e970e6e2d1f0c0c3506eb166b5484da82003 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 11:07:52 -0400
Subject: [PATCH 41/65] src: plotting module remove 'six'

---
 madminer/plotting/distributions.py     |  4 +---
 madminer/plotting/fisherinformation.py |  2 --
 madminer/plotting/limits.py            |  2 --
 madminer/plotting/morphing.py          |  2 --
 madminer/plotting/uncertainties.py     | 15 ++++++---------
 5 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/madminer/plotting/distributions.py b/madminer/plotting/distributions.py
index cda4e5b89..24b1b5451 100644
--- a/madminer/plotting/distributions.py
+++ b/madminer/plotting/distributions.py
@@ -1,13 +1,11 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import matplotlib
 import numpy as np
 from matplotlib import pyplot as plt
 
+from ..sampling import SampleAugmenter
 from ..utils.morphing import NuisanceMorpher
 from ..utils.various import shuffle, sanitize_array, mdot, weighted_quantile
-from ..sampling import SampleAugmenter
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/plotting/fisherinformation.py b/madminer/plotting/fisherinformation.py
index 73f6091e4..a73f1b24e 100644
--- a/madminer/plotting/fisherinformation.py
+++ b/madminer/plotting/fisherinformation.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import matplotlib
 import numpy as np
diff --git a/madminer/plotting/limits.py b/madminer/plotting/limits.py
index 6d78d7524..8c8a9083a 100644
--- a/madminer/plotting/limits.py
+++ b/madminer/plotting/limits.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import matplotlib
 import numpy as np
diff --git a/madminer/plotting/morphing.py b/madminer/plotting/morphing.py
index 408013274..43c4dba56 100644
--- a/madminer/plotting/morphing.py
+++ b/madminer/plotting/morphing.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import matplotlib
 import numpy as np
diff --git a/madminer/plotting/uncertainties.py b/madminer/plotting/uncertainties.py
index f1a8f31b5..30f4b7e44 100644
--- a/madminer/plotting/uncertainties.py
+++ b/madminer/plotting/uncertainties.py
@@ -1,13 +1,10 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
 import logging
 import numpy as np
 from matplotlib import pyplot as plt, gridspec
 
+from ..sampling import SampleAugmenter
 from ..utils.morphing import NuisanceMorpher
 from ..utils.various import mdot, shuffle, sanitize_array
-from ..sampling import SampleAugmenter
 
 logger = logging.getLogger(__name__)
 
@@ -120,7 +117,7 @@ def plot_uncertainty(
     # Restrict nuisance parameters
     if systematics is not None:
         nuisance_parameters = []
-        for npar, (npar_syst, _, _) in six.iteritems(sa.nuisance_parameters):
+        for npar, (npar_syst, _, _) in sa.nuisance_parameters.items():
             if npar_syst in systematics:
                 nuisance_parameters.append(npar)
 
@@ -342,7 +339,7 @@ def plot_systematics(
 
     # Systematics
     n_systematics = len(sa.systematics) + 1
-    labels = list(six.iterkeys(sa.systematics)) + ["combined"]
+    labels = list(sa.systematics.keys()) + ["combined"]
 
     # Nuisance parameters
     n_nuisance_params = sa.n_nuisance_parameters
@@ -351,11 +348,11 @@ def plot_systematics(
     nuisance_toys = nuisance_toys.reshape(n_systematics, n_toys, n_nuisance_params)
 
     # Restrict nuisance parameters
-    all_nuisance_parameters = list(six.iterkeys(sa.nuisance_parameters))
-    for i_syst, syst_name in enumerate(six.iterkeys(sa.systematics)):
+    all_nuisance_parameters = list(sa.nuisance_parameters.keys())
+    for i_syst, syst_name in enumerate(sa.systematics.keys()):
         n_used = n_nuisance_params
         used_nuisance_parameters = []
-        for npar, (npar_syst, _, _) in six.iteritems(sa.nuisance_parameters):
+        for npar, (npar_syst, _, _) in sa.nuisance_parameters.items():
             if npar_syst == syst_name:
                 used_nuisance_parameters.append(npar)
 

From 96fd583bd3e358059e0f2a4a5ec3ce189e5c269a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 11:24:09 -0400
Subject: [PATCH 42/65] src: sampling module f-strings

---
 madminer/sampling/combine.py         |  3 +-
 madminer/sampling/sampleaugmenter.py | 74 ++++++++++++++--------------
 2 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/madminer/sampling/combine.py b/madminer/sampling/combine.py
index 3b4281227..4efc53b5b 100644
--- a/madminer/sampling/combine.py
+++ b/madminer/sampling/combine.py
@@ -78,7 +78,8 @@ def combine_and_shuffle(
 
     if len(input_filenames) != len(k_factors):
         raise RuntimeError(
-            "Inconsistent length of input filenames and k factors: %s vs %s", len(input_filenames), len(k_factors)
+            f"Inconsistent length of input filenames and k factors: "
+            f"{len(input_filenames)} vs {len(k_factors)}"
         )
 
     # Copy first file to output_filename
diff --git a/madminer/sampling/sampleaugmenter.py b/madminer/sampling/sampleaugmenter.py
index e16a0d228..6ed33a403 100644
--- a/madminer/sampling/sampleaugmenter.py
+++ b/madminer/sampling/sampleaugmenter.py
@@ -182,8 +182,8 @@ def sample_train_plain(
 
         # Save data
         if filename is not None and folder is not None:
-            np.save(folder + "/theta_" + filename + ".npy", theta)
-            np.save(folder + "/x_" + filename + ".npy", x)
+            np.save(f"{folder}/theta_{filename}.npy", theta)
+            np.save(f"{folder}/x_{filename}.npy", x)
 
         return x, theta, min(effective_n_samples)
 
@@ -327,9 +327,9 @@ def sample_train_local(
 
         # Save data
         if filename is not None and folder is not None:
-            np.save(folder + "/theta_" + filename + ".npy", theta)
-            np.save(folder + "/x_" + filename + ".npy", x)
-            np.save(folder + "/t_xz_" + filename + ".npy", t_xz)
+            np.save(f"{folder}/theta_{filename}.npy", theta)
+            np.save(f"{folder}/x_{filename}.npy", x)
+            np.save(f"{folder}/t_xz_{filename}.npy", t_xz)
 
         return x, theta, t_xz, min(effective_n_samples)
 
@@ -699,13 +699,13 @@ def sample_train_ratio(
 
         # Save data
         if filename is not None and folder is not None:
-            np.save(folder + "/theta0_" + filename + ".npy", theta0)
-            np.save(folder + "/theta1_" + filename + ".npy", theta1)
-            np.save(folder + "/x_" + filename + ".npy", x)
-            np.save(folder + "/y_" + filename + ".npy", y)
-            np.save(folder + "/r_xz_" + filename + ".npy", r_xz)
+            np.save(f"{folder}/theta0_{filename}.npy", theta0)
+            np.save(f"{folder}/theta1_{filename}.npy", theta1)
+            np.save(f"{folder}/x_{filename}.npy", x)
+            np.save(f"{folder}/y_{filename}.npy", y)
+            np.save(f"{folder}/r_xz_{filename}.npy", r_xz)
             if self.morpher is not None:
-                np.save(folder + "/t_xz_" + filename + ".npy", t_xz)
+                np.save(f"{folder}/t_xz_{filename}.npy", t_xz)
 
         if not return_individual_n_effective:
             n_effective = np.min(n_effective)
@@ -1029,13 +1029,13 @@ def sample_train_more_ratios(
 
         # Save data
         if filename is not None and folder is not None:
-            np.save(folder + "/theta0_" + filename + ".npy", theta0)
-            np.save(folder + "/theta1_" + filename + ".npy", theta1)
-            np.save(folder + "/x_" + filename + ".npy", x)
-            np.save(folder + "/y_" + filename + ".npy", y)
-            np.save(folder + "/r_xz_" + filename + ".npy", r_xz)
-            np.save(folder + "/t_xz0_" + filename + ".npy", t_xz0)
-            np.save(folder + "/t_xz1_" + filename + ".npy", t_xz1)
+            np.save(f"{folder}/theta0_{filename}.npy", theta0)
+            np.save(f"{folder}/theta1_{filename}.npy", theta1)
+            np.save(f"{folder}/x_{filename}.npy", x)
+            np.save(f"{folder}/y_{filename}.npy", y)
+            np.save(f"{folder}/r_xz_{filename}.npy", r_xz)
+            np.save(f"{folder}/t_xz0_{filename}.npy", t_xz0)
+            np.save(f"{folder}/t_xz1_{filename}.npy", t_xz1)
 
         return x, theta0, theta1, y, r_xz, t_xz0, t_xz1, min(min(n_effective_samples_0), min(n_effective_samples_1))
 
@@ -1146,8 +1146,8 @@ def sample_test(
 
         # Save data
         if filename is not None and folder is not None:
-            np.save(folder + "/theta_" + filename + ".npy", theta)
-            np.save(folder + "/x_" + filename + ".npy", x)
+            np.save(f"{folder}/theta_{filename}.npy", theta)
+            np.save(f"{folder}/x_{filename}.npy", x)
 
         return x, theta, min(n_effective_samples)
 
@@ -1675,9 +1675,8 @@ def _sample_set(
             # Check that we got 'em all, otherwise repeat
             if not np.all(done):
                 logger.debug(
-                    "  After full pass through event files, {} / {} samples not found, with u = {}".format(
-                        np.sum(np.invert(done)), done.size, u[np.invert(done)]
-                    )
+                    f"  After full pass through event files, {np.sum(np.invert(done))} / {done.size} "
+                    f"samples not found, with u = {u[np.invert(done)]}"
                 )
 
         n_eff_samples = 1.0 / max(1.0e-12, largest_event_probability)
@@ -1716,7 +1715,7 @@ def _calculate_augmented_data(
                 score = score.T  # (n_samples, n_gradients)
                 augmented_data.append(score)
             else:
-                raise ValueError("Unknown augmented data type {}".format(definition[0]))
+                raise ValueError(f"Unknown augmented data type {definition[0]}")
 
         return augmented_data
 
@@ -1815,11 +1814,11 @@ def _parse_theta(theta, n_samples):
                     prior_std = prior[2]
                     thetas_out.append(np.random.normal(loc=prior_mean, scale=prior_std, size=n_benchmarks))
                 else:
-                    raise ValueError("Unknown prior {}".format(prior))
+                    raise ValueError(f"Unknown prior {prior}")
             thetas_out = np.array(thetas_out).T
 
         else:
-            raise ValueError("Unknown theta specification {}".format(theta))
+            raise ValueError(f"Unknown theta specification {theta}")
 
         return thetas_out, n_samples_per_theta
 
@@ -1861,18 +1860,18 @@ def _parse_nu(self, nu, n_thetas):
                     prior_std = prior[2]
                     nu_out.append(np.random.normal(loc=prior_mean, scale=prior_std, size=n_thetas))
                 else:
-                    raise ValueError("Unknown prior {}".format(prior))
+                    raise ValueError(f"Unknown prior {prior}")
             nu_out = np.array(nu_out).T
 
         else:
-            raise ValueError("Unknown nu specification {}".format(nu))
+            raise ValueError(f"Unknown nu specification {nu}")
 
         return nu_out
 
     @staticmethod
     def _build_sets(thetas, nus):
         if len(nus) != len(thetas):
-            raise RuntimeError("Mismatching thetas and nus: {} vs {}".format(len(thetas), len(nus)))
+            raise RuntimeError(f"Mismatching thetas and nus: {len(thetas)} vs {len(nus)}")
 
         n_sets = max([len(param) for param in thetas + nus])
         sets = [[] for _ in range(n_sets)]
@@ -1883,9 +1882,8 @@ def _build_sets(thetas, nus):
 
             if n_theta_sets_before <= 0 or n_nu_sets_before <= 0:
                 raise RuntimeError(
-                    (
-                        "Inconsistent number of sets in _build_sets: thetas = {}, nus = {}, theta = {}, " "nu = {}"
-                    ).format(thetas, nus, theta, nu)
+                    f"Inconsistent number of sets in _build_sets: "
+                    f"thetas = {thetas}, nus = {nus}, theta = {theta}, nu = {nu}"
                 )
 
             for i_set in range(n_sets):
@@ -1900,19 +1898,19 @@ def _format_sampling(theta):
         elif theta[0] == "morphing_point":
             return str(theta[1])
         elif theta[0] == "benchmarks":
-            return "{} benchmarks, starting with {}".format(len(theta[1]), theta[1][:3])
+            return f"{len(theta[1])} benchmarks, starting with {theta[1][:3]}"
         elif theta[0] == "morphing_points":
-            return "{} morphing points, starting with {}".format(len(theta[1]), theta[1][:3])
+            return f"{len(theta[1])} morphing points, starting with {theta[1][:3]}"
         elif theta[0] == "random_morphing_points":
             prior_str = ""
             for i, (type_, arg0, arg1) in enumerate(theta[1][1]):
                 prior_str += "\n"
                 if type_ == "gaussian":
-                    prior_str += "  theta_{} ~ Gaussian with mean {} and std {}".format(i, arg0, arg1)
+                    prior_str += f"  theta_{i} ~ Gaussian with mean {arg0} and std {arg1}"
                 elif type_ == "flat":
-                    prior_str += "  theta_{} ~ flat from {} to {}".format(i, arg0, arg1)
+                    prior_str += f"  theta_{i} ~ flat from {arg0} to {arg1}"
 
             if theta[1][0] is None:
-                return "Maximally many random morphing points, drawn from the following priors:{}".format(prior_str)
+                return f"Maximally many random morphing points, drawn from the following priors: {prior_str}"
             else:
-                return "{} random morphing points, drawn from the following priors:{}".format(theta[1][0], prior_str)
+                return f"{theta[1][0]} random morphing points, drawn from the following priors: {prior_str}"

From d12dd2e96c4d36a0edddbbe466ac85f328fa2f85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 11:34:31 -0400
Subject: [PATCH 43/65] src: sampling module improve fmt

---
 madminer/sampling/combine.py         |  8 +++++++-
 madminer/sampling/parameters.py      |  4 ++--
 madminer/sampling/sampleaugmenter.py | 27 ++++++++++++++++-----------
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/madminer/sampling/combine.py b/madminer/sampling/combine.py
index 4efc53b5b..fdce943e1 100644
--- a/madminer/sampling/combine.py
+++ b/madminer/sampling/combine.py
@@ -14,15 +14,21 @@
 def _calculate_n_events(sampling_ids, n_benchmarks):
     if sampling_ids is None:
         return None, None
+
     unique, counts = np.unique(sampling_ids, return_counts=True)
     results = dict(zip(unique, counts))
+
     n_events_backgrounds = results.get(-1, 0)
     n_events_signal_per_benchmark = np.array([results.get(i, 0) for i in range(n_benchmarks)], dtype=np.int)
     return n_events_signal_per_benchmark, n_events_backgrounds
 
 
 def combine_and_shuffle(
-    input_filenames, output_filename, k_factors=None, overwrite_existing_file=True, recalculate_header=True
+    input_filenames,
+    output_filename,
+    k_factors=None,
+    overwrite_existing_file=True,
+    recalculate_header=True,
 ):
     """
     Combines multiple MadMiner files into one, and shuffles the order of the events.
diff --git a/madminer/sampling/parameters.py b/madminer/sampling/parameters.py
index 4a693e914..3e495ac76 100644
--- a/madminer/sampling/parameters.py
+++ b/madminer/sampling/parameters.py
@@ -109,7 +109,7 @@ def random_morphing_points(n_thetas, priors):
 def iid_nuisance_parameters(shape="gaussian", param0=0.0, param1=1.0):
     """
     Utility function to be used as input to various SampleAugmenter functions, specifying that nuisance parameters are
-    fixed at their nominal valuees.
+    fixed at their nominal values.
 
     Parameters
     ----------
@@ -134,7 +134,7 @@ def iid_nuisance_parameters(shape="gaussian", param0=0.0, param1=1.0):
 def nominal_nuisance_parameters():
     """
     Utility function to be used as input to various SampleAugmenter functions, specifying that nuisance parameters are
-    fixed at their nominal valuees.
+    fixed at their nominal values.
 
     Returns
     -------
diff --git a/madminer/sampling/sampleaugmenter.py b/madminer/sampling/sampleaugmenter.py
index 6ed33a403..0f272fc2c 100644
--- a/madminer/sampling/sampleaugmenter.py
+++ b/madminer/sampling/sampleaugmenter.py
@@ -22,8 +22,8 @@ class SampleAugmenter(DataAnalyzer):
     (related) tasks: unweighting, i.e. the creation of samples that do not carry individual weights but follow some
     distribution, and the extraction of the joint likelihood ratio and / or joint score (the "augmented data").
 
-    After inializing `SampleAugmenter` with the filename of a MadMiner file, this is done with a single function call.
-    Depending on the downstream infference algorithm, there are different possibilities:
+    After initializing `SampleAugmenter` with the filename of a MadMiner file, this is done with a single function call.
+    Depending on the downstream inference algorithm, there are different possibilities:
 
     * `SampleAugmenter.sample_train_plain()` creates plain training samples without augmented data.
     * `SampleAugmenter.sample_train_local()` creates training samples for local methods based on the score,
@@ -685,6 +685,7 @@ def sample_train_ratio(
             t_xz = np.vstack([t_xz0, t_xz1])
         else:
             t_xz = None
+
         theta0 = np.vstack([theta0_0, theta0_1])
         theta1 = np.vstack([theta1_0, theta1_1])
         y = np.zeros(x.shape[0])
@@ -1168,7 +1169,7 @@ def cross_sections(self, theta, nu=None):
             Tuple (type, value) that defines the nuisance parameter point or prior over nuisance parameter points at
             which the cross section is calculated. Pass the output of the functions `benchmark()`,
             `benchmarks()`, `morphing_point()`, `morphing_points()`, or
-            `random_morphing_points()`. Default valuee: None.
+            `random_morphing_points()`. Default value: None.
 
         Returns
         -------
@@ -1247,17 +1248,20 @@ def _sample(
             If True, any joint score in the augmented data definitions is also calculated with respect to the nuisance
             parameters. Default value: True.
 
-        use_train_events : bool, optional
-            Decides whether to use the train or test split of the events. Default value: True.
+        partition : {"train", "test", "validation", "all"}, optional
+            Which event partition to use. Default value: "train".
 
         test_split : float or None, optional
             Fraction of events reserved for the evaluation sample (that will not be used for any training samples).
             Default value: 0.2.
 
+        validation_split : float or None, optional
+            Fraction of events reserved for validation. Default value: 0.2.
+
         n_processes : None or int, optional
             If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case,
-            n_workers sets the number of jobs running in parallel, and None will use the number of CPUs. Default value:
-            1.
+            n_processes sets the number of jobs running in parallel, and None will use the number of CPUs.
+            Default value: 1.
 
         update_patience : float, optional
             Wait time (in s) between log update checks if n_workers > 1 (or None). Default value: 0.01
@@ -1271,6 +1275,9 @@ def _sample(
             and ignore other events. This can help to reduce statistical effects caused by a small number of events
             with very large weights obtained by the morphing procedure. Default value: None
 
+        double_precision : bool, optional
+            Use double floating-point precision. Default value: False.
+
         Returns
         -------
         x :  ndarray
@@ -1720,8 +1727,7 @@ def _calculate_augmented_data(
         return augmented_data
 
     def _combine_thetas_nus(self, all_thetas, all_nus):
-        n_thetas = len(all_thetas)
-        assert n_thetas == len(all_nus)
+        assert len(all_thetas) == len(all_nus)
 
         # all_nus is a list of a list of (None or ndarray)
         # Figure out if there's anything nontrivial in there
@@ -1870,8 +1876,7 @@ def _parse_nu(self, nu, n_thetas):
 
     @staticmethod
     def _build_sets(thetas, nus):
-        if len(nus) != len(thetas):
-            raise RuntimeError(f"Mismatching thetas and nus: {len(thetas)} vs {len(nus)}")
+        assert len(thetas) == len(nus)
 
         n_sets = max([len(param) for param in thetas + nus])
         sets = [[] for _ in range(n_sets)]

From 60d4607c67dc44c67ebe5236f556e370dd9da696 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 11:35:32 -0400
Subject: [PATCH 44/65] src: sampling module remove 'future'

---
 madminer/sampling/combine.py         | 2 --
 madminer/sampling/parameters.py      | 2 --
 madminer/sampling/sampleaugmenter.py | 2 --
 3 files changed, 6 deletions(-)

diff --git a/madminer/sampling/combine.py b/madminer/sampling/combine.py
index fdce943e1..45f120a0c 100644
--- a/madminer/sampling/combine.py
+++ b/madminer/sampling/combine.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 
diff --git a/madminer/sampling/parameters.py b/madminer/sampling/parameters.py
index 3e495ac76..226e18ad2 100644
--- a/madminer/sampling/parameters.py
+++ b/madminer/sampling/parameters.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 
diff --git a/madminer/sampling/sampleaugmenter.py b/madminer/sampling/sampleaugmenter.py
index 0f272fc2c..0db78af4b 100644
--- a/madminer/sampling/sampleaugmenter.py
+++ b/madminer/sampling/sampleaugmenter.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import time
 import logging
 import numpy as np

From 68fc6fd52317104f5e10671456ad2bfba2b155ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 11:17:59 -0400
Subject: [PATCH 45/65] src: utils module f-strings

---
 madminer/utils/histo.py    |  5 ++---
 madminer/utils/morphing.py |  5 ++---
 madminer/utils/various.py  | 14 +++++++++-----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/madminer/utils/histo.py b/madminer/utils/histo.py
index 83ff2ba7a..78c870420 100644
--- a/madminer/utils/histo.py
+++ b/madminer/utils/histo.py
@@ -36,9 +36,8 @@ def __init__(self, x, weights=None, bins=20, epsilon=0.0):
 
         if weights is not None:
             weights = weights.flatten()
-            assert weights.shape == (self.n_samples,), "Inconsistent weight shape {} should be {}".format(
-                weights.shape, (self.n_samples,)
-            )
+            assert weights.shape == (self.n_samples,), \
+                f"Inconsistent weight shape {weights.shape} should be {(self.n_samples,)}"
 
         logger.debug("Creating histogram:")
         logger.debug("  Samples:       %s", self.n_samples)
diff --git a/madminer/utils/morphing.py b/madminer/utils/morphing.py
index f685de47c..e3f268c83 100644
--- a/madminer/utils/morphing.py
+++ b/madminer/utils/morphing.py
@@ -142,9 +142,8 @@ def find_components(self, max_overall_power=4):
         for max_power in self.parameter_max_power:
             if n_regions != len(max_power):
                 raise RuntimeError(
-                    "Parameters have different number of partitions of max powers: {} {}".format(
-                        max_overall_power, self.parameter_max_power
-                    )
+                    f"Parameters have different number of partitions of max powers: "
+                    f"{max_overall_power} vs {self.parameter_max_power}"
                 )
 
         # Go through regions and finds components for each
diff --git a/madminer/utils/various.py b/madminer/utils/various.py
index 9c2b4a308..7fb8387ff 100644
--- a/madminer/utils/various.py
+++ b/madminer/utils/various.py
@@ -25,7 +25,8 @@ def call_command(cmd, log_file=None, return_std=False):
 
         if exitcode != 0:
             raise RuntimeError(
-                "Calling command {} returned exit code {}. Output in file {}.".format(cmd, exitcode, log_file)
+                f"Calling command {cmd} returned exit code {exitcode}. "
+                f"Output in file {log_file}."
             )
     else:
         proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
@@ -34,9 +35,12 @@ def call_command(cmd, log_file=None, return_std=False):
 
         if exitcode != 0:
             raise RuntimeError(
-                "Calling command {} returned exit code {}.\n\nStd output:\n\n{}Error output:\n\n{}".format(
-                    cmd, exitcode, out, err
-                )
+                f"Calling command {cmd} returned exit code {exitcode}."
+                f"\n\n"
+                f"Std output: {out}"
+                f"\n\n"
+                f"Error output: {err}"
+                f"\n\n"
             )
 
         if return_std:
@@ -63,7 +67,7 @@ def create_missing_folders(folders):
             os.makedirs(folder)
 
         elif not os.path.isdir(folder):
-            raise OSError("Path {} exists, but is no directory!".format(folder))
+            raise OSError(f"Path {folder} exists, but is no directory!")
 
 
 def format_benchmark(parameters, precision=2):

From 962d978b204da151e82c99e480a4e2935c73c7e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 11:25:41 -0400
Subject: [PATCH 46/65] src: utils module improve fmt

---
 madminer/utils/histo.py    | 21 ++++++++++++++++++---
 madminer/utils/morphing.py | 20 ++++++--------------
 madminer/utils/various.py  |  4 +++-
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/madminer/utils/histo.py b/madminer/utils/histo.py
index 78c870420..449b4032b 100644
--- a/madminer/utils/histo.py
+++ b/madminer/utils/histo.py
@@ -123,9 +123,20 @@ def _adaptive_binning(x, n_bins, weights=None, lower_cutoff_percentile=0.1, uppe
     def _fit(self, x, weights=None, epsilon=0.0):
         # Fill histograms
         ranges = [(edges[0], edges[-1]) for edges in self.edges]
-        histo, _ = np.histogramdd(x, bins=self.edges, range=ranges, normed=False, weights=weights)
+
+        histo, _ = np.histogramdd(
+            x,
+            bins=self.edges,
+            range=ranges,
+            normed=False,
+            weights=weights,
+        )
         histo_w2, _ = np.histogramdd(
-            x, bins=self.edges, range=ranges, normed=False, weights=None if weights is None else weights ** 2
+            x,
+            bins=self.edges,
+            range=ranges,
+            normed=False,
+            weights=None if weights is None else weights ** 2,
         )
 
         # Uncertainties
@@ -154,10 +165,12 @@ def _fit(self, x, weights=None, epsilon=0.0):
                     axis_edges[-1], axis_edges[-1] + 2.0 * (axis_edges[-1] - axis_edges[-2])
                 )  # Last bin is treated as at most twice as big as second-to-last
             modified_histo_edges.append(axis_edges)
+
         # Calculate cell volumes
         bin_widths = [axis_edges[1:] - axis_edges[:-1] for axis_edges in modified_histo_edges]
         shape = tuple(self.n_bins)
         volumes = np.ones(shape)
+
         for obs in range(self.n_observables):
             # Broadcast bin widths to array with shape like volumes
             bin_widths_broadcasted = np.ones(shape)
@@ -183,7 +196,9 @@ def _report_binning(self):
 
     def _report_uncertainties(self):
         rel_uncertainties = np.where(
-            self.histo.flatten() > 0.0, self.histo_uncertainties.flatten() / self.histo.flatten(), np.nan
+            self.histo.flatten() > 0.0,
+            self.histo_uncertainties.flatten() / self.histo.flatten(),
+            np.nan,
         )
         if np.nanmax(rel_uncertainties) > 0.5:
             logger.debug(
diff --git a/madminer/utils/morphing.py b/madminer/utils/morphing.py
index e3f268c83..58f83af53 100644
--- a/madminer/utils/morphing.py
+++ b/madminer/utils/morphing.py
@@ -103,7 +103,6 @@ def set_components(self, components):
             scales a given component. For instance, a typical signal, interference, background situation with one
             parameter might be described by the components [[2], [1], [0]].
 
-
         Returns
         -------
             None
@@ -227,7 +226,7 @@ def optimize_basis(
     ):
 
         """
-        Optimizes the morphing basis. If either fixed_benchmarks_from_maxminer or fixed_benchmarks_numpy are not
+        Optimizes the morphing basis. If either fixed_benchmarks_from_madminer or fixed_benchmarks_numpy are not
         None, then these will be used as fixed basis points and only the remaining part of the basis will be optimized.
 
         Parameters
@@ -345,7 +344,6 @@ def calculate_morphing_matrix(self, basis=None):
         morphing_matrix : ndarray
             Morphing matrix with shape `(n_basis_benchmarks, n_components)`
 
-
         """
 
         # Check all data is there
@@ -394,9 +392,7 @@ def calculate_morphing_matrix(self, basis=None):
             morphing_submatrix = morphing_submatrix.T
             morphing_matrix[i * n_benchmarks_this_basis : (i + 1) * n_benchmarks_this_basis] = morphing_submatrix
 
-        morphing_matrix = morphing_matrix.T
-
-        return morphing_matrix
+        return morphing_matrix.T
 
     def calculate_morphing_weights(self, theta, basis=None, morphing_matrix=None):
 
@@ -454,9 +450,7 @@ def calculate_morphing_weights(self, theta, basis=None, morphing_matrix=None):
         component_weights = np.array(component_weights)
 
         # Transform to basis weights
-        weights = morphing_matrix.T.dot(component_weights)
-
-        return weights
+        return morphing_matrix.T.dot(component_weights)
 
     def calculate_morphing_weight_gradient(self, theta, basis=None, morphing_matrix=None):
 
@@ -522,11 +516,8 @@ def calculate_morphing_weight_gradient(self, theta, basis=None, morphing_matrix=
                 component_weight_gradients[c, i] = factor
 
         # Transform to basis weights
-        weight_gradients = morphing_matrix.T.dot(
-            component_weight_gradients
-        ).T  # Shape (n_parameters, n_benchmarks_phys)
-
-        return weight_gradients
+        # Shape (n_parameters, n_benchmarks_phys)
+        return morphing_matrix.T.dot(component_weight_gradients).T
 
     def evaluate_morphing(self, basis=None, morphing_matrix=None, n_test_thetas=100, return_weights_and_thetas=False):
 
@@ -663,6 +654,7 @@ def __init__(self, nuisance_parameters_from_madminer, benchmark_names, reference
         self.i_benchmarks_pos = []
         self.i_benchmarks_neg = []
         self.degrees = []
+
         for key, value in six.iteritems(self.nuisance_parameters):
             self.i_benchmarks_pos.append(benchmark_names.index(value[1]))
             if value[2] is None:
diff --git a/madminer/utils/various.py b/madminer/utils/various.py
index 7fb8387ff..62abb0763 100644
--- a/madminer/utils/various.py
+++ b/madminer/utils/various.py
@@ -112,13 +112,13 @@ def shuffle(*arrays):
 
         shuffled_a = a[permutation]
         shuffled_arrays.append(shuffled_a)
-        a = None
 
     return shuffled_arrays
 
 
 def restrict_samplesize(n, *arrays):
     restricted_arrays = []
+
     for i, a in enumerate(arrays):
         if a is None:
             restricted_arrays.append(None)
@@ -265,8 +265,10 @@ def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False
     # Input
     values = np.array(values, dtype=np.float64)
     quantiles = np.array(quantiles)
+
     if sample_weight is None:
         sample_weight = np.ones(len(values))
+
     sample_weight = np.array(sample_weight, dtype=np.float64)
     assert np.all(quantiles >= 0.0) and np.all(quantiles <= 1.0), "quantiles should be in [0, 1]"
 

From 7e27064bd46bfb63e6584787a54cdd56d28590ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 11:28:55 -0400
Subject: [PATCH 47/65] src: utils module remove 'six'

---
 madminer/utils/histo.py    |  4 +---
 madminer/utils/morphing.py | 13 +++++--------
 madminer/utils/particle.py |  4 +---
 madminer/utils/various.py  | 19 ++++++++-----------
 4 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/madminer/utils/histo.py b/madminer/utils/histo.py
index 449b4032b..b68f341de 100644
--- a/madminer/utils/histo.py
+++ b/madminer/utils/histo.py
@@ -1,7 +1,5 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import numpy as np
 import logging
+import numpy as np
 from madminer.utils.various import weighted_quantile
 
 logger = logging.getLogger(__name__)
diff --git a/madminer/utils/morphing.py b/madminer/utils/morphing.py
index 58f83af53..3538a39e8 100644
--- a/madminer/utils/morphing.py
+++ b/madminer/utils/morphing.py
@@ -1,11 +1,8 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
+import itertools
 import logging
 import numpy as np
-from collections import OrderedDict
-import itertools
 
+from collections import OrderedDict
 from madminer.utils.various import sanitize_array
 
 logger = logging.getLogger(__name__)
@@ -200,7 +197,7 @@ def set_basis(self, basis_from_madminer=None, basis_numpy=None, morphing_matrix=
 
         if basis_from_madminer is not None:
             self.basis = []
-            for bname, benchmark_in in six.iteritems(basis_from_madminer):
+            for bname, benchmark_in in basis_from_madminer.items():
                 self.basis.append([benchmark_in[key] for key in self.parameter_names])
             self.basis = np.array(self.basis)
         elif basis_numpy is not None:
@@ -268,7 +265,7 @@ def optimize_basis(
         if fixed_benchmarks_from_madminer is not None:
             fixed_benchmarks = []
             fixed_benchmark_names = []
-            for bname, benchmark_in in six.iteritems(fixed_benchmarks_from_madminer):
+            for bname, benchmark_in in fixed_benchmarks_from_madminer.items():
                 fixed_benchmark_names.append(bname)
                 fixed_benchmarks.append([benchmark_in[key] for key in self.parameter_names])
             fixed_benchmarks = np.array(fixed_benchmarks)
@@ -655,7 +652,7 @@ def __init__(self, nuisance_parameters_from_madminer, benchmark_names, reference
         self.i_benchmarks_neg = []
         self.degrees = []
 
-        for key, value in six.iteritems(self.nuisance_parameters):
+        for key, value in self.nuisance_parameters.items():
             self.i_benchmarks_pos.append(benchmark_names.index(value[1]))
             if value[2] is None:
                 self.degrees.append(1)
diff --git a/madminer/utils/particle.py b/madminer/utils/particle.py
index 56812cbe6..504674a56 100644
--- a/madminer/utils/particle.py
+++ b/madminer/utils/particle.py
@@ -1,7 +1,5 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-from skhep.math.vectors import LorentzVector
 import logging
+from skhep.math.vectors import LorentzVector
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/various.py b/madminer/utils/various.py
index 62abb0763..b01f7bfc8 100644
--- a/madminer/utils/various.py
+++ b/madminer/utils/various.py
@@ -1,15 +1,12 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
 import logging
-import os
-import stat
-from subprocess import Popen, PIPE
-import io
+import gzip
 import numpy as np
+import os
 import shutil
+import stat
+
 from contextlib import contextmanager
-import gzip
+from subprocess import Popen, PIPE
 
 logger = logging.getLogger(__name__)
 
@@ -18,7 +15,7 @@
 
 def call_command(cmd, log_file=None, return_std=False):
     if log_file is not None:
-        with io.open(log_file, "wb") as log:
+        with open(log_file, "wb") as log:
             proc = Popen(cmd, stdout=log, stderr=log, shell=True)
             _ = proc.communicate()
             exitcode = proc.returncode
@@ -73,7 +70,7 @@ def create_missing_folders(folders):
 def format_benchmark(parameters, precision=2):
     output = ""
 
-    for i, (key, value) in enumerate(six.iteritems(parameters)):
+    for i, (key, value) in enumerate(parameters.items()):
         if i > 0:
             output += ", "
 
@@ -160,7 +157,7 @@ def load_and_check(filename, warning_threshold=1.0e9, memmap_files_larger_than_g
     if filename is None:
         return None
 
-    if not isinstance(filename, six.string_types):
+    if not isinstance(filename, str):
         data = filename
         memmap = False
     else:

From 51865398d49121f017b1a6ca249f8689d4123d3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 11:41:34 -0400
Subject: [PATCH 48/65] src: utils/interfaces module f-strings

---
 madminer/utils/interfaces/delphes.py       |  15 +-
 madminer/utils/interfaces/delphes_root.py  |   8 +-
 madminer/utils/interfaces/hepmc.py         |   2 +-
 madminer/utils/interfaces/lhe.py           |  18 +--
 madminer/utils/interfaces/madminer_hdf5.py |   2 +-
 madminer/utils/interfaces/mg.py            | 161 +++++++++------------
 madminer/utils/interfaces/mg_cards.py      |  43 +++---
 7 files changed, 109 insertions(+), 140 deletions(-)

diff --git a/madminer/utils/interfaces/delphes.py b/madminer/utils/interfaces/delphes.py
index c75c408a0..aeacc7d15 100644
--- a/madminer/utils/interfaces/delphes.py
+++ b/madminer/utils/interfaces/delphes.py
@@ -20,7 +20,7 @@ def run_delphes(
 ):
     """ Runs Delphes on a HepMC sample """
 
-    # Untar event file
+    # Unzip event file
     filename, extension = os.path.splitext(hepmc_sample_filename)
     to_delete = None
     if extension == ".gz":
@@ -38,9 +38,9 @@ def run_delphes(
 
         for i in range(1, 1000):
             if i == 1:
-                filename_candidate = filename_prefix + "_delphes.root"
+                filename_candidate = f"{filename_prefix}_delphes.root"
             else:
-                filename_candidate = filename_prefix + "_delphes_" + str(i) + ".root"
+                filename_candidate = f"{filename_prefix}_delphes_{i}.root"
 
             if not os.path.exists(filename_candidate):
                 delphes_sample_filename = filename_candidate
@@ -61,13 +61,14 @@ def run_delphes(
 
     # Call Delphes
     _ = call_command(
-        "{}{}/DelphesHepMC {} {} {}".format(
-            initial_command, delphes_directory, delphes_card_filename, delphes_sample_filename, hepmc_sample_filename
-        ),
+        f"{initial_command}{delphes_directory}/DelphesHepMC "
+        f"{delphes_card_filename} "
+        f"{delphes_sample_filename} "
+        f"{hepmc_sample_filename}",
         log_file=log_file,
     )
 
-    # Delete untarred file
+    # Delete unzipped file
     if to_delete is not None:
         logger.debug("Deleting %s", to_delete)
         os.remove(to_delete)
diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py
index f8d15dfa8..ec5083985 100644
--- a/madminer/utils/interfaces/delphes_root.py
+++ b/madminer/utils/interfaces/delphes_root.py
@@ -285,10 +285,10 @@ def _get_particles_truth(tree, pt_min, eta_max, included_pdgids=None):
 
 
 def _get_particles_charged(tree, name, mass, pdgid_positive_charge, pt_min, eta_max):
-    pts = tree.array(name + ".PT")
-    etas = tree.array(name + ".Eta")
-    phis = tree.array(name + ".Phi")
-    charges = tree.array(name + ".Charge")
+    pts = tree.array(f"{name}.PT")
+    etas = tree.array(f"{name}.Eta")
+    phis = tree.array(f"{name}.Phi")
+    charges = tree.array(f"{name}.Charge")
 
     all_particles = []
 
diff --git a/madminer/utils/interfaces/hepmc.py b/madminer/utils/interfaces/hepmc.py
index 338a46b3d..0a52c9611 100644
--- a/madminer/utils/interfaces/hepmc.py
+++ b/madminer/utils/interfaces/hepmc.py
@@ -9,7 +9,7 @@
 
 
 def extract_weight_order(filename, default_weight_label=None):
-    # Untar event file
+    # Unzip event file
     new_filename, extension = os.path.splitext(filename)
     if extension == ".gz":
         if not os.path.exists(new_filename):
diff --git a/madminer/utils/interfaces/lhe.py b/madminer/utils/interfaces/lhe.py
index 4575b101e..c4733f584 100644
--- a/madminer/utils/interfaces/lhe.py
+++ b/madminer/utils/interfaces/lhe.py
@@ -273,7 +273,7 @@ def parse_lhe_file(
             elif isinstance(processing, float):
                 output_weights[nuisance_benchmark0] = processing * weights_all_events[weight_name0]
             else:
-                raise RuntimeError("Unknown nuisance processiing {}".format(processing))
+                raise RuntimeError(f"Unknown nuisance processing {processing}")
 
             # Store second benchmark associated with nuisance param
             if nuisance_benchmark1 is None or weight_name1 is None:
@@ -283,7 +283,7 @@ def parse_lhe_file(
             elif isinstance(processing, float):
                 output_weights[nuisance_benchmark1] = processing * weights_all_events[weight_name1]
             else:
-                raise RuntimeError("Unknown nuisance processing {}".format(processing))
+                raise RuntimeError(f"Unknown nuisance processing {processing}")
 
     return observations_dict, output_weights
 
@@ -532,8 +532,8 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de
     syst_type = systematics_definition[0]
 
     if syst_type == "norm":
-        nuisance_param_name = "{}_nuisance_param_0".format(systematics_name)
-        benchmark_name = "{}_benchmark_0".format(nuisance_param_name)
+        nuisance_param_name = f"{systematics_name}_nuisance_param_0"
+        benchmark_name = f"{nuisance_param_name}_benchmark_0"
         nuisance_param_definition = (benchmark_name, None), (None, None), systematics_definition[1]
         return {nuisance_param_name: nuisance_param_definition}
 
@@ -596,7 +596,7 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de
 
                     # Matching time!
                     if approx_equal(weight_mur, mur) and approx_equal(weight_muf, muf):
-                        benchmark_name = "{}_nuisance_param_0_benchmark_{}".format(systematics_name, k)
+                        benchmark_name = f"{systematics_name}_nuisance_param_0_benchmark_{k}"
                         nuisance_param_definition_parts.append((benchmark_name, weight_id))
                         break
 
@@ -614,7 +614,7 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de
             return {}
         else:
             # Output
-            nuisance_param_name = "{}_nuisance_param_0".format(systematics_name)
+            nuisance_param_name = f"{systematics_name}_nuisance_param_0"
             if len(nuisance_param_definition_parts) > 1:
                 nuisance_dict = {
                     nuisance_param_name: (nuisance_param_definition_parts[0], nuisance_param_definition_parts[1], None)
@@ -657,8 +657,8 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de
                 logger.debug("Found PDF weight %s / %s", weight_id, weight_pdf)
 
                 # Add every PDF Hessian direction to nuisance parameters
-                nuisance_param_name = "{}_nuisance_param_{}".format(systematics_name, i)
-                benchmark_name = "{}_benchmark_0".format(nuisance_param_name)
+                nuisance_param_name = f"{systematics_name}_nuisance_param_{i}"
+                benchmark_name = f"{nuisance_param_name}_benchmark_0"
                 nuisance_dict[nuisance_param_name] = (benchmark_name, weight_id), (None, None), None
 
         # Check that everything was found
@@ -854,7 +854,7 @@ def _parse_lhe_file_with_bad_chars(filename):
 
 
 def _untar_and_parse_lhe_file(filename, tags=None):
-    # Untar event file
+    # Unzip event file
     new_filename, extension = os.path.splitext(filename)
     if extension == ".gz":
         if not os.path.exists(new_filename):
diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py
index 325243ce9..35f8ac32b 100644
--- a/madminer/utils/interfaces/madminer_hdf5.py
+++ b/madminer/utils/interfaces/madminer_hdf5.py
@@ -711,7 +711,7 @@ def _load_systematics(filename):
                     syst_data[1] = str(syst_data[1])
                     syst_data[2] = str(syst_data[2])
                 else:
-                    raise RuntimeError("Error while reading systematics from HDF5 file: {}".format(syst_data))
+                    raise RuntimeError(f"Error while reading systematics from HDF5 file: {syst_data}")
                 systematics[name] = tuple(syst_data)
 
         except KeyError:
diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py
index 85f067ac9..b0489ac43 100644
--- a/madminer/utils/interfaces/mg.py
+++ b/madminer/utils/interfaces/mg.py
@@ -70,31 +70,27 @@ def generate_mg_process(
         copy_ufo_model(ufo_model_directory, mg_directory)
 
     # MG commands
-    temp_proc_card_file = temp_directory + "/generate.mg5"
+    temp_proc_card_file = f"{temp_directory}/generate.mg5"
     shutil.copyfile(proc_card_file, temp_proc_card_file)
 
     with open(temp_proc_card_file, "a") as myfile:
-        myfile.write("\n\noutput " + mg_process_directory)
+        myfile.write(f"\n")
+        myfile.write(f"\n")
+        myfile.write(f"output {mg_process_directory}")
 
     # Call MG5
-    if initial_command is None:
-        initial_command = ""
-    else:
-        initial_command = initial_command + "; "
+    initial_command = f"{initial_command}; " if initial_command else ""
 
     # Explicitly call Python 2 if necessary
     if explicit_python_call:
-        python_call = python_executable + " " if python_executable is not None else "python2.7 "
+        python_call = f"{python_executable} "  if python_executable is not None else "python2.7 "
     else:
         python_call = ""
 
-    logger.info(
-        "Calling MadGraph: %s", initial_command + python_call + mg_directory + "/bin/mg5_aMC " + temp_proc_card_file
-    )
+    command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {temp_proc_card_file}"
+    logger.info(f"Calling MadGraph: {command}")
 
-    _ = call_command(
-        initial_command + python_call + mg_directory + "/bin/mg5_aMC " + temp_proc_card_file, log_file=log_file
-    )
+    _ = call_command(cmd=command, log_file=log_file)
 
 
 def setup_mg_with_scripts(
@@ -198,19 +194,19 @@ def setup_mg_with_scripts(
     # Find filenames for process card and script
     if proc_card_filename_from_mgprocdir is None:
         for i in range(1000):
-            proc_card_filename_from_mgprocdir = "/Cards/start_event_generation_{}.mg5".format(i)
-            if not os.path.isfile(mg_process_directory + "/" + proc_card_filename_from_mgprocdir):
+            proc_card_filename_from_mgprocdir = f"/Cards/start_event_generation_{i}.mg5"
+            if not os.path.isfile(f"{mg_process_directory}/{proc_card_filename_from_mgprocdir}"):
                 break
     else:
         proc_card_filename = mg_process_directory + "/" + proc_card_filename_from_mgprocdir
 
     if script_file_from_mgprocdir is None:
         for i in range(1000):
-            script_file = mg_process_directory + "/madminer/scripts/madminer_run_{}.sh".format(i)
+            script_file = f"{mg_process_directory}/madminer/scripts/madminer_run_{i}.sh"
             if not os.path.isfile(script_file):
                 break
     else:
-        script_file = mg_process_directory + "/" + script_file_from_mgprocdir
+        script_file = f"{mg_process_directory}/{script_file_from_mgprocdir}"
 
     script_filename = os.path.basename(script_file)
 
@@ -243,48 +239,34 @@ def setup_mg_with_scripts(
     #  Card copying commands
     copy_commands = ""
     if run_card_file_from_mgprocdir is not None:
-        copy_commands += "cp {}/{} {}{}\n".format(
-            mg_process_directory_placeholder,
-            run_card_file_from_mgprocdir,
-            mg_process_directory_placeholder,
-            "/Cards/run_card.dat",
-        )
+        copy_commands += f"cp " \
+            f"{mg_process_directory_placeholder}/{run_card_file_from_mgprocdir} " \
+            f"{mg_process_directory_placeholder}/Cards/run_card.dat\n"
+
     if param_card_file_from_mgprocdir is not None:
-        copy_commands += "cp {}/{} {}{}\n".format(
-            mg_process_directory_placeholder,
-            param_card_file_from_mgprocdir,
-            mg_process_directory_placeholder,
-            "/Cards/param_card.dat",
-        )
+        copy_commands += f"cp " \
+            f"{mg_process_directory_placeholder}/{param_card_file_from_mgprocdir} " \
+            f"{mg_process_directory_placeholder}/Cards/param_card.dat\n"
+
     if reweight_card_file_from_mgprocdir is not None and not is_background:
-        copy_commands += "cp {}/{} {}{}\n".format(
-            mg_process_directory_placeholder,
-            reweight_card_file_from_mgprocdir,
-            mg_process_directory_placeholder,
-            "/Cards/reweight_card.dat",
-        )
+        copy_commands += f"cp " \
+            f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \
+            f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n"
+
     if pythia8_card_file_from_mgprocdir is not None and order == "LO":
-        copy_commands += "cp {}/{} {}{}\n".format(
-            mg_process_directory_placeholder,
-            pythia8_card_file_from_mgprocdir,
-            mg_process_directory_placeholder,
-            "/Cards/pythia8_card.dat",
-        )
-    elif pythia8_card_file_from_mgprocdir is not None and order == "NLO":
-        copy_commands += "cp {}/{} {}{}\n".format(
-            mg_process_directory_placeholder,
-            pythia8_card_file_from_mgprocdir,
-            mg_process_directory_placeholder,
-            "/Cards/shower_card.dat",
-        )
+        copy_commands += f"cp " \
+            f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \
+            f"{mg_process_directory_placeholder}/Cards/pythia8_card.dat\n"
+
+    if pythia8_card_file_from_mgprocdir is not None and order == "NLO":
+        copy_commands += f"cp " \
+            f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \
+            f"{mg_process_directory_placeholder}/Cards/shower_card.dat\n"
 
     if configuration_file_from_mgprocdir is not None:
-        copy_commands += "cp {}/{} {}{}\n".format(
-            mg_process_directory_placeholder,
-            configuration_file_from_mgprocdir,
-            mg_process_directory_placeholder,
-            "/Cards/me5_configuration.txt",
-        )
+        copy_commands += f"cp " \
+            f"{mg_process_directory_placeholder}/{configuration_file_from_mgprocdir} " \
+            f"{mg_process_directory_placeholder}/Cards/me5_configuration.txt\n"
 
     # Replace environment variable in proc card
     replacement_command = """sed -e 's@\$mgprocdir@'"$mgprocdir"'@' {}/{} > {}/{}""".format(
@@ -323,13 +305,11 @@ def setup_mg_with_scripts(
     make_file_executable(script_file)
 
     # How to call it from master script
-    call_placeholder = "{}/{} {} {} {}".format(
-        mg_process_directory_placeholder,
-        script_file_from_mgprocdir,
-        mg_directory_placeholder,
-        mg_process_directory_placeholder,
-        log_dir_placeholder,
-    )
+    call_placeholder = \
+        f"{mg_process_directory_placeholder}/{script_file_from_mgprocdir} " \
+        f"{mg_directory_placeholder} " \
+        f"{mg_process_directory_placeholder} " \
+        f"{log_dir_placeholder}"
 
     return call_placeholder
 
@@ -415,22 +395,22 @@ def run_mg(
 
     # Copy cards
     if run_card_file is not None:
-        shutil.copyfile(run_card_file, mg_process_directory + "/Cards/run_card.dat")
+        shutil.copyfile(run_card_file, f"{mg_process_directory}/Cards/run_card.dat")
     if param_card_file is not None:
-        shutil.copyfile(param_card_file, mg_process_directory + "/Cards/param_card.dat")
+        shutil.copyfile(param_card_file, f"{mg_process_directory}/Cards/param_card.dat")
     if reweight_card_file is not None and not is_background:
-        shutil.copyfile(reweight_card_file, mg_process_directory + "/Cards/reweight_card.dat")
+        shutil.copyfile(reweight_card_file, f"{mg_process_directory}/Cards/reweight_card.dat")
     if pythia8_card_file is not None and order == "LO":
-        shutil.copyfile(pythia8_card_file, mg_process_directory + "/Cards/pythia8_card.dat")
+        shutil.copyfile(pythia8_card_file, f"{mg_process_directory}/Cards/pythia8_card.dat")
     if pythia8_card_file is not None and order == "NLO":
-        shutil.copyfile(pythia8_card_file, mg_process_directory + "/Cards/shower_card.dat")
+        shutil.copyfile(pythia8_card_file, f"{mg_process_directory}/Cards/shower_card.dat")
     if configuration_card_file is not None:
-        shutil.copyfile(configuration_card_file, mg_process_directory + "/Cards/me5_configuration.txt")
+        shutil.copyfile(configuration_card_file, f"{mg_process_directory}/Cards/me5_configuration.txt")
 
     # Find filenames for process card and script
     if proc_card_filename is None:
         for i in range(1000):
-            proc_card_filename = mg_process_directory + "/Cards/start_event_generation_{}.mg5".format(i)
+            proc_card_filename = f"{mg_process_directory}/Cards/start_event_generation_{i}.mg5"
             if not os.path.isfile(proc_card_filename):
                 break
 
@@ -454,20 +434,18 @@ def run_mg(
         file.write(mg_commands)
 
     # Call MG5
-    if initial_command is None:
-        initial_command = ""
-    else:
-        initial_command = initial_command + "; "
+    initial_command = f"{initial_command}; " if initial_command else ""
 
     # Explicitly call Python 2 if necessary
     if explicit_python_call:
-        python_call = python_executable + " " if python_executable is not None else "python2.7 "
+        python_call = f"{python_executable} "  if python_executable is not None else "python2.7 "
     else:
         python_call = ""
 
-    _ = call_command(
-        initial_command + python_call + mg_directory + "/bin/mg5_aMC " + proc_card_filename, log_file=log_file
-    )
+    command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {proc_card_filename}"
+    logger.info(f"Calling MadGraph: {command}")
+
+    _ = call_command(cmd=command, log_file=log_file)
 
 
 def setup_mg_reweighting_with_scripts(
@@ -525,10 +503,11 @@ def setup_mg_reweighting_with_scripts(
     log_dir_placeholder = "$mmlogdir"
     placeholder_definition = r"mgprocdir=${1:-" + mg_process_directory + r"}" + "\n"
     placeholder_definition += r"mmlogdir=${2:-" + log_dir + r"}"
+
     if script_file_from_mgprocdir is None:
-        script_file = mg_process_directory + "/madminer/scripts/madminer_reweight_{}.sh".format(run_name)
+        script_file = f"{mg_process_directory}/madminer/scripts/madminer_reweight_{run_name}.sh"
     else:
-        script_file = mg_process_directory + "/" + script_file_from_mgprocdir
+        script_file = f"{mg_process_directory}/{script_file_from_mgprocdir}"
 
     script_filename = os.path.basename(script_file)
 
@@ -541,12 +520,9 @@ def setup_mg_reweighting_with_scripts(
 
     #  Card copying commands
     if reweight_card_file_from_mgprocdir is not None:
-        copy_commands = "cp {}/{} {}{}\n".format(
-            mg_process_directory_placeholder,
-            reweight_card_file_from_mgprocdir,
-            mg_process_directory_placeholder,
-            "/Cards/reweight_card.dat",
-        )
+        copy_commands = f"cp " \
+            f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \
+            f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n"
     else:
         copy_commands = ""
 
@@ -570,9 +546,9 @@ def setup_mg_reweighting_with_scripts(
     make_file_executable(script_file)
 
     # How to call it from master script
-    call_instruction = "{}/{} [MG_process_directory] [log_directory]".format(
-        mg_process_directory, script_file_from_mgprocdir
-    )
+    call_instruction = \
+        f"{mg_process_directory}/{script_file_from_mgprocdir} " \
+        f"[MG_process_directory] [log_directory]"
 
     return call_instruction
 
@@ -618,19 +594,17 @@ def run_mg_reweighting(mg_process_directory, run_name, reweight_card_file=None,
         shutil.copyfile(reweight_card_file, mg_process_directory + "/Cards/reweight_card.dat")
 
     # Call MG5 reweight feature
-    if initial_command is None:
-        initial_command = ""
-    else:
-        initial_command = initial_command + "; "
+    initial_command = f"{initial_command}; " if initial_command else ""
 
     _ = call_command(
-        "{}{}/bin/madevent reweight {} -f".format(initial_command, mg_process_directory, run_name), log_file=log_file
+        cmd=f"{initial_command}{mg_process_directory}/bin/madevent reweight {run_name} -f",
+        log_file=log_file,
     )
 
 
 def copy_ufo_model(ufo_directory, mg_directory):
     _, model_name = os.path.split(ufo_directory)
-    destination = mg_directory + "/models/" + model_name
+    destination = f"{mg_directory}/models/{model_name}"
 
     if os.path.isdir(destination):
         return
@@ -648,6 +622,7 @@ def create_master_script(log_directory, master_script_filename, mg_directory, mg
         + "# Usage: run.sh [MG_directory] [MG_process_directory] [log_directory]\n\n"
         + "{}\n\n{}"
     ).format(placeholder_definition, commands)
+
     with open(master_script_filename, "w") as file:
         file.write(script)
     make_file_executable(master_script_filename)
diff --git a/madminer/utils/interfaces/mg_cards.py b/madminer/utils/interfaces/mg_cards.py
index 39bfbab18..708f655d0 100644
--- a/madminer/utils/interfaces/mg_cards.py
+++ b/madminer/utils/interfaces/mg_cards.py
@@ -49,7 +49,7 @@ def export_param_card(benchmark, parameters, param_card_template_file, mg_proces
                     continue
 
                 if lha_id == parameter_lha_id:
-                    lines[i] = "    " + str(parameter_lha_id) + "    " + str(parameter_value) + "    # MadMiner"
+                    lines[i] = f"    {parameter_lha_id}    {parameter_value}    # MadMiner"
                     changed_line = True
                     break
 
@@ -61,25 +61,18 @@ def export_param_card(benchmark, parameters, param_card_template_file, mg_proces
 
                 current_block = None
                 if lha_id == parameter_lha_id:
-                    lines[i] = (
-                        str(parameter_lha_block)
-                        + "    "
-                        + str(parameter_lha_id)
-                        + "    "
-                        + str(parameter_value)
-                        + "    # MadMiner"
-                    )
+                    lines[i] = f"{parameter_lha_block}    {parameter_lha_id}    {parameter_value}    # MadMiner"
                     changed_line = True
                     break
 
         if not changed_line:
-            raise ValueError("Could not find LHA ID {0} in param_card template!".format(parameter_lha_id))
+            raise ValueError(f"Could not find LHA ID {parameter_lha_id} in param_card template!")
 
         param_card = "\n".join(lines)
 
     # Output filename
     if param_card_filename is None:
-        param_card_filename = mg_process_directory + "/Cards/param_card.dat"
+        param_card_filename = f"{mg_process_directory}/Cards/param_card.dat"
 
     # Save param_card.dat
     with open(param_card_filename, "w") as file:
@@ -114,7 +107,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di
                 variables = {"theta": parameter_value}
                 parameter_value = eval(parameter_transform, variables)
 
-            lines.append("  set {0} {1} {2}".format(parameter_lha_block, parameter_lha_id, parameter_value))
+            lines.append(f"  set {parameter_lha_block} {parameter_lha_id} {parameter_value}")
 
         lines.append("")
 
@@ -122,7 +115,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di
 
     # Output filename
     if reweight_card_filename is None:
-        reweight_card_filename = mg_process_directory + "/Cards/reweight_card.dat"
+        reweight_card_filename = f"{mg_process_directory}/Cards/reweight_card.dat"
 
     # Save param_card.dat
     with open(reweight_card_filename, "w") as file:
@@ -182,7 +175,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde
         line_key = elements[-1].strip()
 
         if line_key in entries_to_comment_out:
-            run_card_lines[i] = "# {} # Commented out by MadMiner".format(line)
+            run_card_lines[i] = f"# {line} # Commented out by MadMiner"
             continue
 
     # Add new entries - sytematics
@@ -192,7 +185,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde
         run_card_lines.append("# MadMiner systematics setup                                         *")
         run_card_lines.append("#*********************************************************************")
         for key, value in six.iteritems(settings):
-            run_card_lines.append("{} = {}".format(value, key))
+            run_card_lines.append(f"{value} = {key}")
         run_card_lines.append("")
 
     # Write new run card
@@ -217,31 +210,31 @@ def create_systematics_arguments(systematics):
         if value[0] == "scale" and value[1] == "mu":
             if mur_done or muf_done:
                 raise ValueError("Multiple nuisance parameter for scale variation!")
-            systematics_arguments.append("'--mur={}'".format(value[2]))
-            systematics_arguments.append("'--muf={}'".format(value[2]))
-            systematics_arguments.append("'--together=mur,muf'")
-            systematics_arguments.append("'--dyn=-1'")
+            systematics_arguments.append(f"'--mur={value[2]}'")
+            systematics_arguments.append(f"'--muf={value[2]}'")
+            systematics_arguments.append(f"'--together=mur,muf'")
+            systematics_arguments.append(f"'--dyn=-1'")
             mur_done = True
             muf_done = True
         elif value[0] == "scale" and value[1] == "mur":
             if mur_done:
                 raise ValueError("Multiple nuisance parameter for mur variation!")
-            systematics_arguments.append("'--mur={}'".format(value[2]))
-            systematics_arguments.append("'--dyn=-1'")
+            systematics_arguments.append(f"'--mur={value[2]}'")
+            systematics_arguments.append(f"'--dyn=-1'")
             mur_done = True
         elif value[0] == "scale" and value[1] == "muf":
             if muf_done:
                 raise ValueError("Multiple nuisance parameter for muf variation!")
-            systematics_arguments.append("'--muf={}'".format(value[2]))
-            systematics_arguments.append("'--dyn=-1'")
+            systematics_arguments.append(f"'--muf={value[2]}'")
+            systematics_arguments.append(f"'--dyn=-1'")
             muf_done = True
         elif value[0] == "pdf":
             if pdf_done:
                 raise ValueError("Multiple nuisance parameter for PDF variation!")
-            systematics_arguments.append("'--pdf={}'".format(value[1]))
+            systematics_arguments.append(f"'--pdf={value[1]}'")
             pdf_done = True
 
     if len(systematics_arguments) > 0:
-        return "[" + ", ".join(systematics_arguments) + "]"
+        return f"[{', '.join(systematics_arguments)}]"
 
     return ""

From 0f157a4b2598a5fa0735952944a30ae08d916df3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 13:45:07 -0400
Subject: [PATCH 49/65] src: utils/interfaces module improve fmt

---
 madminer/utils/interfaces/delphes_root.py  |  5 +--
 madminer/utils/interfaces/lhe.py           | 38 +++++++++++++++++++---
 madminer/utils/interfaces/madminer_hdf5.py | 28 +++++++++++++++-
 madminer/utils/interfaces/mg_cards.py      |  2 +-
 4 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py
index ec5083985..6e6d0b85e 100644
--- a/madminer/utils/interfaces/delphes_root.py
+++ b/madminer/utils/interfaces/delphes_root.py
@@ -42,15 +42,12 @@ def parse_delphes_root_file(
         logger.debug("Extracting weights %s", weight_labels)
 
     # Delphes ROOT file
-    root_file = uproot.open(str(delphes_sample_file))
-    # The str() call is important when using numpy 1.16.0 and Python 2.7. In this combination of versions, a unicode
-    # delphes_sample_file would lead to a crash.
+    root_file = uproot.open(delphes_sample_file)
 
     # Delphes tree
     tree = root_file["Delphes"]
 
     # Weights
-    n_weights = 0
     weights = None
     if weight_labels is not None:
         try:
diff --git a/madminer/utils/interfaces/lhe.py b/madminer/utils/interfaces/lhe.py
index c4733f584..d76874617 100644
--- a/madminer/utils/interfaces/lhe.py
+++ b/madminer/utils/interfaces/lhe.py
@@ -70,7 +70,7 @@ def parse_lhe_file(
     if efficiencies_default_pass is None:
         efficiencies_default_pass = {key: 1.0 for key in six.iterkeys(efficiencies)}
 
-    # Untar and open LHE file
+    # Unzip and open LHE file
     run_card = None
     for elem in _untar_and_parse_lhe_file(filename):
         if elem.tag == "MGRunCard":
@@ -305,11 +305,14 @@ def _report_parse_results(
         logger.info("  %s / %s events pass efficiency %s", n_pass, n_pass + n_fail, efficiency)
     for n_eff, efficiency, n_pass, n_fail in zip(avg_efficiencies, efficiencies, pass_efficiencies, fail_efficiencies):
         logger.info("  average efficiency for %s is %s", efficiency, n_eff / (n_pass + n_fail))
+
     n_events_pass = len(observations_all_events)
+
     if len(cuts) > 0:
         logger.info("  %s events pass all cuts/efficiencies", n_events_pass)
     if n_events_with_negative_weights > 0:
         logger.warning("  %s events contain negative weights", n_events_with_negative_weights)
+
     return n_events_pass
 
 
@@ -365,14 +368,26 @@ def _parse_event(
     pass_all_cuts = True
     if pass_all_observation:
         pass_all_cuts = _parse_cuts(
-            cuts, cuts_default_pass, fail_cuts, observables, observations, pass_all_cuts, pass_cuts, variables
+            cuts,
+            cuts_default_pass,
+            fail_cuts,
+            observables,
+            observations,
+            pass_all_cuts,
+            pass_cuts,
+            variables,
         )
 
     # Efficiencies
     pass_all_efficiencies = True
     if pass_all_observation and pass_all_cuts:
         pass_all_efficiencies, total_efficiency = _parse_efficiencies(
-            avg_efficiencies, efficiencies, efficiencies_default_pass, fail_efficiencies, pass_efficiencies, variables
+            avg_efficiencies,
+            efficiencies,
+            efficiencies_default_pass,
+            fail_efficiencies,
+            pass_efficiencies,
+            variables,
         )
 
         if pass_all_efficiencies:
@@ -395,18 +410,21 @@ def _parse_event(
 
 def _report_negative_weights(n_events_with_negative_weights, weights):
     n_negative_weights = np.sum(np.array(list(weights.values())) < 0.0)
+
     if n_negative_weights > 0:
         n_events_with_negative_weights += 1
         if n_events_with_negative_weights <= 3:
             logger.warning("Found %s negative weights in event. Weights: %s", n_negative_weights, weights)
         if n_events_with_negative_weights == 3:
             logger.warning("Skipping warnings about negative weights from now on...")
+
     return n_events_with_negative_weights
 
 
 def _parse_observations(observables, observables_defaults, observables_required, variables):
     observations = []
     pass_all_observation = True
+
     for obs_name, obs_definition in six.iteritems(observables):
         if isinstance(obs_definition, six.string_types):
             try:
@@ -434,15 +452,22 @@ def _parse_observations(observables, observables_defaults, observables_required,
                 if default is None:
                     default = np.nan
                 observations.append(default)
+
     return observations, pass_all_observation
 
 
 def _parse_efficiencies(
-    avg_efficiencies, efficiencies, efficiencies_default_pass, fail_efficiencies, pass_efficiencies, variables
+    avg_efficiencies,
+    efficiencies,
+    efficiencies_default_pass,
+    fail_efficiencies,
+    pass_efficiencies,
+    variables,
 ):
     # Apply efficiencies
     total_efficiency = 1.0
     pass_all_efficiencies = True
+
     for i_efficiency, (efficiency, default_pass) in enumerate(zip(efficiencies, efficiencies_default_pass)):
         try:
             efficiency_result = eval(efficiency, variables)
@@ -462,6 +487,7 @@ def _parse_efficiencies(
             else:
                 fail_efficiencies[i_efficiency] += 1
                 pass_all_efficiencies = False
+
     return pass_all_efficiencies, total_efficiency
 
 
@@ -469,6 +495,7 @@ def _parse_cuts(cuts, cuts_default_pass, fail_cuts, observables, observations, p
     # Objects for cuts
     for obs_name, obs_value in zip(observables.keys(), observations):
         variables[obs_name] = obs_value
+
     # Check cuts
     for i_cut, (cut, default_pass) in enumerate(zip(cuts, cuts_default_pass)):
         try:
@@ -485,6 +512,7 @@ def _parse_cuts(cuts, cuts_default_pass, fail_cuts, observables, observations, p
             else:
                 fail_cuts[i_cut] += 1
                 pass_all_cuts = False
+
     return pass_all_cuts
 
 
@@ -504,7 +532,7 @@ def extract_nuisance_parameters_from_lhe_file(filename, systematics):
     # Parse scale factors from strings in systematics
     logger.debug("Systematics setup: %s", systematics)
 
-    # Untar and parse LHE file
+    # Unzip and parse LHE file
     initrwgts = _untar_and_parse_lhe_file(filename, ["initrwgt"])
 
     # Find weight groups
diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py
index 35f8ac32b..0c3ef778b 100644
--- a/madminer/utils/interfaces/madminer_hdf5.py
+++ b/madminer/utils/interfaces/madminer_hdf5.py
@@ -58,7 +58,12 @@ def save_nuisance_setup_to_madminer_file(
 
 
 def save_preformatted_events_to_madminer_file(
-    filename, observations, weights, sampling_benchmarks, copy_setup_from, overwrite_existing_samples=True
+    filename,
+    observations,
+    weights,
+    sampling_benchmarks,
+    copy_setup_from,
+    overwrite_existing_samples=True,
 ):
     _copy_madminer_file(copy_setup_from, filename, overwrite_existing_samples)
     _save_events(
@@ -269,6 +274,7 @@ def madminer_event_loader(
 
 def _save_parameters(filename, overwrite_existing_files, parameters):
     io_tag = "w" if overwrite_existing_files else "x"
+
     with h5py.File(filename, io_tag) as f:
         # Prepare parameters
         parameter_names = [pname for pname in parameters]
@@ -293,11 +299,13 @@ def _save_parameters(filename, overwrite_existing_files, parameters):
         f.create_dataset("parameters/max_power", data=parameter_max_power)
         f.create_dataset("parameters/ranges", data=parameter_ranges)
         f.create_dataset("parameters/transforms", (n_parameters,), dtype="S256", data=parameter_transforms)
+
     return parameter_names
 
 
 def _save_benchmarks(benchmarks, benchmarks_is_nuisance, filename, parameter_names):
     io_tag = "a"  # Read-write if file exists, otherwise create
+
     with h5py.File(filename, io_tag) as f:
         # Prepare benchmarks
         benchmark_names = [bname for bname in benchmarks]
@@ -320,6 +328,7 @@ def _save_benchmarks(benchmarks, benchmarks_is_nuisance, filename, parameter_nam
 
 def _save_benchmarks2(benchmark_is_nuisance, benchmark_names, benchmark_values, filename, reference_benchmark):
     io_tag = "a"  # Read-write if file exists, otherwise create
+
     with h5py.File(filename, io_tag) as f:
         # Prepare benchmarks for saving
         n_benchmarks = len(benchmark_names)
@@ -367,6 +376,7 @@ def _save_finite_differences(filename, finite_difference_benchmarks, finite_diff
 
 def _save_morphing(filename, morphing_components, morphing_matrix):
     io_tag = "a"  # Read-write if file exists, otherwise create
+
     with h5py.File(filename, io_tag) as f:
         # Store morphing info
         if morphing_components is not None:
@@ -377,6 +387,7 @@ def _save_morphing(filename, morphing_components, morphing_matrix):
 
 def _save_systematics(filename, systematics):
     io_tag = "a"
+
     with h5py.File(filename, io_tag) as f:
         # Prepare and store systematics setup
         if systematics is not None and len(systematics) > 0:
@@ -393,6 +404,7 @@ def _save_systematics(filename, systematics):
 
 def _save_nuisance_parameters(filename, nuisance_parameters, overwrite_existing_nuisance_parameters):
     io_tag = "a"  # Read-write if file exists, otherwise create
+
     with h5py.File(filename, io_tag) as f:
         # Make space for nuisance params
         if overwrite_existing_nuisance_parameters:
@@ -441,6 +453,7 @@ def _save_nuisance_parameters(filename, nuisance_parameters, overwrite_existing_
 
 def _save_n_events(filename, n_events_background, n_events_per_sampling_benchmark, overwrite_existing_samples):
     io_tag = "a"  # Read-write if file exists, otherwise create
+
     with h5py.File(filename, io_tag) as f:
         # Check if groups exist already
         if overwrite_existing_samples:
@@ -493,6 +506,7 @@ def _save_events(
 
 def _save_observables(filename, observables, overwrite_existing_samples):
     io_tag = "a"  # Read-write if file exists, otherwise create
+
     if observables is None:
         return None
 
@@ -524,6 +538,7 @@ def _save_observables(filename, observables, overwrite_existing_samples):
 
 
 def _load_parameters(filename):
+
     with h5py.File(filename, "r") as f:
         # Parameters
         try:
@@ -556,10 +571,12 @@ def _load_parameters(filename):
 
         except KeyError:
             raise IOError("Cannot read parameters from HDF5 file")
+
     return parameter_names, parameters
 
 
 def _load_benchmarks(filename, include_nuisance_benchmarks, parameter_names, return_dict=True):
+
     with h5py.File(filename, "r") as f:
         # Benchmarks
         try:
@@ -620,6 +637,7 @@ def _load_finite_differences(filename, parameter_names):
 
 
 def _load_n_samples(filename):
+
     with h5py.File(filename, "r") as f:
         # Number of samples
         try:
@@ -645,6 +663,7 @@ def _load_n_samples(filename):
 
 
 def _load_morphing(filename):
+
     with h5py.File(filename, "r") as f:
         # Morphing
         try:
@@ -658,6 +677,7 @@ def _load_morphing(filename):
 
 
 def _load_nuisance_params(filename):
+
     with h5py.File(filename, "r") as f:
         # Nuisance parameters
         try:
@@ -687,10 +707,12 @@ def _load_nuisance_params(filename):
 
         except KeyError:
             nuisance_parameters = None
+
     return nuisance_parameters
 
 
 def _load_systematics(filename):
+
     with h5py.File(filename, "r") as f:
         # Systematics setup
         try:
@@ -716,10 +738,12 @@ def _load_systematics(filename):
 
         except KeyError:
             systematics = OrderedDict()
+
     return systematics
 
 
 def _load_observables(filename):
+
     with h5py.File(filename, "r") as f:
         # Observables
         try:
@@ -734,6 +758,7 @@ def _load_observables(filename):
                 observables[oname] = odef
         except KeyError:
             observables = None
+
     return observables
 
 
@@ -771,6 +796,7 @@ def _sort_weights(benchmark_names, weights):
     except Exception as e:
         logger.warning("Issue matching weight names in HepMC file to benchmark names in MadMiner file:\n%s", e)
         weights_sorted = [weights[key] for key in weights]
+
     return weights_sorted
 
 
diff --git a/madminer/utils/interfaces/mg_cards.py b/madminer/utils/interfaces/mg_cards.py
index 708f655d0..efbf58b37 100644
--- a/madminer/utils/interfaces/mg_cards.py
+++ b/madminer/utils/interfaces/mg_cards.py
@@ -178,7 +178,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde
             run_card_lines[i] = f"# {line} # Commented out by MadMiner"
             continue
 
-    # Add new entries - sytematics
+    # Add new entries - systematics
     if order == "LO":
         run_card_lines.append("")
         run_card_lines.append("#*********************************************************************")

From 73cf4fde14e1eb1826f4d5fa48579a7c5bb420fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Wed, 23 Sep 2020 13:58:24 -0400
Subject: [PATCH 50/65] src: utils/interfaces module remove 'six'

---
 madminer/utils/interfaces/delphes.py       |  2 --
 madminer/utils/interfaces/delphes_root.py  | 15 ++++-----
 madminer/utils/interfaces/hepmc.py         |  6 ++--
 madminer/utils/interfaces/lhe.py           | 38 ++++++++++------------
 madminer/utils/interfaces/madminer_hdf5.py | 10 +++---
 madminer/utils/interfaces/mg.py            |  4 +--
 madminer/utils/interfaces/mg_cards.py      | 15 ++++-----
 7 files changed, 36 insertions(+), 54 deletions(-)

diff --git a/madminer/utils/interfaces/delphes.py b/madminer/utils/interfaces/delphes.py
index aeacc7d15..81f062d83 100644
--- a/madminer/utils/interfaces/delphes.py
+++ b/madminer/utils/interfaces/delphes.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import os
 import logging
 
diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py
index 6e6d0b85e..ba6b5cca7 100644
--- a/madminer/utils/interfaces/delphes_root.py
+++ b/madminer/utils/interfaces/delphes_root.py
@@ -1,11 +1,8 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-import six
-
+import logging
 import numpy as np
-from collections import OrderedDict
-import uproot
 import os
-import logging
+import uproot
+from collections import OrderedDict
 
 from madminer.utils.particle import MadMinerParticle
 from madminer.utils.various import math_commands
@@ -124,14 +121,14 @@ def get_objects(ievent):
     # Observations
     observable_values = OrderedDict()
 
-    for obs_name, obs_definition in six.iteritems(observables):
+    for obs_name, obs_definition in observables.items():
         values_this_observable = []
 
         # Loop over events
         for event in range(n_events):
             variables = get_objects(event)
 
-            if isinstance(obs_definition, six.string_types):
+            if isinstance(obs_definition, str):
                 try:
                     values_this_observable.append(eval(obs_definition, variables))
                 except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError):
@@ -184,7 +181,7 @@ def get_objects(ievent):
     # Check for existence of required observables
     combined_filter = None
 
-    for obs_name, obs_required in six.iteritems(observables_required):
+    for obs_name, obs_required in observables_required.items():
         if obs_required:
             this_filter = np.isfinite(observable_values[obs_name])
             n_pass = np.sum(this_filter)
diff --git a/madminer/utils/interfaces/hepmc.py b/madminer/utils/interfaces/hepmc.py
index 0a52c9611..0fdc0314f 100644
--- a/madminer/utils/interfaces/hepmc.py
+++ b/madminer/utils/interfaces/hepmc.py
@@ -1,8 +1,6 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import os
-from io import open
 import logging
+import os
+
 from madminer.utils.various import unzip_file
 
 logger = logging.getLogger(__name__)
diff --git a/madminer/utils/interfaces/lhe.py b/madminer/utils/interfaces/lhe.py
index d76874617..f0669464d 100644
--- a/madminer/utils/interfaces/lhe.py
+++ b/madminer/utils/interfaces/lhe.py
@@ -1,22 +1,17 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
+import logging
 import numpy as np
-from collections import OrderedDict
 import os
-import logging
+from collections import OrderedDict
 
 try:
     import xml.etree.cElementTree as ET
-
     use_celementtree = True
 except ImportError:
     import xml.etree.ElementTree as ET
-
     use_celementtree = False
 
-from madminer.utils.various import unzip_file, approx_equal, math_commands
 from madminer.utils.particle import MadMinerParticle
+from madminer.utils.various import unzip_file, approx_equal, math_commands
 
 logger = logging.getLogger(__name__)
 
@@ -56,19 +51,19 @@ def parse_lhe_file(
     if k_factor is None:
         k_factor = 1.0
     if observables_required is None:
-        observables_required = {key: False for key in six.iterkeys(observables)}
+        observables_required = {key: False for key in observables.keys()}
     if observables_defaults is None:
-        observables_defaults = {key: None for key in six.iterkeys(observables)}
+        observables_defaults = {key: None for key in observables.keys()}
     if is_background and benchmark_names is None:
         raise RuntimeError("Parsing background LHE files required benchmark names to be provided.")
     if cuts is None:
         cuts = OrderedDict()
     if cuts_default_pass is None:
-        cuts_default_pass = {key: False for key in six.iterkeys(cuts)}
+        cuts_default_pass = {key: False for key in cuts.keys()}
     if efficiencies is None:
         efficiencies = OrderedDict()
     if efficiencies_default_pass is None:
-        efficiencies_default_pass = {key: 1.0 for key in six.iterkeys(efficiencies)}
+        efficiencies_default_pass = {key: 1.0 for key in efficiencies.keys()}
 
     # Unzip and open LHE file
     run_card = None
@@ -260,11 +255,12 @@ def parse_lhe_file(
             output_weights[benchmark_name] = weights_all_events[sampling_benchmark]
         else:
             output_weights[benchmark_name] = weights_all_events[benchmark_name]
-    for syst_name, syst_data in six.iteritems(systematics_dict):
+
+    for syst_name, syst_data in systematics_dict.items():
         for (
             nuisance_param_name,
             ((nuisance_benchmark0, weight_name0), (nuisance_benchmark1, weight_name1), processing),
-        ) in six.iteritems(syst_data):
+        ) in syst_data.items():
             # Store first benchmark associated with nuisance param
             if weight_name0 is None:
                 weight_name0 = sampling_benchmark
@@ -425,8 +421,8 @@ def _parse_observations(observables, observables_defaults, observables_required,
     observations = []
     pass_all_observation = True
 
-    for obs_name, obs_definition in six.iteritems(observables):
-        if isinstance(obs_definition, six.string_types):
+    for obs_name, obs_definition in observables.items():
+        if isinstance(obs_definition, str):
             try:
                 observations.append(eval(obs_definition, variables))
             except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError):
@@ -547,7 +543,7 @@ def extract_nuisance_parameters_from_lhe_file(filename, systematics):
     logger.debug("%s weight groups", len(weight_groups))
 
     # Loop over systematics
-    for syst_name, syst_value in six.iteritems(systematics):
+    for syst_name, syst_value in systematics.items():
         nuisance_param_dict = _extract_nuisance_param_dict(weight_groups, syst_name, syst_value)
         systematics_dict[syst_name] = nuisance_param_dict
 
@@ -1023,10 +1019,10 @@ def _smear_particles(particles, energy_resolutions, pt_resolutions, eta_resoluti
         pdgid = particle.pdgid
 
         if (
-            pdgid not in six.iterkeys(energy_resolutions)
-            or pdgid not in six.iterkeys(pt_resolutions)
-            or pdgid not in six.iterkeys(eta_resolutions)
-            or pdgid not in six.iterkeys(phi_resolutions)
+            pdgid not in energy_resolutions.keys()
+            or pdgid not in pt_resolutions.keys()
+            or pdgid not in eta_resolutions.keys()
+            or pdgid not in phi_resolutions.keys()
         ):
             continue
 
diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py
index 0c3ef778b..cf721799d 100644
--- a/madminer/utils/interfaces/madminer_hdf5.py
+++ b/madminer/utils/interfaces/madminer_hdf5.py
@@ -1,11 +1,9 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
-import shutil
 import h5py
+import logging
 import numpy as np
+import shutil
 from collections import OrderedDict
-import logging
+
 
 logger = logging.getLogger(__name__)
 
@@ -525,7 +523,7 @@ def _save_observables(filename, observables, overwrite_existing_samples):
         observable_definitions = []
         for key in observable_names:
             definition = observables[key]
-            if isinstance(definition, six.string_types):
+            if isinstance(definition, str):
                 observable_definitions.append(definition.encode("ascii", "ignore"))
             else:
                 observable_definitions.append("".encode("ascii", "ignore"))
diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py
index b0489ac43..0f436c897 100644
--- a/madminer/utils/interfaces/mg.py
+++ b/madminer/utils/interfaces/mg.py
@@ -1,8 +1,6 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
+import logging
 import os
 import shutil
-import logging
 
 from madminer.utils.various import call_command, make_file_executable, create_missing_folders
 
diff --git a/madminer/utils/interfaces/mg_cards.py b/madminer/utils/interfaces/mg_cards.py
index efbf58b37..25fdd0225 100644
--- a/madminer/utils/interfaces/mg_cards.py
+++ b/madminer/utils/interfaces/mg_cards.py
@@ -1,6 +1,3 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
 import logging
 from collections import OrderedDict
 
@@ -14,7 +11,7 @@ def export_param_card(benchmark, parameters, param_card_template_file, mg_proces
     lines = param_card.splitlines()
 
     # Replace parameter values
-    for parameter_name, parameter_value in six.iteritems(benchmark):
+    for parameter_name, parameter_value in benchmark.items():
         parameter_lha_block = parameters[parameter_name][0]
         parameter_lha_id = parameters[parameter_name][1]
 
@@ -89,7 +86,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di
         "change helicity False",
     ]
 
-    for benchmark_name, benchmark in six.iteritems(benchmarks):
+    for benchmark_name, benchmark in benchmarks.items():
         if benchmark_name == sample_benchmark:
             continue
 
@@ -97,7 +94,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di
         lines.append("# MadMiner benchmark " + benchmark_name)
         lines.append("launch --rwgt_name=" + benchmark_name)
 
-        for parameter_name, parameter_value in six.iteritems(benchmark):
+        for parameter_name, parameter_value in benchmark.items():
             parameter_lha_block = parameters[parameter_name][0]
             parameter_lha_id = parameters[parameter_name][1]
 
@@ -131,7 +128,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde
 
     # Do we actually have to run MadGraph's systematics feature?
     run_systematics = False
-    for value in six.itervalues(systematics):
+    for value in systematics.values():
         if value[0] in ["pdf", "scale"]:
             run_systematics = True
 
@@ -184,7 +181,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde
         run_card_lines.append("#*********************************************************************")
         run_card_lines.append("# MadMiner systematics setup                                         *")
         run_card_lines.append("#*********************************************************************")
-        for key, value in six.iteritems(settings):
+        for key, value in settings.items():
             run_card_lines.append(f"{value} = {key}")
         run_card_lines.append("")
 
@@ -206,7 +203,7 @@ def create_systematics_arguments(systematics):
     muf_done = False
     pdf_done = False
 
-    for value in six.itervalues(systematics):
+    for value in systematics.values():
         if value[0] == "scale" and value[1] == "mu":
             if mur_done or muf_done:
                 raise ValueError("Multiple nuisance parameter for scale variation!")

From cb7cab240c612d858a2e364d48e952b0cf04e7ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 10:55:50 -0400
Subject: [PATCH 51/65] src: utils/ml module f-strings

---
 madminer/utils/ml/trainer.py | 18 ++++++++----------
 madminer/utils/ml/utils.py   |  4 ++--
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/madminer/utils/ml/trainer.py b/madminer/utils/ml/trainer.py
index 2dd94ee0b..f07c444e5 100644
--- a/madminer/utils/ml/trainer.py
+++ b/madminer/utils/ml/trainer.py
@@ -216,7 +216,7 @@ def make_dataloaders(self, dataset, dataset_val, validation_split, batch_size):
             )
 
         else:
-            assert 0.0 < validation_split < 1.0, "Wrong validation split: {}".format(validation_split)
+            assert 0.0 < validation_split < 1.0, f"Wrong validation split: {validation_split}"
 
             n_samples = len(dataset)
             indices = list(range(n_samples))
@@ -402,7 +402,7 @@ def check_early_stopping(self, best_loss, best_model, best_epoch, loss, i_epoch,
     @staticmethod
     def report_batch(i_epoch, i_batch, loss_train):
         if i_batch in [0, 1, 10, 100, 1000]:
-            logger.debug("  Epoch {:>3d}, batch {:>3d}: loss {:>8.5f}".format(i_epoch + 1, i_batch + 1, loss_train))
+            logger.debug(f"  Epoch {(i_epoch+1):>3d}, batch {(i_batch+1):>3d}: loss {loss_train:>8.5f}")
 
     @staticmethod
     def report_epoch(
@@ -415,18 +415,16 @@ def contribution_summary(labels, contributions):
             for i, (label, value) in enumerate(zip(labels, contributions)):
                 if i > 0:
                     summary += ", "
-                summary += "{}: {:>6.3f}".format(label, value)
+                summary += f"{label}: {value:>6.3f}"
             return summary
 
-        train_report = "  Epoch {:>3d}: train loss {:>8.5f} ({})".format(
-            i_epoch + 1, loss_train, contribution_summary(loss_labels, loss_contributions_train)
-        )
+        summary = contribution_summary(loss_labels, loss_contributions_train)
+        train_report = f"  Epoch {(i_epoch+1):>3d}: train loss {loss_train:>8.5f} ({summary})"
         logging_fn(train_report)
 
         if loss_val is not None:
-            val_report = "             val. loss  {:>8.5f} ({})".format(
-                loss_val, contribution_summary(loss_labels, loss_contributions_val)
-            )
+            summary = contribution_summary(loss_labels, loss_contributions_val)
+            val_report = f"             val. loss  {loss_val:>8.5f} ({summary})"
             logging_fn(val_report)
 
     def wrap_up_early_stopping(self, best_model, currrent_loss, best_loss, best_epoch):
@@ -476,7 +474,7 @@ def _timer(self, start=None, stop=None):
     def _report_timer(self):
         logger.info("Training time spend on:")
         for key, value in six.iteritems(self.timer):
-            logger.info("  {:>32s}: {:6.2f}h".format(key, value / 3600.0))
+            logger.info(f"  {key:>32s}: {(value/3600.0):6.2f}h")
 
 
 class SingleParameterizedRatioTrainer(Trainer):
diff --git a/madminer/utils/ml/utils.py b/madminer/utils/ml/utils.py
index 2cf223f33..851a7997d 100644
--- a/madminer/utils/ml/utils.py
+++ b/madminer/utils/ml/utils.py
@@ -130,7 +130,7 @@ def get_optimizer(optimizer, nesterov_momentum):
         if nesterov_momentum is not None:
             opt_kwargs = {"momentum": nesterov_momentum}
     else:
-        raise ValueError("Unknown optimizer {}".format(optimizer))
+        raise ValueError(f"Unknown optimizer {optimizer}")
     return opt, opt_kwargs
 
 
@@ -184,7 +184,7 @@ def get_loss(method, alpha):
         loss_weights = [1.0, alpha]
         loss_labels = ["nll", "mse_score"]
     else:
-        raise NotImplementedError("Unknown method {}".format(method))
+        raise NotImplementedError("Unknown method {method}")
     return loss_functions, loss_labels, loss_weights
 
 

From 34bb65d0f6a37c29a1b520a3e63c366294946228 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 11:04:35 -0400
Subject: [PATCH 52/65] src: utils/ml module improve fmt

---
 madminer/utils/ml/eval.py    | 27 ++++++++++++++++---
 madminer/utils/ml/trainer.py | 52 +++++++++++++++++++++++++++++++-----
 madminer/utils/ml/utils.py   | 16 ++++++-----
 3 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/madminer/utils/ml/eval.py b/madminer/utils/ml/eval.py
index 89d267e6d..3c013d667 100644
--- a/madminer/utils/ml/eval.py
+++ b/madminer/utils/ml/eval.py
@@ -114,20 +114,39 @@ def evaluate_ratio_model(
         if method_type == "parameterized_ratio":
             if return_grad_x:
                 s_hat, log_r_hat, t_hat0, x_gradients = model(
-                    theta0s, xs, return_grad_x=True, track_score=evaluate_score, create_gradient_graph=False
+                    theta0s,
+                    xs,
+                    return_grad_x=True,
+                    track_score=evaluate_score,
+                    create_gradient_graph=False,
                 )
             else:
-                s_hat, log_r_hat, t_hat0 = model(theta0s, xs, track_score=evaluate_score, create_gradient_graph=False)
+                s_hat, log_r_hat, t_hat0 = model(
+                    theta0s,
+                    xs,
+                    track_score=evaluate_score,
+                    create_gradient_graph=False,
+                )
                 x_gradients = None
             t_hat1 = None
+
         elif method_type == "double_parameterized_ratio":
             if return_grad_x:
                 s_hat, log_r_hat, t_hat0, t_hat1, x_gradients = model(
-                    theta0s, theta1s, xs, return_grad_x=True, track_score=evaluate_score, create_gradient_graph=False
+                    theta0s,
+                    theta1s,
+                    xs,
+                    return_grad_x=True,
+                    track_score=evaluate_score,
+                    create_gradient_graph=False,
                 )
             else:
                 s_hat, log_r_hat, t_hat0, t_hat1 = model(
-                    theta0s, theta1s, xs, track_score=evaluate_score, create_gradient_graph=False
+                    theta0s,
+                    theta1s,
+                    xs,
+                    track_score=evaluate_score,
+                    create_gradient_graph=False,
                 )
                 x_gradients = None
         else:
diff --git a/madminer/utils/ml/trainer.py b/madminer/utils/ml/trainer.py
index f07c444e5..542aa34bc 100644
--- a/madminer/utils/ml/trainer.py
+++ b/madminer/utils/ml/trainer.py
@@ -68,12 +68,15 @@ def train(
             logger.debug("Found external validation data set")
             self.check_data(data_val)
             self.report_data(data_val)
+
         self._timer(stop="check data", start="make dataset")
         data_labels, dataset = self.make_dataset(data)
+
         if data_val is not None:
             _, dataset_val = self.make_dataset(data_val)
         else:
             dataset_val = None
+
         self._timer(stop="make dataset", start="make dataloader")
         train_loader, val_loader = self.make_dataloaders(dataset, dataset_val, validation_split, batch_size)
 
@@ -84,6 +87,7 @@ def train(
 
         early_stopping = early_stopping and (validation_split is not None) and (epochs > 1)
         best_loss, best_model, best_epoch = None, None, None
+
         if early_stopping and early_stopping_patience is None:
             logger.debug("Using early stopping with infinite patience")
         elif early_stopping:
@@ -194,25 +198,39 @@ def check_data(data):
     def make_dataset(self, data):
         data_arrays = []
         data_labels = []
+
         for key, value in six.iteritems(data):
             data_labels.append(key)
             data_arrays.append(value)
         dataset = NumpyDataset(*data_arrays, dtype=self.dtype)
+
         return data_labels, dataset
 
     def make_dataloaders(self, dataset, dataset_val, validation_split, batch_size):
         if dataset_val is None and (validation_split is None or validation_split <= 0.0):
             train_loader = DataLoader(
-                dataset, batch_size=batch_size, shuffle=True, pin_memory=self.run_on_gpu, num_workers=self.n_workers
+                dataset,
+                batch_size=batch_size,
+                shuffle=True,
+                pin_memory=self.run_on_gpu,
+                num_workers=self.n_workers,
             )
             val_loader = None
 
         elif dataset_val is not None:
             train_loader = DataLoader(
-                dataset, batch_size=batch_size, shuffle=True, pin_memory=self.run_on_gpu, num_workers=self.n_workers
+                dataset,
+                batch_size=batch_size,
+                shuffle=True,
+                pin_memory=self.run_on_gpu,
+                num_workers=self.n_workers,
             )
             val_loader = DataLoader(
-                dataset_val, batch_size=batch_size, shuffle=True, pin_memory=self.run_on_gpu, num_workers=self.n_workers
+                dataset_val,
+                batch_size=batch_size,
+                shuffle=True,
+                pin_memory=self.run_on_gpu,
+                num_workers=self.n_workers,
             )
 
         else:
@@ -344,6 +362,7 @@ def batch_val(self, batch_data, loss_functions, loss_weights):
         loss = loss.item()
         loss_contributions = [contrib.item() for contrib in loss_contributions]
         self._timer(stop="validation sum losses")
+
         return loss, loss_contributions
 
     def forward_pass(self, batch_data, loss_functions):
@@ -364,13 +383,16 @@ def forward_pass(self, batch_data, loss_functions):
             Losses as scalar pyTorch tensors.
 
         """
-        raise NotImplementedError
+
+        raise NotImplementedError()
 
     @staticmethod
     def sum_losses(contributions, weights):
         loss = weights[0] * contributions[0]
+
         for _w, _l in zip(weights[1:], contributions[1:]):
             loss = loss + _w * _l
+
         return loss
 
     def optimizer_step(self, optimizer, loss, clip_gradient):
@@ -379,8 +401,10 @@ def optimizer_step(self, optimizer, loss, clip_gradient):
         self._timer(stop="opt: zero grad", start="opt: backward")
         loss.backward()
         self._timer(start="opt: clip grad norm", stop="opt: backward")
+
         if clip_gradient is not None:
             clip_grad_norm_(self.model.parameters(), clip_gradient)
+
         self._timer(stop="opt: clip grad norm", start="opt: step")
         optimizer.step()
         self._timer(stop="opt: step")
@@ -406,7 +430,13 @@ def report_batch(i_epoch, i_batch, loss_train):
 
     @staticmethod
     def report_epoch(
-        i_epoch, loss_labels, loss_train, loss_val, loss_contributions_train, loss_contributions_val, verbose=False
+        i_epoch,
+        loss_labels,
+        loss_train,
+        loss_val,
+        loss_contributions_train,
+        loss_contributions_val,
+        verbose=False,
     ):
         logging_fn = logger.info if verbose else logger.debug
 
@@ -488,7 +518,7 @@ def check_data(self, data):
             raise ValueError("Missing required information 'x', 'theta', or 'y' in training data!")
 
         for key in data_keys:
-            if key not in ["x", "theta", "y", "r_xz", "t_xz"]:
+            if key not in {"x", "theta", "y", "r_xz", "t_xz"}:
                 logger.warning("Unknown key %s in training data! Ignoring it.", key)
 
         self.calculate_model_score = "t_xz" in data_keys
@@ -502,6 +532,7 @@ def forward_pass(self, batch_data, loss_functions):
         theta = batch_data["theta"].to(self.device, self.dtype, non_blocking=True)
         x = batch_data["x"].to(self.device, self.dtype, non_blocking=True)
         y = batch_data["y"].to(self.device, self.dtype, non_blocking=True)
+
         try:
             r_xz = batch_data["r_xz"].to(self.device, self.dtype, non_blocking=True)
         except KeyError:
@@ -510,6 +541,7 @@ def forward_pass(self, batch_data, loss_functions):
             t_xz = batch_data["t_xz"].to(self.device, self.dtype, non_blocking=True)
         except KeyError:
             t_xz = None
+
         self._timer(stop="fwd: move data", start="fwd: check for nans")
         self._check_for_nans("Training data", theta, x, y)
         self._check_for_nans("Augmented training data", r_xz, t_xz)
@@ -558,6 +590,7 @@ def forward_pass(self, batch_data, loss_functions):
         theta1 = batch_data["theta1"].to(self.device, self.dtype, non_blocking=True)
         x = batch_data["x"].to(self.device, self.dtype, non_blocking=True)
         y = batch_data["y"].to(self.device, self.dtype, non_blocking=True)
+
         try:
             r_xz = batch_data["r_xz"].to(self.device, self.dtype, non_blocking=True)
         except KeyError:
@@ -570,6 +603,7 @@ def forward_pass(self, batch_data, loss_functions):
             t_xz1 = batch_data["t_xz1"].to(self.device, self.dtype, non_blocking=True)
         except KeyError:
             t_xz1 = None
+
         self._timer(stop="fwd: move data", start="fwd: check for nans")
         self._check_for_nans("Training data", theta0, theta1, x, y)
         self._check_for_nans("Augmented training data", r_xz, t_xz0, t_xz1)
@@ -639,7 +673,7 @@ def check_data(self, data):
             raise ValueError("Missing required information 'x' or 'theta' in training data!")
 
         for key in data_keys:
-            if key not in ["x", "theta", "t_xz"]:
+            if key not in {"x", "theta", "t_xz"}:
                 logger.warning("Unknown key %s in training data! Ignoring it.", key)
 
         self.calculate_model_score = "t_xz" in data_keys
@@ -652,21 +686,25 @@ def forward_pass(self, batch_data, loss_functions):
         self._timer(start="fwd: move data")
         x = batch_data["x"].to(self.device, self.dtype, non_blocking=True)
         theta = batch_data["theta"].to(self.device, self.dtype, non_blocking=True)
+
         try:
             t_xz = batch_data["t_xz"].to(self.device, self.dtype, non_blocking=True)
         except KeyError:
             t_xz = None
+
         self._timer(stop="fwd: move data", start="fwd: check for nans")
         self._check_for_nans("Training data", theta, x)
         self._check_for_nans("Augmented training data", t_xz)
 
         self._timer(start="fwd: model.forward", stop="fwd: check for nans")
+
         if self.calculate_model_score:
             theta.requires_grad = True
             _, log_likelihood, t_hat = self.model.log_likelihood_and_score(theta, x)
         else:
             _, log_likelihood = self.model.log_likelihood(theta, x)
             t_hat = None
+
         self._timer(stop="fwd: model.forward", start="fwd: check for nans")
         self._check_for_nans("Model output", log_likelihood, t_hat)
 
diff --git a/madminer/utils/ml/utils.py b/madminer/utils/ml/utils.py
index 851a7997d..efe95c280 100644
--- a/madminer/utils/ml/utils.py
+++ b/madminer/utils/ml/utils.py
@@ -85,7 +85,7 @@ def check_required_data(method, r_xz, t_xz0, t_xz1, theta0, theta1, x, y):
         data_is_there = False
     if (
         method
-        in [
+        in {
             "carl",
             "carl2",
             "nde",
@@ -98,23 +98,24 @@ def check_required_data(method, r_xz, t_xz0, t_xz1, theta0, theta1, x, y):
             "alice2",
             "rascal2",
             "alices2",
-        ]
+        }
         and theta0 is None
     ):
         data_is_there = False
-    if method in ["rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"] and r_xz is None:
+    if method in {"rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"} and r_xz is None:
         data_is_there = False
     if (
-        method in ["carl", "carl2", "rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"]
+        method in {"carl", "carl2", "rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"}
         and y is None
     ):
         data_is_there = False
-    if method in ["scandal", "rascal", "alices", "rascal2", "alices2", "sally", "sallino"] and t_xz0 is None:
+    if method in {"scandal", "rascal", "alices", "rascal2", "alices2", "sally", "sallino"} and t_xz0 is None:
         data_is_there = False
-    if method in ["carl2", "rolr2", "alice2", "rascal2", "alices2"] and theta1 is None:
+    if method in {"carl2", "rolr2", "alice2", "rascal2", "alices2"} and theta1 is None:
         data_is_there = False
-    if method in ["rascal2", "alices2"] and t_xz1 is None:
+    if method in {"rascal2", "alices2"} and t_xz1 is None:
         data_is_there = False
+
     return data_is_there
 
 
@@ -185,6 +186,7 @@ def get_loss(method, alpha):
         loss_labels = ["nll", "mse_score"]
     else:
         raise NotImplementedError("Unknown method {method}")
+
     return loss_functions, loss_labels, loss_weights
 
 

From fcf99f781f58abaa4002b2d5eba41a63994bd53f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 11:09:09 -0400
Subject: [PATCH 53/65] src: utils/ml module remove 'six'

---
 madminer/utils/ml/eval.py    |  7 ++++---
 madminer/utils/ml/losses.py  |  4 +---
 madminer/utils/ml/trainer.py | 21 ++++++++++++---------
 madminer/utils/ml/utils.py   | 15 +++++++--------
 4 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/madminer/utils/ml/eval.py b/madminer/utils/ml/eval.py
index 3c013d667..f23c3faf2 100644
--- a/madminer/utils/ml/eval.py
+++ b/madminer/utils/ml/eval.py
@@ -1,11 +1,12 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import logging
 import numpy as np
 import torch
 from torch import tensor
 
-from madminer.utils.ml.models.ratio import DenseSingleParameterizedRatioModel, DenseDoublyParameterizedRatioModel
+from madminer.utils.ml.models.ratio import (
+    DenseSingleParameterizedRatioModel,
+    DenseDoublyParameterizedRatioModel,
+)
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/losses.py b/madminer/utils/ml/losses.py
index 0daa3822d..64082c3f1 100644
--- a/madminer/utils/ml/losses.py
+++ b/madminer/utils/ml/losses.py
@@ -1,9 +1,7 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
+import logging
 import numpy as np
 import torch
 from torch.nn import BCELoss, MSELoss
-import logging
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/trainer.py b/madminer/utils/ml/trainer.py
index 542aa34bc..d1bb6fa76 100644
--- a/madminer/utils/ml/trainer.py
+++ b/madminer/utils/ml/trainer.py
@@ -1,16 +1,19 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import six
 import logging
-from collections import OrderedDict
 import numpy as np
 import time
 import torch
 import torch.optim as optim
-from madminer.utils.ml.utils import EarlyStoppingException, NanException, NumpyDataset
+
+from collections import OrderedDict
+from torch.nn.utils import clip_grad_norm_
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import SubsetRandomSampler
-from torch.nn.utils import clip_grad_norm_
+
+from madminer.utils.ml.utils import (
+    EarlyStoppingException,
+    NanException,
+    NumpyDataset,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -177,7 +180,7 @@ def train(
     @staticmethod
     def report_data(data):
         logger.debug("Training data:")
-        for key, value in six.iteritems(data):
+        for key, value in data.items():
             if value is None:
                 logger.debug("  %s: -", key)
             else:
@@ -199,7 +202,7 @@ def make_dataset(self, data):
         data_arrays = []
         data_labels = []
 
-        for key, value in six.iteritems(data):
+        for key, value in data.items():
             data_labels.append(key)
             data_arrays.append(value)
         dataset = NumpyDataset(*data_arrays, dtype=self.dtype)
@@ -503,7 +506,7 @@ def _timer(self, start=None, stop=None):
 
     def _report_timer(self):
         logger.info("Training time spend on:")
-        for key, value in six.iteritems(self.timer):
+        for key, value in self.timer.items():
             logger.info(f"  {key:>32s}: {(value/3600.0):6.2f}h")
 
 
diff --git a/madminer/utils/ml/utils.py b/madminer/utils/ml/utils.py
index efe95c280..23d3f58ff 100644
--- a/madminer/utils/ml/utils.py
+++ b/madminer/utils/ml/utils.py
@@ -1,14 +1,13 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
+import logging
 import numpy as np
 import torch
-from torch.nn import functional as F
-import logging
+
 from torch import optim
+from torch.nn import functional as F
+from torch.utils.data import Dataset
 
-import madminer.utils
 from madminer.utils.ml import losses
-from torch.utils.data import Dataset
+
 
 logger = logging.getLogger(__name__)
 
@@ -177,11 +176,11 @@ def get_loss(method, alpha):
         loss_weights = [1.0]
         loss_labels = ["mse_score"]
     elif method == "nde":
-        loss_functions = [madminer.utils.ml.losses.flow_nll]
+        loss_functions = [losses.flow_nll]
         loss_weights = [1.0]
         loss_labels = ["nll"]
     elif method == "scandal":
-        loss_functions = [madminer.utils.ml.losses.flow_nll, madminer.utils.ml.losses.flow_score_mse]
+        loss_functions = [losses.flow_nll, losses.flow_score_mse]
         loss_weights = [1.0, alpha]
         loss_labels = ["nll", "mse_score"]
     else:

From 46b6d606b704b0dee999d645023d1c134b1525dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 11:39:53 -0400
Subject: [PATCH 54/65] src: utils/ml/models module improve fmt

---
 madminer/utils/ml/models/base.py       | 11 +++++------
 madminer/utils/ml/models/batch_norm.py |  9 ++-------
 madminer/utils/ml/models/made.py       | 16 ++++++++++------
 madminer/utils/ml/models/made_mog.py   |  4 ++--
 madminer/utils/ml/models/maf.py        |  4 +---
 madminer/utils/ml/models/maf_mog.py    |  2 +-
 madminer/utils/ml/models/score.py      | 10 +++++++---
 7 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/madminer/utils/ml/models/base.py b/madminer/utils/ml/models/base.py
index e06739211..3c40fd261 100644
--- a/madminer/utils/ml/models/base.py
+++ b/madminer/utils/ml/models/base.py
@@ -11,14 +11,13 @@ class BaseFlow(nn.Module):
 
     def __init__(self, n_inputs, **kwargs):
         super(BaseFlow, self).__init__()
-
         self.n_inputs = n_inputs
 
     def forward(self, x, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def generate_samples(self, n_samples=1, u=None, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def log_likelihood(self, x, **kwargs):
         """ Calculates log p(x) with a Gaussian base density """
@@ -39,17 +38,17 @@ def log_likelihood_and_score(self, x, **kwargs):
 
 
 class BaseConditionalFlow(nn.Module):
+
     def __init__(self, n_conditionals, n_inputs, **kwargs):
         super(BaseConditionalFlow, self).__init__()
-
         self.n_conditionals = n_conditionals
         self.n_inputs = n_inputs
 
     def forward(self, theta, x, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def generate_samples(self, theta, u=None, **kwargs):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def log_likelihood(self, theta, x, **kwargs):
         """ Calculates u(x) and log p(x) with a Gaussian base density """
diff --git a/madminer/utils/ml/models/batch_norm.py b/madminer/utils/ml/models/batch_norm.py
index 029929c5d..3fe14349c 100644
--- a/madminer/utils/ml/models/batch_norm.py
+++ b/madminer/utils/ml/models/batch_norm.py
@@ -56,20 +56,15 @@ def forward(self, x, fixed_params=False):
         return u, logdet
 
     def inverse(self, u):
-
         """Calculates u -> x(u) (the approximate inverse transformation based on running mean and variance)"""
 
-        x = torch.sqrt(self.running_var) * u + self.running_mean
-
-        return x
+        return torch.sqrt(self.running_var) * u + self.running_mean
 
     def generate_samples(self, n_samples=1, u=None, **kwargs):
         if u is None:
             u = tensor(rng.randn(n_samples, self.n_inputs))
 
-        x = torch.sqrt(self.running_var) * u + self.running_mean
-
-        return x
+        return torch.sqrt(self.running_var) * u + self.running_mean
 
     def to(self, *args, **kwargs):
         logger.debug("Transforming BatchNorm to %s", args)
diff --git a/madminer/utils/ml/models/made.py b/madminer/utils/ml/models/made.py
index 12add77ce..5f375bd18 100644
--- a/madminer/utils/ml/models/made.py
+++ b/madminer/utils/ml/models/made.py
@@ -52,7 +52,6 @@ def forward(self, x, **kwargs):
 
         **kwargs :
 
-
         Returns
         -------
 
@@ -94,7 +93,6 @@ def generate_samples(self, n_samples=1, u=None, **kwargs):
              (Default value = None)
         **kwargs :
 
-
         Returns
         -------
 
@@ -131,7 +129,6 @@ def to(self, *args, **kwargs):
 
         **kwargs :
 
-
         Returns
         -------
 
@@ -159,7 +156,13 @@ class ConditionalGaussianMADE(BaseConditionalFlow):
     """ """
 
     def __init__(
-        self, n_conditionals, n_inputs, n_hiddens, activation="relu", input_order="sequential", mode="sequential"
+        self,
+        n_conditionals,
+        n_inputs,
+        n_hiddens,
+        activation="relu",
+        input_order="sequential",
+        mode="sequential",
     ):
         super(ConditionalGaussianMADE, self).__init__(n_conditionals, n_inputs)
 
@@ -198,7 +201,6 @@ def forward(self, theta, x, **kwargs):
         x :
 
         **kwargs :
 
-
         Returns
         -------
@@ -267,7 +269,6 @@ def generate_samples(self, theta, u=None, **kwargs):
              (Default value = None)
         **kwargs :
 
-
         Returns
         -------
 
@@ -306,7 +307,6 @@ def to(self, *args, **kwargs):
 
         **kwargs :
 
-
         Returns
         -------
 
diff --git a/madminer/utils/ml/models/made_mog.py b/madminer/utils/ml/models/made_mog.py
index db274fb72..4d119a788 100644
--- a/madminer/utils/ml/models/made_mog.py
+++ b/madminer/utils/ml/models/made_mog.py
@@ -73,7 +73,6 @@ def forward(self, theta, x, **kwargs):
 
         **kwargs :
 
-
         Returns
         -------
 
@@ -174,7 +173,8 @@ def generate_samples(self, theta, u=None, **kwargs):
             u = u.to(*self.to_args, **self.to_kwargs)
 
         for i in range(1, self.n_inputs + 1):
-            self.forward(theta, x)  # Sets Gaussian parameters (self.m and self.logp) and mixture coeffs (self.loga)
+            # Sets Gaussian parameters (self.m and self.logp) and mixture coeffs (self.loga)
+            self.forward(theta, x)
 
             ix = np.argwhere(self.input_order == i)[0, 0]
 
diff --git a/madminer/utils/ml/models/maf.py b/madminer/utils/ml/models/maf.py
index 5daa413f7..9df44f447 100644
--- a/madminer/utils/ml/models/maf.py
+++ b/madminer/utils/ml/models/maf.py
@@ -157,7 +157,7 @@ def __init__(
             if not (isinstance(input_order, str) and input_order != "random"):
                 input_order = made.input_order[::-1]
 
-        # Batch normalizatino
+        # Batch normalization
         self.bns = None
         if self.batch_norm:
             self.bns = nn.ModuleList()
@@ -220,7 +220,6 @@ def generate_samples(self, theta, u=None, **kwargs):
              (Default value = None)
         **kwargs :
 
-
         Returns
         -------
 
@@ -252,7 +251,6 @@ def to(self, *args, **kwargs):
 
         **kwargs :
 
-
         Returns
         -------
 
diff --git a/madminer/utils/ml/models/maf_mog.py b/madminer/utils/ml/models/maf_mog.py
index be00d8a9f..78468feb7 100644
--- a/madminer/utils/ml/models/maf_mog.py
+++ b/madminer/utils/ml/models/maf_mog.py
@@ -65,7 +65,7 @@ def __init__(
             mode=mode,
         )
 
-        # Batch normalizatino
+        # Batch normalization
         self.bns = None
         if self.batch_norm:
             self.bns = nn.ModuleList()
diff --git a/madminer/utils/ml/models/score.py b/madminer/utils/ml/models/score.py
index 3ca339ac3..02575a89d 100644
--- a/madminer/utils/ml/models/score.py
+++ b/madminer/utils/ml/models/score.py
@@ -53,9 +53,13 @@ def forward(self, x, return_grad_x=False):
 
         # Calculate gradient
         if return_grad_x:
-            x_gradient = grad(t_hat, x, grad_outputs=torch.ones_like(t_hat.data), only_inputs=True, create_graph=True)[
-                0
-            ]
+            x_gradient = grad(
+                t_hat,
+                x,
+                grad_outputs=torch.ones_like(t_hat.data),
+                only_inputs=True,
+                create_graph=True,
+            )[0]
 
             return t_hat, x_gradient
 

From b8acb4c5c9d79a6d027ab7dcbfb957fad38ce5b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Thu, 24 Sep 2020 11:44:12 -0400
Subject: [PATCH 55/65] src: utils/ml/models module remove 'future'

---
 madminer/utils/ml/models/base.py       | 3 +--
 madminer/utils/ml/models/batch_norm.py | 4 +---
 madminer/utils/ml/models/made.py       | 6 ++----
 madminer/utils/ml/models/made_mog.py   | 7 +++----
 madminer/utils/ml/models/maf.py        | 5 ++---
 madminer/utils/ml/models/maf_mog.py    | 7 +++----
 madminer/utils/ml/models/masks.py      | 7 +++----
 madminer/utils/ml/models/ratio.py      | 9 ++++-----
 madminer/utils/ml/models/score.py      | 7 +++----
 9 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/madminer/utils/ml/models/base.py b/madminer/utils/ml/models/base.py
index 3c40fd261..1772c3a72 100644
--- a/madminer/utils/ml/models/base.py
+++ b/madminer/utils/ml/models/base.py
@@ -1,8 +1,7 @@
-from __future__ import absolute_import, division, print_function
-
 import numpy as np
 import torch
 import torch.nn as nn
+
 from torch.autograd import grad
 
 
diff --git a/madminer/utils/ml/models/batch_norm.py b/madminer/utils/ml/models/batch_norm.py
index 3fe14349c..caa39f1d6 100644
--- a/madminer/utils/ml/models/batch_norm.py
+++ b/madminer/utils/ml/models/batch_norm.py
@@ -1,11 +1,9 @@
-from __future__ import absolute_import, division, print_function
-
 import logging
 import numpy.random as rng
 import torch
-from torch import tensor
 
 from madminer.utils.ml.models.base import BaseFlow
+from torch import tensor
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/models/made.py b/madminer/utils/ml/models/made.py
index 5f375bd18..8f1145f1a 100644
--- a/madminer/utils/ml/models/made.py
+++ b/madminer/utils/ml/models/made.py
@@ -1,16 +1,14 @@
-from __future__ import absolute_import, division, print_function
-
+import logging
 import numpy as np
 import numpy.random as rng
 import torch
-from torch import tensor
 import torch.nn as nn
 import torch.nn.functional as F
 
 from madminer.utils.ml.models.base import BaseFlow, BaseConditionalFlow
 from madminer.utils.ml.models.masks import create_degrees, create_masks, create_weights, create_weights_conditional
 from madminer.utils.ml.utils import get_activation_function
-import logging
+from torch import tensor
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/models/made_mog.py b/madminer/utils/ml/models/made_mog.py
index 4d119a788..15fe54f73 100644
--- a/madminer/utils/ml/models/made_mog.py
+++ b/madminer/utils/ml/models/made_mog.py
@@ -1,14 +1,13 @@
-from __future__ import absolute_import, division, print_function
-
+import logging
 import numpy as np
 import numpy.random as rng
 import torch
-from torch import tensor
 import torch.nn.functional as F
+
 from madminer.utils.ml.models.base import BaseConditionalFlow
 from madminer.utils.ml.models.masks import create_degrees, create_masks, create_weights_conditional
 from madminer.utils.ml.utils import get_activation_function
-import logging
+from torch import tensor
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/models/maf.py b/madminer/utils/ml/models/maf.py
index 9df44f447..22aa77141 100644
--- a/madminer/utils/ml/models/maf.py
+++ b/madminer/utils/ml/models/maf.py
@@ -1,12 +1,11 @@
-from __future__ import absolute_import, division, print_function
-
+import logging
 import numpy.random as rng
 import torch.nn as nn
+
 from torch import tensor
 from madminer.utils.ml.models.base import BaseFlow, BaseConditionalFlow
 from madminer.utils.ml.models.made import GaussianMADE, ConditionalGaussianMADE
 from madminer.utils.ml.models.batch_norm import BatchNorm
-import logging
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/models/maf_mog.py b/madminer/utils/ml/models/maf_mog.py
index 78468feb7..73b9adf40 100644
--- a/madminer/utils/ml/models/maf_mog.py
+++ b/madminer/utils/ml/models/maf_mog.py
@@ -1,11 +1,10 @@
-from __future__ import absolute_import, division, print_function
-
+import logging
 import torch.nn as nn
+
 from madminer.utils.ml.models.base import BaseConditionalFlow
-from madminer.utils.ml.models.made import ConditionalGaussianMADE
 from madminer.utils.ml.models.batch_norm import BatchNorm
+from madminer.utils.ml.models.made import ConditionalGaussianMADE
 from madminer.utils.ml.models.made_mog import ConditionalMixtureMADE
-import logging
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/models/masks.py b/madminer/utils/ml/models/masks.py
index dfa2875e6..e677794bf 100644
--- a/madminer/utils/ml/models/masks.py
+++ b/madminer/utils/ml/models/masks.py
@@ -1,10 +1,9 @@
-from __future__ import absolute_import, division, print_function
-
+import logging
 import numpy as np
 import numpy.random as rng
-from torch import tensor
 import torch.nn as nn
-import logging
+
+from torch import tensor
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/models/ratio.py b/madminer/utils/ml/models/ratio.py
index a1821d5dc..bfd06df8f 100644
--- a/madminer/utils/ml/models/ratio.py
+++ b/madminer/utils/ml/models/ratio.py
@@ -1,11 +1,10 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
+import logging
+import numpy as np
 import torch
 import torch.nn as nn
-import numpy as np
-from torch.autograd import grad
+
 from madminer.utils.ml.utils import get_activation_function, check_for_nan, check_for_nonpos, NanException
-import logging
+from torch.autograd import grad
 
 logger = logging.getLogger(__name__)
 
diff --git a/madminer/utils/ml/models/score.py b/madminer/utils/ml/models/score.py
index 02575a89d..9d28e734d 100644
--- a/madminer/utils/ml/models/score.py
+++ b/madminer/utils/ml/models/score.py
@@ -1,10 +1,9 @@
-from __future__ import absolute_import, division, print_function, unicode_literals
-
+import logging
 import torch
 import torch.nn as nn
-from torch.autograd import grad
+
 from madminer.utils.ml.utils import get_activation_function
-import logging
+from torch.autograd import grad
 
 logger = logging.getLogger(__name__)
 

From bd06b891f58d4bee27f7d0892ae0e729a825cd09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 26 Oct 2020 15:53:32 -0400
Subject: [PATCH 56/65] fix: limits module fmt + typos

---
 madminer/limits/asymptotic_limits.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py
index b3c46da91..4dec96b0e 100644
--- a/madminer/limits/asymptotic_limits.py
+++ b/madminer/limits/asymptotic_limits.py
@@ -82,7 +82,7 @@ def observed_limits(
         `x_observed` specifies the observed data as an array of observables, using the same observables and their order
         as used throughout the MadMiner workflow.
 
-        The p-values with frequent hypothesis tests using the likelihood ratio as test statistic. The asymptotic
+        The p-values with frequentist hypothesis tests using the likelihood ratio as test statistic. The asymptotic
         approximation is used, see https://arxiv.org/abs/1007.1727.
 
         Depending on the keyword `mode`, the likelihood ratio is calculated with one of several different methods:
@@ -1089,11 +1089,17 @@ def _calculate_log_likelihood_xsec(self, n_observed, theta_grid, luminosity=3000
     def _calculate_log_likelihood_ratio_kinematics(self, x_observed, theta_grid, model, theta1=None):
         if isinstance(model, ParameterizedRatioEstimator):
             log_r, _ = model.evaluate_log_likelihood_ratio(
-                x=x_observed, theta=theta_grid, test_all_combinations=True, evaluate_score=False
+                x=x_observed,
+                theta=theta_grid,
+                test_all_combinations=True,
+                evaluate_score=False,
             )
         elif isinstance(model, LikelihoodEstimator):
             log_r, _ = model.evaluate_log_likelihood(
-                x=x_observed, theta=theta_grid, test_all_combinations=True, evaluate_score=False
+                x=x_observed,
+                theta=theta_grid,
+                test_all_combinations=True,
+                evaluate_score=False,
             )
         elif isinstance(model, Ensemble) and model.estimator_type == "parameterized_ratio":
             log_r, _ = model.evaluate_log_likelihood_ratio(
@@ -1114,7 +1120,7 @@ def _calculate_log_likelihood_ratio_kinematics(self, x_observed, theta_grid, mod
         else:
             raise NotImplementedError(
                 "Likelihood ratio estimation is currently only implemented for "
-                "ParameterizedRatioEstimator and LikelihoodEstimator instancees"
+                "ParameterizedRatioEstimator and LikelihoodEstimator instances"
             )
         return log_r
 

From 4ffbb0243edbf081a3b958e7abe336bafba1efe0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Mon, 26 Oct 2020 16:11:26 -0400
Subject: [PATCH 57/65] fix: utils/interfaces module multi-line cmds

---
 madminer/utils/interfaces/mg.py | 61 +++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py
index 0f436c897..2363c9c67 100644
--- a/madminer/utils/interfaces/mg.py
+++ b/madminer/utils/interfaces/mg.py
@@ -237,34 +237,41 @@ def setup_mg_with_scripts(
     #  Card copying commands
     copy_commands = ""
     if run_card_file_from_mgprocdir is not None:
-        copy_commands += f"cp " \
-            f"{mg_process_directory_placeholder}/{run_card_file_from_mgprocdir} " \
+        copy_commands += (
+            f"cp "
+            f"{mg_process_directory_placeholder}/{run_card_file_from_mgprocdir} "
             f"{mg_process_directory_placeholder}/Cards/run_card.dat\n"
-
+        )
     if param_card_file_from_mgprocdir is not None:
-        copy_commands += f"cp " \
-            f"{mg_process_directory_placeholder}/{param_card_file_from_mgprocdir} " \
+        copy_commands += (
+            f"cp "
+            f"{mg_process_directory_placeholder}/{param_card_file_from_mgprocdir} "
             f"{mg_process_directory_placeholder}/Cards/param_card.dat\n"
-
+        )
     if reweight_card_file_from_mgprocdir is not None and not is_background:
-        copy_commands += f"cp " \
-            f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \
+        copy_commands += (
+            f"cp "
+            f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} "
             f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n"
-
+        )
     if pythia8_card_file_from_mgprocdir is not None and order == "LO":
-        copy_commands += f"cp " \
-            f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \
+        copy_commands += (
+            f"cp "
+            f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} "
             f"{mg_process_directory_placeholder}/Cards/pythia8_card.dat\n"
-
+        )
     if pythia8_card_file_from_mgprocdir is not None and order == "NLO":
-        copy_commands += f"cp " \
-            f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \
+        copy_commands += (
+            f"cp "
+            f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} "
             f"{mg_process_directory_placeholder}/Cards/shower_card.dat\n"
-
+        )
     if configuration_file_from_mgprocdir is not None:
-        copy_commands += f"cp " \
-            f"{mg_process_directory_placeholder}/{configuration_file_from_mgprocdir} " \
+        copy_commands += (
+            f"cp "
+            f"{mg_process_directory_placeholder}/{configuration_file_from_mgprocdir} "
             f"{mg_process_directory_placeholder}/Cards/me5_configuration.txt\n"
+        )
 
     # Replace environment variable in proc card
     replacement_command = """sed -e 's@\$mgprocdir@'"$mgprocdir"'@' {}/{} > {}/{}""".format(
@@ -303,11 +310,12 @@ def setup_mg_with_scripts(
     make_file_executable(script_file)
 
     # How to call it from master script
-    call_placeholder = \
-        f"{mg_process_directory_placeholder}/{script_file_from_mgprocdir} " \
-        f"{mg_directory_placeholder} " \
-        f"{mg_process_directory_placeholder} " \
+    call_placeholder = (
+        f"{mg_process_directory_placeholder}/{script_file_from_mgprocdir} "
+        f"{mg_directory_placeholder} "
+        f"{mg_process_directory_placeholder} "
         f"{log_dir_placeholder}"
+    )
 
     return call_placeholder
 
@@ -518,9 +526,11 @@ def setup_mg_reweighting_with_scripts(
 
     #  Card copying commands
     if reweight_card_file_from_mgprocdir is not None:
-        copy_commands = f"cp " \
-            f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \
+        copy_commands = (
+            f"cp "
+            f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} "
             f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n"
+        )
     else:
         copy_commands = ""
 
@@ -544,9 +554,10 @@ def setup_mg_reweighting_with_scripts(
     make_file_executable(script_file)
 
     # How to call it from master script
-    call_instruction = \
-        f"{mg_process_directory}/{script_file_from_mgprocdir} " \
+    call_instruction = (
+        f"{mg_process_directory}/{script_file_from_mgprocdir} "
         f"[MG_process_directory] [log_directory]"
+    )
 
     return call_instruction
 

From 636bb3df14b2b069c2adec3cec684b6056cea02a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Tue, 20 Apr 2021 09:50:05 -0400
Subject: [PATCH 58/65] fix: utils/interfaces module remove python2

---
 madminer/utils/interfaces/mg.py | 53 +++++++++------------------------
 1 file changed, 14 insertions(+), 39 deletions(-)

diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py
index 2363c9c67..92d2ef73d 100644
--- a/madminer/utils/interfaces/mg.py
+++ b/madminer/utils/interfaces/mg.py
@@ -15,7 +15,6 @@ def generate_mg_process(
     ufo_model_directory=None,
     log_file=None,
     initial_command=None,
-    explicit_python_call=False,
     python_executable=None,
 ):
 
@@ -47,9 +46,6 @@ def generate_mg_process(
     log_file : str or None, optional
         Path to a log file in which the MadGraph output is saved. Default value: None.
 
-    explicit_python_call : bool, optional
-        Calls `python2.7` instead of `python`.
-
     python_executable : None or str, optional
         Overwrites the default Python executable
 
@@ -76,16 +72,11 @@ def generate_mg_process(
         myfile.write(f"\n")
         myfile.write(f"output {mg_process_directory}")
 
-    # Call MG5
-    initial_command = f"{initial_command}; " if initial_command else ""
-
-    # Explicitly call Python 2 if necessary
-    if explicit_python_call:
-        python_call = f"{python_executable} "  if python_executable is not None else "python2.7 "
-    else:
-        python_call = ""
+    # Call specific initial command and Python binary
+    initial_command = f"{initial_command}; " if initial_command is not None else ""
+    python_binary = f"{python_executable} " if python_executable is not None else ""
 
-    command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {temp_proc_card_file}"
+    command = f"{initial_command}{python_binary}{mg_directory}/bin/mg5_aMC {temp_proc_card_file}"
     logger.info(f"Calling MadGraph: {command}")
 
     _ = call_command(cmd=command, log_file=log_file)
@@ -104,7 +95,6 @@ def setup_mg_with_scripts(
     initial_command=None,
     log_dir=None,
     log_file_from_logdir=None,
-    explicit_python_call=False,
     order="LO",
     python_executable=None,
 ):
@@ -160,9 +150,6 @@ def setup_mg_with_scripts(
         Path to a log file in which the MadGraph output is saved, relative from the default log directory. Default
         value: None.
 
-    explicit_python_call : bool, optional
-        Calls `python2.7` instead of `python`.
-
     python_executable : None or str, optional
         Overwrites the default Python executable
 
@@ -230,10 +217,6 @@ def setup_mg_with_scripts(
     with open(proc_card_filename, "w") as file:
         file.write(mg_commands)
 
-    # Initial commands
-    if initial_command is None:
-        initial_command = ""
-
     #  Card copying commands
     copy_commands = ""
     if run_card_file_from_mgprocdir is not None:
@@ -281,11 +264,9 @@ def setup_mg_with_scripts(
         "Cards/mg_commands.mg5",
     )
 
-    # Explicitly call Python 2 if necessary
-    if explicit_python_call:
-        python_call = python_executable + " " if python_executable is not None else "python2.7 "
-    else:
-        python_call = ""
+    # Call specific initial command and Python binary
+    initial_command = f"{initial_command} " if initial_command is not None else ""
+    python_binary = f"{python_executable} " if python_executable is not None else ""
 
     # Put together script
     script = (
@@ -297,7 +278,7 @@ def setup_mg_with_scripts(
         placeholder_definition,
         copy_commands,
         replacement_command,
-        python_call,
+        python_binary,
         mg_directory_placeholder,
         mg_process_directory_placeholder,
         "Cards/mg_commands.mg5",
@@ -332,7 +313,6 @@ def run_mg(
     is_background=False,
     initial_command=None,
     log_file=None,
-    explicit_python_call=False,
     order="LO",
     python_executable=None,
 ):
@@ -382,8 +362,8 @@ def run_mg(
     log_file : str or None, optional
         Path to a log file in which the MadGraph output is saved. Default value: None.
 
-    explicit_python_call : bool, optional
-        Calls `python2.7` instead of `python`.
+    python_executable : None or str, optional
+        Overwrites the default Python executable
 
     Returns
     -------
@@ -439,16 +419,11 @@ def run_mg(
     with open(proc_card_filename, "w") as file:
         file.write(mg_commands)
 
-    # Call MG5
-    initial_command = f"{initial_command}; " if initial_command else ""
-
-    # Explicitly call Python 2 if necessary
-    if explicit_python_call:
-        python_call = f"{python_executable} "  if python_executable is not None else "python2.7 "
-    else:
-        python_call = ""
+    # Call specific initial command and Python binary
+    initial_command = f"{initial_command}; " if initial_command is not None else ""
+    python_binary = f"{python_executable} " if python_executable is not None else ""
 
-    command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {proc_card_filename}"
+    command = f"{initial_command}{python_binary}{mg_directory}/bin/mg5_aMC {proc_card_filename}"
     logger.info(f"Calling MadGraph: {command}")
 
     _ = call_command(cmd=command, log_file=log_file)

From 1ca1e3888896c128ffb8f66118af532723ab0e1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Tue, 20 Apr 2021 10:25:31 -0400
Subject: [PATCH 59/65] fix: utils/interfaces module remove 'six'

---
 madminer/utils/interfaces/madminer_hdf5.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py
index cf721799d..864c4ec03 100644
--- a/madminer/utils/interfaces/madminer_hdf5.py
+++ b/madminer/utils/interfaces/madminer_hdf5.py
@@ -359,12 +359,12 @@ def _save_finite_differences(filename, finite_difference_benchmarks, finite_diff
     io_tag = "a"  # Read-write if file exists, otherwise create
     with h5py.File(filename, io_tag) as f:
         n_keys = len(finite_difference_benchmarks)
-        n_values = len(finite_difference_benchmarks[list(six.iterkeys(finite_difference_benchmarks))[0]])
+        n_values = len(finite_difference_benchmarks[list(finite_difference_benchmarks.keys())[0]])
 
-        keys_ascii = [key.encode("ascii", "ignore") for key in six.iterkeys(finite_difference_benchmarks)]
+        keys_ascii = [key.encode("ascii", "ignore") for key in finite_difference_benchmarks.keys()]
         values_ascii = [
-            [val.encode("ascii", "ignore") for val in six.itervalues(values)]
-            for values in six.itervalues(finite_difference_benchmarks)
+            [val.encode("ascii", "ignore") for val in values.values()]
+            for values in finite_difference_benchmarks.values()
         ]
 
         f.create_dataset("finite_differences/base_benchmarks", (n_keys,), dtype="S256", data=keys_ascii)

From e898f82a3b9ea1add99e379a112fa19de7a263d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Tue, 20 Apr 2021 10:11:54 -0400
Subject: [PATCH 60/65] fix: core module remove python2

---
 madminer/core/madminer.py | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py
index 9cafddfc0..77ec1266b 100644
--- a/madminer/core/madminer.py
+++ b/madminer/core/madminer.py
@@ -716,7 +716,6 @@ def run(
         log_directory=None,
         temp_directory=None,
         initial_command=None,
-        python2_override=False,
         systematics=None,
         order="LO",
         python_executable=None,
@@ -793,11 +792,6 @@ def run(
             Initial shell commands that have to be executed before MG is run (e.g. to load a virtual environment).
             Default value: None.
 
-        python2_override : bool, optional
-            If True, MadMiner explicitly calls "python2" instead of relying on the system Python version to be
-            Python 2.6 or Python 2.7. If you use systematics, make sure that the python interface of LHAPDF was compiled
-            with the Python version you are using. Default: False.
-
         systematics : None or list of str, optional
             If list of str, defines which systematics are used for this run.
 
@@ -832,7 +826,6 @@ def run(
             log_directory=log_directory,
             temp_directory=temp_directory,
             initial_command=initial_command,
-            python2_override=python2_override,
             systematics=systematics,
             order=order,
             python_executable=python_executable,
@@ -854,7 +847,6 @@ def run_multiple(
         log_directory=None,
         temp_directory=None,
         initial_command=None,
-        python2_override=False,
         systematics=None,
         order="LO",
         python_executable=None,
@@ -924,11 +916,6 @@ def run_multiple(
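-            If not specified and `python2_override` is True, it adds the user-installed Python2 binaries to the PATH.
+            If not specified and `python_executable` is given, it adds the folder of that executable to the PATH.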
             Default value: None.
 
-        python2_override : bool, optional
-            If True, MadMiner explicitly calls "python2" instead of relying on the system Python version to be
-            Python 2.6 or Python 2.7. If you use systematics, make sure that the python interface of LHAPDF was compiled
-            with the Python version you are using. Default: False.
-
         systematics : None or list of str, optional
             If list of str, defines which systematics are used for these runs.
 
@@ -958,19 +945,14 @@ def run_multiple(
         if sample_benchmarks is None:
             sample_benchmarks = [benchmark for benchmark in self.benchmarks]
 
-        # Python 2 override options
-
-        # Gives 'python2_override' full power if 'initial_command' is empty.
-        # (Reference: https://github.com/diana-hep/madminer/issues/422)
-        if python2_override and initial_command is None and not python_executable:
-            logger.warning(
-                "The keyword python2_override is discouraged. "
-                "Instead, consider using python_executable."
-            )
+        # When a Python executable is given but no initial command is set,
+        # prepend the executable's folder to PATH (useful with virtual envs).
+        # Ref (Python2 - Python3 issue): https://github.com/diana-hep/madminer/issues/422
+        if python_executable and initial_command is None:
+            logger.info(f"Adding {python_executable} bin folder to PATH")
+            binary_path = os.popen(f"command -v {python_executable}").read().strip()
+            binary_folder = os.path.dirname(binary_path)
 
-            logger.info("Adding Python2.7 bin folder to PATH")
-            binary_path = os.popen("command -v python2.7").read().strip()
-            binary_folder = os.path.dirname(os.path.realpath(binary_path))
             initial_command = f"export PATH={binary_folder}:$PATH"
             logger.info(f"Using Python executable {binary_path}")
 
@@ -985,7 +967,6 @@ def run_multiple(
             ufo_model_directory=ufo_model_directory,
             initial_command=initial_command,
             log_file=log_file_generate,
-            explicit_python_call=python2_override or (python_executable is not None),
             python_executable=python_executable,
         )
 
@@ -1089,7 +1070,6 @@ def run_multiple(
                         initial_command=initial_command,
                         log_dir=log_directory,
                         log_file_from_logdir=log_file_run,
-                        explicit_python_call=python2_override or (python_executable is not None),
                         python_executable=python_executable,
                         order=order,
                     )
@@ -1107,7 +1087,6 @@ def run_multiple(
                         is_background=is_background,
                         initial_command=initial_command,
                         log_file=f"{log_directory}/{log_file_run}",
-                        explicit_python_call=python2_override or (python_executable is not None),
                         python_executable=python_executable,
                         order=order,
                     )

From c31c9055b26c08704093abffc473dd438acc0f21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Tue, 20 Apr 2021 10:01:39 -0400
Subject: [PATCH 61/65] fix: core module remove 'six'

---
 madminer/core/madminer.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py
index 77ec1266b..3c5d94e22 100644
--- a/madminer/core/madminer.py
+++ b/madminer/core/madminer.py
@@ -398,12 +398,11 @@ def finite_differences(self, epsilon=0.01):
         self.finite_difference_benchmarks = OrderedDict()
         self.finite_difference_epsilon = epsilon
 
-        for benchmark_key, benchmark_spec in six.iteritems(
-            self.benchmarks.copy()
-        ):  # Copy is necessary to avoid endless loop :/
+        # Copy is necessary to avoid endless loop :/
+        for benchmark_key, benchmark_spec in self.benchmarks.copy().items():
             fd_keys = {}
 
-            for param_key, param_value in six.iteritems(benchmark_spec):
+            for param_key, param_value in benchmark_spec.items():
                 fd_key = benchmark_key + "_plus_" + param_key
                 fd_spec = benchmark_spec.copy()
                 fd_spec[param_key] += epsilon

From 2da8c96611f8ed1e12594e0eeb735617abcdddfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Tue, 20 Apr 2021 10:21:00 -0400
Subject: [PATCH 62/65] docs: update installation guide

---
 docs/installation.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/docs/installation.md b/docs/installation.md
index b6b79ce1b..269c06bda 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -3,10 +3,8 @@
 ## Simulator dependencies
 
 Make sure the following tools are installed and running:
-- MadGraph (we've tested our setup with MG5_aMC v2.6.2 and v2.6.5). See
-  [https://launchpad.net/mg5amcnlo](https://launchpad.net/mg5amcnlo) for installation instructions. Note that MadGraph
-  requires a Fortran compiler as well as Python 2.6 or 2.7. (Note that you can still run most MadMiner analysis steps
-  with Python 3.)
+- MadGraph (we've tested our setup with MG5_aMC v2.8.0+). See [https://launchpad.net/mg5amcnlo](https://launchpad.net/mg5amcnlo)
+  for installation instructions. Note that MadGraph requires a Fortran compiler as well as Python 3.6+.
 - For the analysis of systematic uncertainties, LHAPDF6 has to be installed with Python support (see also
   [the documentation of MadGraph's systematics tool](https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics)).
 

From 5e86402b659dad3fe7f4a7910f4fe1d6a474f2df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Fri, 16 Apr 2021 09:42:47 -0400
Subject: [PATCH 63/65] Bound uproot between 3.11 and 4.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 081a28cdf..42f5b291b 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@
     "scipy>=1.0.0",
     "scikit-hep>=0.5.0, <0.6.0",
     "torch>=1.0.0",
-    "uproot",
+    "uproot>=3.11.0,<4.0.0",
 ]
 
 EXTRAS_DOCS = sorted(

From 6379ad6a860e3a55e04ad95fe522c6570aa26310 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Fri, 16 Apr 2021 12:48:48 -0400
Subject: [PATCH 64/65] Set uproot3 instead of uproot

Co-authored-by: Matthew Feickert <matthew.feickert@cern.ch>
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 42f5b291b..b45ea5ff2 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@
     "scipy>=1.0.0",
     "scikit-hep>=0.5.0, <0.6.0",
     "torch>=1.0.0",
-    "uproot>=3.11.0,<4.0.0",
+    "uproot3>=3.14.1",
 ]
 
 EXTRAS_DOCS = sorted(

From 9cc0cac6611a8f324511c70dce734089b8cc71ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= <Sinclert@hotmail.com>
Date: Fri, 16 Apr 2021 12:56:34 -0400
Subject: [PATCH 65/65] Use uproot3 instead of uproot (src)

---
 madminer/utils/interfaces/delphes_root.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py
index ba6b5cca7..cca262864 100644
--- a/madminer/utils/interfaces/delphes_root.py
+++ b/madminer/utils/interfaces/delphes_root.py
@@ -1,7 +1,7 @@
 import logging
 import numpy as np
 import os
-import uproot
+import uproot3
 from collections import OrderedDict
 
 from madminer.utils.particle import MadMinerParticle
@@ -39,7 +39,7 @@ def parse_delphes_root_file(
         logger.debug("Extracting weights %s", weight_labels)
 
     # Delphes ROOT file
-    root_file = uproot.open(delphes_sample_file)
+    root_file = uproot3.open(delphes_sample_file)
 
     # Delphes tree
     tree = root_file["Delphes"]