From cf731a464f63da54c06dc46e3b651ddf77ce1d39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:39:02 -0400 Subject: [PATCH 01/65] package: bump up version to 0.8.0 --- madminer/__info__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/madminer/__info__.py b/madminer/__info__.py index 26db542e5..a1f1f09e3 100644 --- a/madminer/__info__.py +++ b/madminer/__info__.py @@ -1,3 +1,3 @@ __authors__ = ", ".join(["Johann Brehmer", "Felix Kling", "Irina Espejo", "Sinclert Perez", "Kyle Cranmer"]) -__version__ = "0.7.6" +__version__ = "0.8.0" From 358d39fc8c9f6799c6c53aef3c01d2801a01dd2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:02:45 -0400 Subject: [PATCH 02/65] package: update setup.py to Python3.6 --- setup.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 150c3bcb5..081a28cdf 100644 --- a/setup.py +++ b/setup.py @@ -8,20 +8,21 @@ import os import sys +from pathlib import Path from shutil import rmtree from setuptools import find_packages, setup, Command -project_dir = os.path.abspath(os.path.dirname(__file__)) +project_dir = Path(__file__).parent # Import the README and use it as the long-description. -with open(os.path.join(project_dir, 'README.md')) as f: +with open(project_dir.joinpath('README.md')) as f: LONG_DESCRIPTION = '\n' + f.read() # Load the package's __version__.py module as a dictionary. info = {} -with open(os.path.join(project_dir, 'madminer', '__info__.py')) as f: +with open(project_dir.joinpath('madminer', '__info__.py')) as f: exec(f.read(), info) @@ -30,17 +31,15 @@ DESCRIPTION = 'Mining gold from MadGraph to improve limit setting in particle physics.' 
URL = 'https://github.com/diana-hep/madminer' EMAIL = 'johann.brehmer@nyu.edu' -REQUIRES_PYTHON = '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4' +REQUIRES_PYTHON = '>=3.6, <4' AUTHORS = info['__authors__'] VERSION = info['__version__'] REQUIRED = [ - "future", "h5py", "matplotlib>=2.0.0", "numpy>=1.13.0", "scipy>=1.0.0", "scikit-hep>=0.5.0, <0.6.0", - "six", "torch>=1.0.0", "uproot", ] @@ -129,6 +128,8 @@ def run(self): 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], # $ setup.py publish support. cmdclass={ From 5f5e116b45814d1471030cd06532fa07a4591343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 14:29:31 -0400 Subject: [PATCH 03/65] conda: remove Python2 deps. --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index 4c31ec743..aef0ad51f 100644 --- a/environment.yml +++ b/environment.yml @@ -10,7 +10,6 @@ dependencies: - h5py - scipy>=1.0.0 - scikit-learn>=0.19.0 -- six - pytorch>=1.0.0 - pytest - pip From 68aa96873bc0988eb4d2eada7e25783cf9b35ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:06:25 -0400 Subject: [PATCH 04/65] docker: remove Python2 commands --- Dockerfile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8f346ddde..637277b29 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,16 +4,10 @@ FROM rootproject/root-ubuntu USER root RUN apt-get update && apt-get install -y \ - python-dev \ - python-tk \ python3-tk \ python3-pip -# Python2 pip is not longer shiped with Ubuntu (20.04+) -RUN curl "https://bootstrap.pypa.io/get-pip.py" --output get-pip.py && \ - python get-pip.py - -RUN pip install --upgrade --no-cache-dir pip && \ - pip install --upgrade --no-cache-dir madminer +RUN pip3 install --upgrade 
--no-cache-dir pip && \ + pip3 install --upgrade --no-cache-dir madminer WORKDIR /home/ From 8d3c90a36ff6a06332fb55647f51ab1f331192bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:07:33 -0400 Subject: [PATCH 05/65] travis: update CI Python versions --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f19bbe291..ff96403fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,9 +7,9 @@ services: - docker language: python python: -- '2.7' -- '3.5' - '3.6' +- '3.7' +- '3.8' install: - pip install -q --no-cache-dir -e . - travis_wait pip install -q --no-cache-dir -e . From e6727900cdf888180040adab1d06cef53bdb3f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:09:27 -0400 Subject: [PATCH 06/65] tests: remove empty module --- .travis.yml | 1 - tests/test_imports.py | 2 -- 2 files changed, 3 deletions(-) delete mode 100644 tests/test_imports.py diff --git a/.travis.yml b/.travis.yml index ff96403fc..e4398f53b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,6 @@ install: - pip install -q --no-cache-dir -e . - travis_wait pip install -q --no-cache-dir -e . 
script: -- pytest tests/test_imports.py - pytest -s tests/test_ratio_estimation.py - pytest -s tests/test_nuisance.py jobs: diff --git a/tests/test_imports.py b/tests/test_imports.py deleted file mode 100644 index 1d60b3ca2..000000000 --- a/tests/test_imports.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_imports(): - assert True From e1db7ca2d2b3534ad58c7f8367d6bd79e1a31c23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:23:05 -0400 Subject: [PATCH 07/65] tests: drop Python2 support --- tests/test_nuisance.py | 2 -- tests/test_ratio_estimation.py | 8 ++------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/test_nuisance.py b/tests/test_nuisance.py index 4ffc2ef49..3c9d6348e 100644 --- a/tests/test_nuisance.py +++ b/tests/test_nuisance.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import os import numpy as np from collections import OrderedDict diff --git a/tests/test_ratio_estimation.py b/tests/test_ratio_estimation.py index 2037b1860..c6ed1c98c 100644 --- a/tests/test_ratio_estimation.py +++ b/tests/test_ratio_estimation.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import os import numpy as np import logging @@ -139,9 +137,9 @@ def test_ratio_estimation(): for method, alpha in zip(methods, alphas): this_rmses = [] for sample_size in sample_sizes: - print("Training method {} on {} samples".format(method, sample_size)) + print(f"Training method {method} on {sample_size} samples") this_rmses.append(run_test(method, alpha, sample_size)) - print(" -> MSE =", this_rmses[-1]) + print(f" -> MSE = {this_rmses[-1]}") rmses.append(this_rmses) rmses = np.asarray(rmses) @@ -153,8 +151,6 @@ def test_ratio_estimation(): for method, this_rmses in zip(methods, rmses): print(" {:>6s} | {:11.3f} | {:11.3f} ".format(method, this_rmses[0], this_rmses[1])) - print("") - assert np.max(rmses[:, -1]) < 100.0 From 
3756a8d2e6343a307cdb9c8c3a003d68ad33b8bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:25:13 -0400 Subject: [PATCH 08/65] tests: improve fmt --- tests/test_ratio_estimation.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/tests/test_ratio_estimation.py b/tests/test_ratio_estimation.py index c6ed1c98c..3da03f5c5 100644 --- a/tests/test_ratio_estimation.py +++ b/tests/test_ratio_estimation.py @@ -1,24 +1,21 @@ import os import numpy as np import logging + +from madminer import ParameterizedRatioEstimator from scipy.stats import norm + # MadMiner output logging.basicConfig( - format="%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s", datefmt="%H:%M", level=logging.WARNING + format="%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s", + datefmt="%H:%M", + level=logging.WARNING, ) -# Output of all other modules (e.g. matplotlib) -for key in logging.Logger.manager.loggerDict: - if "madminer" not in key: - logging.getLogger(key).setLevel(logging.WARNING) - -from madminer import ParameterizedRatioEstimator - if not os.path.exists("tests/data"): os.makedirs("tests/data") - # Simulator settings z_std = 2.0 x_std = 1.0 @@ -32,21 +29,20 @@ def simulate(theta, theta0=None, theta1=None, theta_score=None, npoints=None): # Draw observable x = np.random.normal(loc=z, scale=x_std, size=None) + r_xz = None + t_xz = None + # Calculate joint likelihood ratio and joint score if theta0 is not None and theta1 is not None: r_xz = norm(loc=theta0, scale=z_std).pdf(z) / norm(loc=theta1, scale=z_std).pdf(z) - else: - r_xz = None if theta_score is not None: t_xz = (x - theta_score) / z_std ** 2 - else: - t_xz = None return x, r_xz, t_xz -# True likeleihood ratio function +# True likelihood ratio function def calculate_likelihood_ratio(x, theta0, theta1=0.0): combined_std = (z_std ** 2 + x_std ** 2) ** 0.5 r_x = norm(loc=theta0, scale=combined_std).pdf(x) / 
norm(loc=theta1, scale=combined_std).pdf(x) @@ -54,7 +50,7 @@ def calculate_likelihood_ratio(x, theta0, theta1=0.0): def generate_data(sample_sizes): - # Run simulator and generate etraining data + # Run simulator and generate training data n_param_points = max(sample_sizes) // 2 # number of parameter points to train theta0 = np.random.uniform(low=-4.0, high=4.0, size=n_param_points) # numerator, uniform prior @@ -116,13 +112,13 @@ def run_test(method, alpha, sample_size): # Evaluation log_r_tests_alices, _ = estimator.evaluate( - theta="tests/data/theta_grid.npy", x="tests/data/x_test.npy", evaluate_score=False + theta="tests/data/theta_grid.npy", + x="tests/data/x_test.npy", + evaluate_score=False, ) # Calculate error - rmse = np.mean((log_r_test_true - log_r_tests_alices) ** 2) - - return rmse + return np.mean((log_r_test_true - log_r_tests_alices) ** 2) def test_ratio_estimation(): From e357bad6b537c32eb19a3e746340242614d107fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:35:30 -0400 Subject: [PATCH 09/65] docs: drop Python2 support --- docs/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index b1648860c..5c5d52b3d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,13 +15,13 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) -import os +from pathlib import Path # -- Project information ----------------------------------------------------- info = {} -project_folder = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -with open(os.path.join(project_folder, 'madminer', '__info__.py')) as f: +project_folder = Path(__file__).parent.parent +with open(project_folder.joinpath('madminer', '__info__.py')) as f: exec(f.read(), info) project = 'MadMiner' From 006e3d04b26eee1a7a0ebed44d531651b9619439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:36:42 -0400 Subject: [PATCH 10/65] docs: improve fmt --- 
docs/conf.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 5c5d52b3d..5f7a534d4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,8 +40,7 @@ # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', @@ -56,7 +55,6 @@ # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -#source_suffix = '.rst' # Markdown support from recommonmark.parser import CommonMarkParser @@ -107,11 +105,11 @@ # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html']``. # -# html_sidebars = {"toc.rst"} -html_sidebars = { '**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'] } +html_sidebars = { + '**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'], +} # -- Options for HTMLHelp output --------------------------------------------- @@ -154,8 +152,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - (master_doc, 'madminer', 'MadMiner Documentation', - [authors], 1) + (master_doc, 'madminer', 'MadMiner Documentation', [authors], 1), ] From 97753f4703203fa6028a4854bbd9a686db51f9d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:39:45 -0400 Subject: [PATCH 11/65] package: harmonize imports format --- madminer/__init__.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/madminer/__init__.py b/madminer/__init__.py index c0e936818..e21362299 100644 --- a/madminer/__init__.py +++ b/madminer/__init__.py @@ -2,9 +2,20 @@ from .analysis import DataAnalyzer from .core import MadMiner from .delphes import DelphesReader -from .fisherinformation import FisherInformation, InformationGeometry, profile_information, project_information +from .fisherinformation import ( + FisherInformation, + InformationGeometry, + profile_information, + project_information, +) from .lhe import LHEReader -from .likelihood import HistoLikelihood, NeuralLikelihood, fix_params, project_log_likelihood, profile_log_likelihood +from .likelihood import ( + HistoLikelihood, + NeuralLikelihood, + fix_params, + project_log_likelihood, + profile_log_likelihood, +) from .limits import AsymptoticLimits from .ml import ( ParameterizedRatioEstimator, From 946f16cad95c1adc2f75a90e1d25f2dd48d878df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:54:15 -0400 Subject: [PATCH 12/65] src: analyzer module f-strings --- madminer/analysis/dataanalyzer.py | 61 ++++++++++++++----------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/madminer/analysis/dataanalyzer.py b/madminer/analysis/dataanalyzer.py index 43a045c6d..974cd2e7d 100644 --- a/madminer/analysis/dataanalyzer.py +++ b/madminer/analysis/dataanalyzer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np import six @@ -134,7 
+132,7 @@ def event_loader( include_nuisance_parameters = self.include_nuisance_parameters sampling_benchmark = self._find_closest_benchmark(generated_close_to) - logger.debug("Sampling benchmark closest to %s: %s", generated_close_to, sampling_benchmark) + logger.debug(f"Sampling benchmark closest to {generated_close_to}: {sampling_benchmark}") if sampling_benchmark is None: sampling_factors = self._calculate_sampling_factors() @@ -218,7 +216,7 @@ def weighted_events( x = x[idx] weights_benchmarks = weights_benchmarks[idx] elif n_draws is not None: - logger.warning("Requested %s events, but only %s available", n_draws, n_events) + logger.warning(f"Requested {n_draws} events, but only {n_events} available") # Process and return appropriate weights if theta is None: @@ -312,7 +310,7 @@ def xsecs( partition, test_split, validation_split ) else: - raise ValueError("Events has to be either 'all', 'train', or 'test', but got {}!".format(partition)) + raise ValueError(f"Invalid partition type: {partition}") # Theta matrices (translation of benchmarks to theta, at nominal nuisance params) if thetas is None: @@ -423,7 +421,7 @@ def xsec_gradients( Calculated cross section gradients in pb with shape (n_gradients,). """ - logger.debug("Calculating cross section gradients for thetas = %s and nus = %s", thetas, nus) + logger.debug(f"Calculating cross section gradients for thetas = {thetas} and nus = {nus}") # Inputs include_nuisance_benchmarks = nus is not None or gradients in ["all", "nu"] @@ -431,7 +429,7 @@ def xsec_gradients( nus = [None for _ in thetas] assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!" 
if gradients not in ["all", "theta", "nu"]: - raise RuntimeError("Gradients has to be 'all', 'theta', or 'nu', but got {}".format(gradients)) + raise RuntimeError(f"Invalid gradients type: {gradients}") # Which events to use if partition == "all": @@ -442,7 +440,7 @@ def xsec_gradients( partition, test_split, validation_split ) else: - raise ValueError("Events has to be either 'all', 'train', or 'test', but got {}!".format(partition)) + raise ValueError(f"Invalid partition type: {partition}") # Theta matrices (translation of benchmarks to theta, at nominal nuisance params) theta_matrices = np.asarray( @@ -465,7 +463,7 @@ def xsec_gradients( ) ): n_batch, _ = benchmark_weights.shape - logger.debug("Batch %s with %s events", i_batch + 1, n_batch) + logger.debug(f"Batch {i_batch+1} with {n_batch} events") if gradients in ["all", "theta"]: nom_gradients = mdot( @@ -512,31 +510,33 @@ def _check_n_events(self): ) def _report_setup(self): - logger.info("Found %s parameters", self.n_parameters) - for i, (key, values) in enumerate(six.iteritems(self.parameters)): - logger.info(" %s: %s (%s)", i, key, " / ".join(str(x) for x in values)) + logger.info(f"Found {self.n_parameters} parameters") + for i, (key, values) in enumerate(self.parameters.items()): + values_str = " / ".join(str(x) for x in values) + logger.info(f" {i}: {key} ({values_str})") if self.nuisance_parameters is not None: - logger.info("Found %s nuisance parameters", self.n_nuisance_parameters) - for i, (key, values) in enumerate(six.iteritems(self.systematics)): - logger.info(" %s: %s (%s)", i, key, " / ".join(str(x) for x in values)) + logger.info(f"Found {self.n_nuisance_parameters} nuisance parameters") + for i, (key, values) in enumerate(self.systematics.items()): + values_str = " / ".join(str(x) for x in values) + logger.info(f" {i}: {key} ({values_str})") else: logger.info("Did not find nuisance parameters") self.include_nuisance_parameters = False - logger.info("Found %s benchmarks, of which %s 
physical", self.n_benchmarks, self.n_benchmarks_phys) + logger.info(f"Found {self.n_benchmarks} benchmarks") for (key, values), is_nuisance in zip(six.iteritems(self.benchmarks), self.benchmark_is_nuisance): if is_nuisance: logger.debug(" %s: systematics", key) else: logger.debug(" %s: %s", key, format_benchmark(values)) - logger.info("Found %s observables", self.n_observables) + logger.info(f"Found {self.n_observables} observables") if self.observables is not None: for i, obs in enumerate(self.observables): logger.debug(" %2.2s %s", i, obs) - logger.info("Found %s events", self.n_samples) + logger.info(f"Found {self.n_samples} events") if self.n_events_generated_per_benchmark is not None: for events, name in zip(self.n_events_generated_per_benchmark, six.iterkeys(self.benchmarks)): if events > 0: @@ -765,7 +765,7 @@ def _train_test_split(self, train, test_split): end_event = int(round((1.0 - test_split) * self.n_samples, 0)) correction_factor = 1.0 / (1.0 - test_split) if end_event < 0 or end_event > self.n_samples: - raise ValueError("Irregular train / test split: sample {} / {}", end_event, self.n_samples) + raise ValueError(f"Irregular split: sample {end_event} / {self.n_samples}") else: if test_split is None or test_split <= 0.0 or test_split >= 1.0: @@ -775,7 +775,7 @@ def _train_test_split(self, train, test_split): start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1 correction_factor = 1.0 / test_split if start_event < 0 or start_event > self.n_samples: - raise ValueError("Irregular train / test split: sample {} / {}", start_event, self.n_samples) + raise ValueError(f"Irregular split: sample {start_event} / {self.n_samples}") end_event = None @@ -826,9 +826,7 @@ def _train_validation_test_split(self, partition, test_split, validation_split): correction_factor = 1.0 / train_split if end_event < 0 or end_event > self.n_samples: - raise ValueError( - "Irregular train / validation / test split: sample {} / {}", end_event, self.n_samples - ) + 
raise ValueError(f"Irregular split: sample {end_event} / {self.n_samples}") elif partition == "validation": if validation_split is None or validation_split <= 0.0 or validation_split >= 1.0: @@ -842,13 +840,10 @@ def _train_validation_test_split(self, partition, test_split, validation_split): correction_factor = 1.0 / validation_split if start_event < 0 or start_event > self.n_samples: - raise ValueError( - "Irregular train / validation / test split: sample {} / {}", start_event, self.n_samples - ) + raise ValueError(f"Irregular split: sample {start_event} / {self.n_samples}") + if end_event < 0 or end_event > self.n_samples: - raise ValueError( - "Irregular train / validation / test split: sample {} / {}", end_event, self.n_samples - ) + raise ValueError(f"Irregular split: sample {end_event} / {self.n_samples}") elif partition == "test": end_event = None @@ -860,12 +855,10 @@ def _train_validation_test_split(self, partition, test_split, validation_split): start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1 correction_factor = 1.0 / test_split if start_event < 0 or start_event > self.n_samples: - raise ValueError( - "Irregular train / validation / test split: sample {} / {}", start_event, self.n_samples - ) + raise ValueError(f"Irregular split: sample {start_event} / {self.n_samples}") else: - raise RuntimeError("Unknown partition {}, has to be 'train', 'validation', or 'test'.") + raise RuntimeError(f"Unknown partition {partition}") return start_event, end_event, correction_factor @@ -950,7 +943,7 @@ def _get_dtheta_benchmark_matrix(self, theta, zero_pad=True): def _calculate_sampling_factors(self): events = np.asarray(self.n_events_generated_per_benchmark, dtype=np.float) - logger.debug("Events per benchmark: %s", events) + logger.debug(f"Events per benchmark: {events}") factors = events / np.sum(events) factors = np.hstack((factors, 1.0)) # background events return factors From 04af5a8a1acfa213e0fa5d4e65a66a4b6c8d45c5 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 10:57:53 -0400 Subject: [PATCH 13/65] src: analyzer module improve fmt --- madminer/analysis/dataanalyzer.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/madminer/analysis/dataanalyzer.py b/madminer/analysis/dataanalyzer.py index 974cd2e7d..e5c3e3434 100644 --- a/madminer/analysis/dataanalyzer.py +++ b/madminer/analysis/dataanalyzer.py @@ -113,7 +113,7 @@ def event_loader( at the closest benchmark point to a given parameter point. return_sampling_ids : bool, optional - If True, the iterator returns the sampling IDs in additioin to observables and weights. + If True, the iterator returns the sampling IDs in addition to observables and weights. Yields ------ @@ -206,11 +206,17 @@ def weighted_events( """ x, weights_benchmarks = next( - self.event_loader(batch_size=None, start=start_event, end=end_event, generated_close_to=generated_close_to) + self.event_loader( + start=start_event, + end=end_event, + batch_size=None, + generated_close_to=generated_close_to, + ) ) # Pick events randomly n_events = len(x) + if n_draws is not None and n_draws < n_events: idx = np.random.choice(n_events, n_draws, replace=False) x = x[idx] @@ -232,7 +238,7 @@ def weighted_events( else: # TODO: nuisance params if nu is not None: - raise NotImplementedError + raise NotImplementedError() theta_matrix = self._get_theta_benchmark_matrix(theta) weights_theta = mdot(theta_matrix, weights_benchmarks) return x, weights_theta @@ -399,11 +405,14 @@ def xsec_gradients( account. Otherwise, the list has to have the same number of elements as thetas, and each entry can specify nuisance parameters at nominal value (None) or a value of the nuisance parameters (ndarray). + partition : {"train", "test", "validation", "all"}, optional + Which events to use. Default: "all". + test_split : float, optional Fraction of events reserved for testing. Default value: 0.2. 
- partition : {"train", "test", "validation", "all"}, optional - Which events to use. Default: "all". + validation_split : float, optional + Fraction of weighted events reserved for validation. Default value: 0.2. gradients : {"all", "theta", "nu"}, optional Which gradients to calculate. Default value: "all". @@ -446,6 +455,7 @@ def xsec_gradients( theta_matrices = np.asarray( [self._get_theta_benchmark_matrix(theta) for theta in thetas] ) # shape (n_thetas, n_benchmarks) + theta_gradient_matrices = np.asarray( [self._get_dtheta_benchmark_matrix(theta) for theta in thetas] ) # shape (n_thetas, n_gradients, n_benchmarks) @@ -640,7 +650,6 @@ def _weights(self, thetas, nus, benchmark_weights, theta_matrices=None): n_events, _ = benchmark_weights.shape # Inputs - include_nuisance_benchmarks = nus is not None if nus is None: nus = [None for _ in thetas] assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!" @@ -935,9 +944,8 @@ def _get_dtheta_benchmark_matrix(self, theta, zero_pad=True): if mode == "fd": raise RuntimeError("Cannot calculate score for arbitrary parameter points without morphing setup") - dtheta_matrix = self.morpher.calculate_morphing_weight_gradient( - theta - ) # Shape (n_parameters, n_benchmarks_phys) + # Shape (n_parameters, n_benchmarks_phys) + dtheta_matrix = self.morpher.calculate_morphing_weight_gradient(theta) return dtheta_matrix From 38c57914262193f580fc9f7d025e7a5d779faab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 17 Sep 2020 11:36:55 -0400 Subject: [PATCH 14/65] src: analyzer module remove 'six' --- madminer/analysis/dataanalyzer.py | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/madminer/analysis/dataanalyzer.py b/madminer/analysis/dataanalyzer.py index e5c3e3434..36429c84b 100644 --- a/madminer/analysis/dataanalyzer.py +++ b/madminer/analysis/dataanalyzer.py @@ -1,6 +1,5 @@ import logging import numpy as np -import six from 
madminer.utils.interfaces.madminer_hdf5 import load_madminer_settings, madminer_event_loader from madminer.utils.morphing import PhysicsMorpher, NuisanceMorpher @@ -227,7 +226,7 @@ def weighted_events( # Process and return appropriate weights if theta is None: return x, weights_benchmarks - elif isinstance(theta, six.string_types): + elif isinstance(theta, str): i_benchmark = list(self.benchmarks.keys()).index(theta) return x, weights_benchmarks[:, i_benchmark] elif derivative: @@ -535,7 +534,7 @@ def _report_setup(self): self.include_nuisance_parameters = False logger.info(f"Found {self.n_benchmarks} benchmarks") - for (key, values), is_nuisance in zip(six.iteritems(self.benchmarks), self.benchmark_is_nuisance): + for (key, values), is_nuisance in zip(self.benchmarks.items(), self.benchmark_is_nuisance): if is_nuisance: logger.debug(" %s: systematics", key) else: @@ -548,7 +547,7 @@ def _report_setup(self): logger.info(f"Found {self.n_samples} events") if self.n_events_generated_per_benchmark is not None: - for events, name in zip(self.n_events_generated_per_benchmark, six.iterkeys(self.benchmarks)): + for events, name in zip(self.n_events_generated_per_benchmark, self.benchmarks.keys()): if events > 0: logger.info(" %s signal events sampled from benchmark %s", events, name) if self.n_events_backgrounds is not None and self.n_events_backgrounds > 0: @@ -582,17 +581,19 @@ def _finite_differences_theta_gradient_matrices(self): matrix = np.zeros( (self.n_benchmarks, self.n_parameters, self.n_benchmarks) ) # (n_thetas, n_gradients, n_benchmarks) - benchmark_names = list(six.iterkeys(self.benchmarks)) - # We'll generally try to find the tupels p, i, j, k such that matrix[i, p, j] = - 1 / eps and matrix[i, p, i] = 1 / eps + benchmark_names = list(self.benchmarks.keys()) - for i, benchmark in enumerate(six.iterkeys(self.benchmarks)): - # For the FD-shited benchmarks, we assume that the gradients are the same as at the original point, and will just copy the matrix later + 
# We'll generally try to find the tuples p, i, j, k such that + # matrix[i, p, j] = - 1 / eps and matrix[i, p, i] = 1 / eps + for i, benchmark in enumerate(self.benchmarks.keys()): + # For the FD-shifted benchmarks, we assume that the gradients are + # the same as at the original point, and will just copy the matrix later copy_to = [] if benchmark not in self.finite_difference_benchmarks: continue - for p, param in enumerate(six.iterkeys(self.parameters)): + for p, param in enumerate(self.parameters.keys()): shifted_benchmark = self.finite_difference_benchmarks[benchmark][param] j = benchmark_names.index(shifted_benchmark) copy_to.append(j) @@ -872,7 +873,7 @@ def _train_validation_test_split(self, partition, test_split, validation_split): return start_event, end_event, correction_factor def _get_theta_value(self, theta): - if isinstance(theta, six.string_types): + if isinstance(theta, str): benchmark = self.benchmarks[theta] theta_value = np.array([benchmark[key] for key in benchmark]) elif isinstance(theta, int): @@ -897,7 +898,7 @@ def _get_theta_benchmark_matrix(self, theta, zero_pad=True): theta_matrix = np.zeros(self.n_benchmarks) theta_matrix[: unpadded_theta_matrix.shape[0]] = unpadded_theta_matrix - elif isinstance(theta, six.string_types): + elif isinstance(theta, str): i_benchmark = list(self.benchmarks).index(theta) theta_matrix = self._get_theta_benchmark_matrix(i_benchmark) @@ -921,18 +922,18 @@ def _get_dtheta_benchmark_matrix(self, theta, zero_pad=True): dtheta_matrix = np.zeros((unpadded_theta_matrix.shape[0], self.n_benchmarks)) dtheta_matrix[:, : unpadded_theta_matrix.shape[1]] = unpadded_theta_matrix - elif isinstance(theta, six.string_types) and mode == "morphing": + elif isinstance(theta, str) and mode == "morphing": benchmark = self.benchmarks[theta] - benchmark = np.array([value for _, value in six.iteritems(benchmark)]) + benchmark = np.array([value for _, value in benchmark.items()]) dtheta_matrix = 
self._get_dtheta_benchmark_matrix(benchmark) elif isinstance(theta, int) and mode == "morphing": benchmark = self.benchmarks[list(self.benchmarks.keys())[theta]] - benchmark = np.array([value for _, value in six.iteritems(benchmark)]) + benchmark = np.array([value for _, value in benchmark.items()]) dtheta_matrix = self._get_dtheta_benchmark_matrix(benchmark) - elif isinstance(theta, six.string_types): # finite differences - benchmark_id = list(six.iterkeys(self.benchmarks)).index(theta) + elif isinstance(theta, str): + benchmark_id = list(self.benchmarks.keys()).index(theta) dtheta_matrix = self._get_dtheta_benchmark_matrix(benchmark_id) elif isinstance(theta, int): # finite differences @@ -971,7 +972,6 @@ def _find_closest_benchmark(self, theta): return closest_idx def _benchmark_array(self): - benchmarks_array = [] - for benchmark in six.itervalues(self.benchmarks): - benchmarks_array.append(list(six.itervalues(benchmark))) - return np.asarray(benchmarks_array) + return np.asarray([ + benchmark.values() for benchmark in self.benchmarks.values() + ]) From 19f031ffa8da1cc0c2442ba92428d3da796aa54c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 13:50:07 -0400 Subject: [PATCH 15/65] src: core module f-strings --- madminer/core/madminer.py | 104 +++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py index 4ba3aa051..bdf40788e 100644 --- a/madminer/core/madminer.py +++ b/madminer/core/madminer.py @@ -95,17 +95,17 @@ def add_parameter( # Default names if parameter_name is None: - parameter_name = "parameter_" + str(len(self.parameters)) + parameter_name = f"parameter_{len(self.parameters)}" # Check and sanitize input - assert isinstance(parameter_name, six.string_types), "Parameter name is not a string: {}".format(parameter_name) - assert isinstance(lha_block, six.string_types), "LHA block is not a string: {}".format(lha_block) - 
assert isinstance(lha_id, int), "LHA id is not an integer: {}".format(lha_id) + assert isinstance(parameter_name, six.string_types), f"Parameter name is not a string: {parameter_name}" + assert isinstance(lha_block, six.string_types), f"LHA block is not a string: {lha_block}" + assert isinstance(lha_id, int), f"LHA id is not an integer: {lha_id}" parameter_name = parameter_name.replace(" ", "_") parameter_name = parameter_name.replace("-", "_") - assert parameter_name not in self.parameters, "Parameter name exists already: {}".format(parameter_name) + assert parameter_name not in self.parameters, f"Parameter name exists already: {parameter_name}" if isinstance(morphing_max_power, int): morphing_max_power = (morphing_max_power,) @@ -171,11 +171,11 @@ def set_parameters(self, parameters=None): elif len(values) == 2: self.add_parameter(lha_block=values[0], lha_id=values[1], parameter_name=key) else: - raise ValueError("Parameter properties has unexpected length: {0}".format(values)) + raise ValueError(f"Parameter properties has unexpected length: {values}") else: for values in parameters: - assert len(values) == 2, "Parameter list entry does not have length 2: {0}".format(values) + assert len(values) == 2, f"Parameter list entry does not have length 2: {values}" self.add_parameter(values[0], values[1]) # After manually adding parameters, the morphing information is not accurate anymore @@ -218,18 +218,18 @@ def add_benchmark(self, parameter_values, benchmark_name=None, verbose=True): # Default names if benchmark_name is None: - benchmark_name = "benchmark_" + str(len(self.benchmarks)) + benchmark_name = f"benchmark_{len(self.benchmarks)}" # Check input if not isinstance(parameter_values, dict): - raise RuntimeError("Parameter values are not a dict: {}".format(parameter_values)) + raise RuntimeError(f"Parameter values are not a dict: {parameter_values}") for key, value in six.iteritems(parameter_values): if key not in self.parameters: - raise RuntimeError("Unknown 
parameter: {0}".format(key)) + raise RuntimeError(f"Unknown parameter: {key}") if benchmark_name in self.benchmarks: - raise RuntimeError("Benchmark name {} exists already".format(benchmark_name)) + raise RuntimeError(f"Benchmark {benchmark_name} exists already") # Add benchmark self.benchmarks[benchmark_name] = parameter_values @@ -451,9 +451,9 @@ def add_systematics( # Default name if systematic_name is None: i = 0 - while "{}_{}".format(effect, i) in list(six.iterkeys(self.systematics)): + while f"{effect}_{i}" in list(six.iterkeys(self.systematics)): i += 1 - systematic_name = "{}_{}".format(type, i) + systematic_name = f"{type}_{i}" systematic_name = systematic_name.replace(" ", "_") systematic_name = systematic_name.replace("-", "_") @@ -466,7 +466,7 @@ def add_systematics( elif effect == "norm": self.systematics[systematic_name] = ("norm", norm_variation) else: - raise ValueError("Unknown systematic type {}, has to be one of 'norm', 'scale', or 'pdf'!".format(type)) + raise ValueError(f"Unknown systematic type: {effect}") def load(self, filename, disable_morphing=False): """ @@ -948,15 +948,19 @@ def run_multiple( # Gives 'python2_override' full power if 'initial_command' is empty. # (Reference: https://github.com/diana-hep/madminer/issues/422) if python2_override and initial_command is None and not python_executable: - logger.warning("The keyword python2_override is discouraged. Instead, consider using python_executable.") + logger.warning( + "The keyword python2_override is discouraged. " + "Instead, consider using python_executable." 
+ ) + logger.info("Adding Python2.7 bin folder to PATH") binary_path = os.popen("command -v python2.7").read().strip() binary_folder = os.path.dirname(os.path.realpath(binary_path)) - initial_command = "export PATH={}:$PATH".format(binary_folder) - logger.info("Using Python executable %s", binary_path) + initial_command = f"export PATH={binary_folder}:$PATH" + logger.info(f"Using Python executable {binary_path}") # Generate process folder - log_file_generate = log_directory + "/generate.log" + log_file_generate = f"{log_directory}/generate.log" generate_mg_process( mg_directory, @@ -973,9 +977,9 @@ def run_multiple( # Make MadMiner folders create_missing_folders( [ - mg_process_directory + "/madminer", - mg_process_directory + "/madminer/cards", - mg_process_directory + "/madminer/scripts", + f"{mg_process_directory}/madminer", + f"{mg_process_directory}/madminer/cards", + f"{mg_process_directory}/madminer/scripts", ] ) @@ -995,20 +999,20 @@ def run_multiple( for sample_benchmark in sample_benchmarks: # Files - script_file = "madminer/scripts/run_{}.sh".format(i) - log_file_run = "run_{}.log".format(i) - mg_commands_filename = "madminer/cards/mg_commands_{}.dat".format(i) - param_card_file = "madminer/cards/param_card_{}.dat".format(i) - reweight_card_file = "madminer/cards/reweight_card_{}.dat".format(i) + script_file = f"madminer/scripts/run_{i}.sh" + log_file_run = f"run_{i}.log" + mg_commands_filename = f"madminer/cards/mg_commands_{i}.dat" + param_card_file = f"madminer/cards/param_card_{i}.dat" + reweight_card_file = f"madminer/cards/reweight_card_{i}.dat" new_pythia8_card_file = None if pythia8_card_file is not None: - new_pythia8_card_file = "madminer/cards/pythia8_card_{}.dat".format(i) + new_pythia8_card_file = f"madminer/cards/pythia8_card_{i}.dat" new_run_card_file = None if run_card_file is not None: - new_run_card_file = "madminer/cards/run_card_{}.dat".format(i) + new_run_card_file = f"madminer/cards/run_card_{i}.dat" new_configuration_file = None if 
configuration_file is not None: - new_configuration_file = "madminer/cards/me5_configuration_{}.txt".format(i) + new_configuration_file = f"madminer/cards/me5_configuration_{i}.txt" logger.info("Run %s", i) logger.info(" Sampling from benchmark: %s", sample_benchmark) @@ -1034,26 +1038,26 @@ def run_multiple( param_card_template_file, mg_process_directory, sample_benchmark=sample_benchmark, - param_card_filename=mg_process_directory + "/" + param_card_file, - reweight_card_filename=mg_process_directory + "/" + reweight_card_file, + param_card_filename=f"{mg_process_directory}/{param_card_file}", + reweight_card_filename=f"{mg_process_directory}/{reweight_card_file}", ) # Create run card if run_card_file is not None: export_run_card( template_filename=run_card_file, - run_card_filename=mg_process_directory + "/" + new_run_card_file, + run_card_filename=f"{mg_process_directory}/{new_run_card_file}", systematics=systematics_used, order=order, ) # Copy Pythia card if pythia8_card_file is not None: - copy_file(pythia8_card_file, mg_process_directory + "/" + new_pythia8_card_file) + copy_file(pythia8_card_file, f"{mg_process_directory}/{new_pythia8_card_file}") # Copy Configuration card if configuration_file is not None: - copy_file(configuration_file, mg_process_directory + "/" + new_configuration_file) + copy_file(configuration_file, f"{mg_process_directory}/{new_configuration_file}") # Run MG and Pythia if only_prepare_script: @@ -1079,15 +1083,15 @@ def run_multiple( run_mg( mg_directory, mg_process_directory, - mg_process_directory + "/" + mg_commands_filename, - mg_process_directory + "/" + new_run_card_file, - mg_process_directory + "/" + param_card_file, - mg_process_directory + "/" + reweight_card_file, - None if new_pythia8_card_file is None else mg_process_directory + "/" + new_pythia8_card_file, - None if new_configuration_file is None else mg_process_directory + "/" + new_configuration_file, + f"{mg_process_directory}/{mg_commands_filename}", + 
f"{mg_process_directory}/{new_run_card_file}", + f"{mg_process_directory}/{param_card_file}", + f"{mg_process_directory}/{reweight_card_file}", + None if new_pythia8_card_file is None else f"{mg_process_directory}/{new_pythia8_card_file}", + None if new_configuration_file is None else f"{mg_process_directory}/{new_configuration_file}", is_background=is_background, initial_command=initial_command, - log_file=log_directory + "/" + log_file_run, + log_file=f"{log_directory}/{log_file_run}", explicit_python_call=python2_override or (python_executable is not None), python_executable=python_executable, order=order, @@ -1099,7 +1103,7 @@ def run_multiple( # Master shell script if only_prepare_script: - master_script_filename = "{}/madminer/run.sh".format(mg_process_directory) + master_script_filename = f"{mg_process_directory}/madminer/run.sh" create_master_script(log_directory, master_script_filename, mg_directory, mg_process_directory, mg_scripts) logger.info( @@ -1108,9 +1112,7 @@ def run_multiple( ) else: - expected_event_files = [ - mg_process_directory + "/Events/run_{:02d}".format(i + 1) for i in range(n_runs_total) - ] + expected_event_files = [f"{mg_process_directory}/Events/run_{(i+1):02d}" for i in range(n_runs_total)] expected_event_files = "\n".join(expected_event_files) logger.info( "Finished running MadGraph! 
Please check that events were succesfully generated in the following " @@ -1184,9 +1186,9 @@ def reweight_existing_sample( # Make MadMiner folders create_missing_folders( [ - mg_process_directory + "/madminer", - mg_process_directory + "/madminer/cards", - mg_process_directory + "/madminer/scripts", + f"{mg_process_directory}/madminer", + f"{mg_process_directory}/madminer/cards", + f"{mg_process_directory}/madminer/scripts", ] ) @@ -1212,7 +1214,7 @@ def reweight_existing_sample( param_card_template_file, mg_process_directory, sample_benchmark=sample_benchmark, - reweight_card_filename=mg_process_directory + "/" + reweight_card_file, + reweight_card_filename=f"{mg_process_directory}/{reweight_card_file}", include_param_card=False, benchmarks=missing_benchmarks, ) @@ -1235,9 +1237,9 @@ def reweight_existing_sample( run_mg_reweighting( mg_process_directory, run_name=run_name, - reweight_card_file=mg_process_directory + "/" + reweight_card_file, + reweight_card_file=f"{mg_process_directory}/{reweight_card_file}", initial_command=initial_command, - log_file=log_directory + "/" + log_file_run, + log_file=f"{log_directory}/{log_file_run}", ) logger.info( "Finished running reweighting! 
Please check that events were succesfully reweighted in the following " From 2999b874dca88d09bd189787ab4057a9a3b59951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 13:54:40 -0400 Subject: [PATCH 16/65] src: core module improve fmt --- madminer/core/madminer.py | 40 ++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py index bdf40788e..6354e91f0 100644 --- a/madminer/core/madminer.py +++ b/madminer/core/madminer.py @@ -111,7 +111,13 @@ def add_parameter( morphing_max_power = (morphing_max_power,) # Add parameter - self.parameters[parameter_name] = (lha_block, lha_id, morphing_max_power, parameter_range, param_card_transform) + self.parameters[parameter_name] = ( + lha_block, + lha_id, + morphing_max_power, + parameter_range, + param_card_transform, + ) # After manually adding parameters, the morphing information is not accurate anymore self.morpher = None @@ -165,11 +171,15 @@ def set_parameters(self, parameters=None): lha_block=values[0], lha_id=values[1], parameter_name=key, - parameter_range=[values[3], values[4]], + parameter_range=(values[3], values[4]), morphing_max_power=values[2], ) elif len(values) == 2: - self.add_parameter(lha_block=values[0], lha_id=values[1], parameter_name=key) + self.add_parameter( + lha_block=values[0], + lha_id=values[1], + parameter_name=key, + ) else: raise ValueError(f"Parameter properties has unexpected length: {values}") @@ -284,7 +294,12 @@ def set_benchmarks(self, benchmarks=None, verbose=True): self.export_morphing = False def set_morphing( - self, max_overall_power=4, n_bases=1, include_existing_benchmarks=True, n_trials=100, n_test_thetas=100 + self, + max_overall_power=4, + n_bases=1, + include_existing_benchmarks=True, + n_trials=100, + n_test_thetas=100, ): """ Sets up the morphing environment. 
@@ -353,7 +368,10 @@ def set_morphing( else: n_predefined_benchmarks = 0 basis = morpher.optimize_basis( - n_bases=n_bases, fixed_benchmarks_from_madminer=None, n_trials=n_trials, n_test_thetas=n_test_thetas + n_bases=n_bases, + fixed_benchmarks_from_madminer=None, + n_trials=n_trials, + n_test_thetas=n_test_thetas, ) basis.update(self.benchmarks) @@ -424,7 +442,7 @@ def add_systematics( scale : {"mu", "mur", "muf"}, optional If type is "scale", this sets whether only the regularization scale ("mur"), only the factorization scale - ("muf"), or both simulatenously ("mu") are varied. Default value: + ("muf"), or both simultaneously ("mu") are varied. Default value: "mu". norm_variation : float, optional @@ -1104,8 +1122,13 @@ def run_multiple( # Master shell script if only_prepare_script: master_script_filename = f"{mg_process_directory}/madminer/run.sh" - create_master_script(log_directory, master_script_filename, mg_directory, mg_process_directory, mg_scripts) - + create_master_script( + log_directory, + master_script_filename, + mg_directory, + mg_process_directory, + mg_scripts, + ) logger.info( "To generate events, please run:\n\n %s [MG_directory] [MG_process_directory] [log_dir]\n\n", master_script_filename, @@ -1129,7 +1152,6 @@ def reweight_existing_sample( reweight_benchmarks=None, only_prepare_script=False, log_directory=None, - temp_directory=None, initial_command=None, ): """ From c0f58963f73147e0ab60738f55aaf9a7782b1ad5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 14:05:16 -0400 Subject: [PATCH 17/65] src: core module remove 'six' --- madminer/core/madminer.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py index 6354e91f0..9cafddfc0 100644 --- a/madminer/core/madminer.py +++ b/madminer/core/madminer.py @@ -1,10 +1,7 @@ -from __future__ import absolute_import, division, print_function, unicode_literals -import 
six - import os import logging -from collections import OrderedDict import tempfile +from collections import OrderedDict from madminer.utils.morphing import PhysicsMorpher from madminer.utils.interfaces.madminer_hdf5 import save_madminer_settings, load_madminer_settings @@ -98,8 +95,8 @@ def add_parameter( parameter_name = f"parameter_{len(self.parameters)}" # Check and sanitize input - assert isinstance(parameter_name, six.string_types), f"Parameter name is not a string: {parameter_name}" - assert isinstance(lha_block, six.string_types), f"LHA block is not a string: {lha_block}" + assert isinstance(parameter_name, str), f"Parameter name is not a string: {parameter_name}" + assert isinstance(lha_block, str), f"LHA block is not a string: {lha_block}" assert isinstance(lha_id, int), f"LHA id is not an integer: {lha_id}" parameter_name = parameter_name.replace(" ", "_") @@ -165,7 +162,7 @@ def set_parameters(self, parameters=None): self.parameters = OrderedDict() if isinstance(parameters, dict): - for key, values in six.iteritems(parameters): + for key, values in parameters.items(): if len(values) == 5: self.add_parameter( lha_block=values[0], @@ -234,7 +231,7 @@ def add_benchmark(self, parameter_values, benchmark_name=None, verbose=True): if not isinstance(parameter_values, dict): raise RuntimeError(f"Parameter values are not a dict: {parameter_values}") - for key, value in six.iteritems(parameter_values): + for key, value in parameter_values.items(): if key not in self.parameters: raise RuntimeError(f"Unknown parameter: {key}") @@ -281,7 +278,7 @@ def set_benchmarks(self, benchmarks=None, verbose=True): self.default_benchmark = None if isinstance(benchmarks, dict): - for name, values in six.iteritems(benchmarks): + for name, values in benchmarks.items(): self.add_benchmark(values, name, verbose=verbose) else: for values in benchmarks: @@ -469,7 +466,7 @@ def add_systematics( # Default name if systematic_name is None: i = 0 - while f"{effect}_{i}" in 
list(six.iterkeys(self.systematics)): + while f"{effect}_{i}" in list(self.systematics.keys()): i += 1 systematic_name = f"{type}_{i}" systematic_name = systematic_name.replace(" ", "_") @@ -524,7 +521,7 @@ def load(self, filename, disable_morphing=False): ) = load_madminer_settings(filename, include_nuisance_benchmarks=False) logger.info("Found %s parameters:", len(self.parameters)) - for key, values in six.iteritems(self.parameters): + for key, values in self.parameters.items(): logger.info( " %s (LHA: %s %s, maximal power in squared ME: %s, range: %s)", key, @@ -535,7 +532,7 @@ def load(self, filename, disable_morphing=False): ) logger.info("Found %s benchmarks:", len(self.benchmarks)) - for key, values in six.iteritems(self.benchmarks): + for key, values in self.benchmarks.items(): logger.info(" %s: %s", key, format_benchmark(values)) if self.default_benchmark is None: @@ -562,7 +559,7 @@ def load(self, filename, disable_morphing=False): else: logger.info("Found systematics setup with %s nuisance parameter groups", len(self.systematics)) - for key, value in six.iteritems(self.systematics): + for key, value in self.systematics.items(): logger.debug(" %s: %s", key, " / ".join(str(x) for x in value)) def save(self, filename): @@ -1271,7 +1268,7 @@ def reweight_existing_sample( ) def _check_pdf_or_scale_variation(self, systematics): - for value in six.itervalues(systematics): + for value in systematics.values(): if value[0] in ["pdf", "scale"]: return True return False From 3ed585f84d6494601ed0ece3e12ba85cc593ab2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 14:27:45 -0400 Subject: [PATCH 18/65] src: delphes module f-strings --- madminer/delphes/delphes_reader.py | 55 +++++++++++++----------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py index 1f0c8c23d..b64a8b7b0 100644 --- a/madminer/delphes/delphes_reader.py +++ 
b/madminer/delphes/delphes_reader.py @@ -450,25 +450,25 @@ def add_default_observables( [n_leptons_max, n_photons_max, n_jets_max], ["l", "a", "j"], [False, False, include_charge] ): if include_numbers: - self.add_observable("n_{}s".format(symbol), "len({})".format(symbol), required=True) + self.add_observable(f"n_{symbol}s", f"len({symbol})", required=True) for i in range(n): self.add_observable( - "e_{}{}".format(symbol, i + 1), "{}[{}].e".format(symbol, i), required=False, default=0.0 + f"e_{symbol}{i+1}", f"{symbol}[{i}].e", required=False, default=0.0 ) self.add_observable( - "pt_{}{}".format(symbol, i + 1), "{}[{}].pt".format(symbol, i), required=False, default=0.0 + f"pt_{symbol}{i+1}", f"{symbol}[{i}].pt", required=False, default=0.0 ) self.add_observable( - "eta_{}{}".format(symbol, i + 1), "{}[{}].eta".format(symbol, i), required=False, default=0.0 + f"eta_{symbol}{i+1}", f"{symbol}[{i}].eta", required=False, default=0.0 ) self.add_observable( - "phi_{}{}".format(symbol, i + 1), "{}[{}].phi()".format(symbol, i), required=False, default=0.0 + f"phi_{symbol}{i+1}", f"{symbol}[{i}].phi()", required=False, default=0.0 ) if include_this_charge and symbol == "l": self.add_observable( - "charge_{}{}".format(symbol, i + 1), - "{}[{}].charge".format(symbol, i), + f"charge_{symbol}{i+1}", + f"{symbol}[{i}].charge", required=False, default=0.0, ) @@ -638,9 +638,8 @@ def analyse_delphes_samples( # Following results: check consistency with previous results if len(self.observations) != len(this_observations): raise ValueError( - "Number of observations in different Delphes files incompatible: {} vs {}".format( - len(self.observations), len(this_observations) - ) + f"Number of observations in different Delphes files incompatible: " + f"{len(self.observations)} vs {len(this_observations)}" ) # Merge weights with previous @@ -664,7 +663,7 @@ def analyse_delphes_samples( # Merge observations with previous (should always be the same observables) for key in 
self.observations: - assert key in this_observations, "Observable {} not found in Delphes sample!".format(key) + assert key in this_observations, f"Observable {key} not found in Delphes sample!" self.observations[key] = np.hstack([self.observations[key], this_observations[key]]) self.events_sampling_benchmark_ids = np.hstack( @@ -725,11 +724,9 @@ def _analyse_delphes_sample( and (systematics_name, benchmark0, benchmark1) != self.nuisance_parameters[nuisance_parameter_name] ): raise RuntimeError( - "Inconsistent information for same nuisance parameter {}. Old: {}. New: {}.".format( - nuisance_parameter_name, - self.nuisance_parameters[nuisance_parameter_name], - (systematics_name, benchmark0, benchmark1), - ) + f"Inconsistent information for same nuisance parameter {nuisance_parameter_name}. " + f"Old: {self.nuisance_parameters[nuisance_parameter_name]}. " + f"New: {(systematics_name, benchmark0, benchmark1)}." ) self.nuisance_parameters[nuisance_parameter_name] = (systematics_name, benchmark0, benchmark1) @@ -796,8 +793,8 @@ def _analyse_delphes_sample( if k_factor is not None: for key in this_weights: this_weights[key] = k_factor * this_weights[key] - # Background scenario: we only have one set of weights, but these should be true for all benchmarks + # Background scenario: we only have one set of weights, but these should be true for all benchmarks if is_background: logger.debug("Sample is background") benchmarks_weight = list(six.itervalues(this_weights))[0] @@ -847,18 +844,15 @@ def _check_sample_observations(self, this_observations): if this_n_events != n_events: raise RuntimeError( - "Mismatching number of events in Delphes observations for {}: {} vs {}".format( - key, n_events, this_n_events - ) + f"Mismatching number of events in Delphes observations for {key}:" + f"{n_events} vs {this_n_events}" ) if not np.issubdtype(obs.dtype, np.number): logger.warning( - "Observations for observable %s have non-numeric dtype %s. 
This usually means something " - "is wrong in the definition of the observable. Data: %s", - key, - obs.dtype, - obs, + f"Observations for observable {key} have non-numeric dtype {obs.dtype}." + f"This usually means something is wrong in the observable definition." + f"Data: {obs}" ) return n_events @@ -873,16 +867,15 @@ def _check_sample_weights(self, n_events, this_weights): if this_n_events != n_events: raise RuntimeError( - "Mismatching number of events in weights {}: {} vs {}".format(key, n_events, this_n_events) + f"Mismatching number of events in weights {key}:" + f"{n_events} vs {this_n_events}" ) if not np.issubdtype(weights.dtype, np.number): logger.warning( - "Weights %s have non-numeric dtype %s. This usually means something " - "is wrong in the definition of the observable. Data: %s", - key, - weights.dtype, - weights, + f"Weights {key} have non-numeric dtype {weights.dtype}." + f"This usually means something is wrong in the observable definition." + f"Data: {weights}" ) return n_events From f9273776f39d6356691dacb4a3c98d7686011606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 14:42:14 -0400 Subject: [PATCH 19/65] src: delphes module improve fmt --- madminer/delphes/delphes_reader.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py index b64a8b7b0..9aec9b449 100644 --- a/madminer/delphes/delphes_reader.py +++ b/madminer/delphes/delphes_reader.py @@ -241,8 +241,13 @@ def run_delphes(self, delphes_directory, delphes_card, initial_command=None, log logger.info("Running Delphes on HepMC sample at %s", hepmc_filename) delphes_sample_filename = run_delphes( - delphes_directory, delphes_card, hepmc_filename, initial_command=initial_command, log_file=log_file + delphes_directory=delphes_directory, + delphes_card_filename=delphes_card, + hepmc_sample_filename=hepmc_filename, + initial_command=initial_command, + 
log_file=log_file, ) + self.delphes_sample_filenames[i] = delphes_sample_filename def set_acceptance( @@ -527,7 +532,11 @@ def reset_cuts(self): self.cuts_default_pass = [] def analyse_delphes_samples( - self, generator_truth=False, delete_delphes_files=False, reference_benchmark=None, parse_lhe_events_as_xml=True + self, + generator_truth=False, + delete_delphes_files=False, + reference_benchmark=None, + parse_lhe_events_as_xml=True, ): """ Main function that parses the Delphes samples (ROOT files), checks acceptance and cuts, and extracts @@ -674,6 +683,7 @@ def analyse_delphes_samples( for name, n_events in zip(self.benchmark_names_phys, self.signal_events_per_benchmark): if n_events > 0: logger.info(" %s from %s", n_events, name) + if self.background_events > 0: logger.info(" %s from backgrounds", self.background_events) @@ -710,6 +720,7 @@ def _analyse_delphes_sample( logger.debug("Extracting nuisance parameter definitions from LHE file") systematics_dict = extract_nuisance_parameters_from_lhe_file(lhe_file, systematics_used) logger.debug("systematics_dict: %s", systematics_dict) + # systematics_dict has structure # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}} @@ -769,11 +780,11 @@ def _analyse_delphes_sample( _, this_weights = parse_lhe_file( filename=lhe_file_for_weights, sampling_benchmark=sampling_benchmark, - benchmark_names=self.benchmark_names_phys, observables=OrderedDict(), + benchmark_names=self.benchmark_names_phys, + is_background=is_background, parse_events_as_xml=parse_lhe_events_as_xml, systematics_dict=systematics_dict, - is_background=is_background, ) logger.debug("Found weights %s in LHE file", list(this_weights.keys())) From b46ded2618ae3919c2f638e0ccac39dec7306029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 14:46:58 -0400 Subject: [PATCH 20/65] src: delphes module remove 'six' --- madminer/delphes/delphes_reader.py | 21 
++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py index 9aec9b449..f2720d295 100644 --- a/madminer/delphes/delphes_reader.py +++ b/madminer/delphes/delphes_reader.py @@ -1,10 +1,7 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six -from collections import OrderedDict -import numpy as np import logging +import numpy as np import os +from collections import OrderedDict from madminer.utils.interfaces.madminer_hdf5 import ( save_events_to_madminer_file, @@ -725,10 +722,8 @@ def _analyse_delphes_sample( # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}} # Store nuisance parameters - for systematics_name, nuisance_info in six.iteritems(systematics_dict): - for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in six.iteritems( - nuisance_info - ): + for systematics_name, nuisance_info in systematics_dict.items(): + for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in nuisance_info.items(): if ( self.nuisance_parameters is not None and nuisance_parameter_name in self.nuisance_parameters @@ -791,7 +786,7 @@ def _analyse_delphes_sample( # Apply cuts logger.debug("Applying Delphes-based cuts to LHE weights") - for key, weights in six.iteritems(this_weights): + for key, weights in this_weights.items(): this_weights[key] = weights[cut_filter] if this_weights is None: @@ -808,7 +803,7 @@ def _analyse_delphes_sample( # Background scenario: we only have one set of weights, but these should be true for all benchmarks if is_background: logger.debug("Sample is background") - benchmarks_weight = list(six.itervalues(this_weights))[0] + benchmarks_weight = list(this_weights.values())[0] for benchmark_name in self.benchmark_names_phys: this_weights[benchmark_name] = benchmarks_weight @@ -847,7 +842,7 @@ def 
_check_sample_observations(self, this_observations): """ Sanity checks """ # Check number of events in observables n_events = None - for key, obs in six.iteritems(this_observations): + for key, obs in this_observations.items(): this_n_events = len(obs) if n_events is None: n_events = this_n_events @@ -870,7 +865,7 @@ def _check_sample_observations(self, this_observations): def _check_sample_weights(self, n_events, this_weights): """ Sanity checks """ # Check number of events in weights - for key, weights in six.iteritems(this_weights): + for key, weights in this_weights.items(): this_n_events = len(weights) if n_events is None: n_events = this_n_events From 321955995b5c228ff851f386face2cca849214e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 14:58:31 -0400 Subject: [PATCH 21/65] src: delphes module combine funcs --- madminer/delphes/delphes_reader.py | 114 +++++++++++------------------ 1 file changed, 44 insertions(+), 70 deletions(-) diff --git a/madminer/delphes/delphes_reader.py b/madminer/delphes/delphes_reader.py index f2720d295..9f5502cc9 100644 --- a/madminer/delphes/delphes_reader.py +++ b/madminer/delphes/delphes_reader.py @@ -100,6 +100,48 @@ def __init__(self, filename): # Initialize nuisance parameters self.nuisance_parameters = OrderedDict() + @staticmethod + def _check_python_syntax(expression): + """ + Evaluates a Python expression to check for syntax errors + + Parameters + ---------- + expression : str + Python expression to be evaluated. 
The evaluation raises either SyntaxError or NameError + + Returns + ------- + None + """ + + try: + eval(expression) + except SyntaxError: + raise ValueError("The provided Python expression is invalid") + except NameError: + pass + + @staticmethod + def _check_sample_elements(this_elements, n_events=None): + """ Sanity checks """ + + # Check number of events in observables + for key, elems in this_elements.items(): + this_n_events = len(elems) + + if n_events is None: + n_events = this_n_events + logger.debug(f"Found {n_events} events") + + if this_n_events != n_events: + raise RuntimeError(f"Mismatching number of events for {key}: "f"{n_events} vs {this_n_events}") + + if not np.issubdtype(elems.dtype, np.number): + logger.warning(f"For key {key} have non-numeric dtype {elems.dtype}.") + + return n_events + def add_sample( self, hepmc_filename, @@ -767,7 +809,7 @@ def _analyse_delphes_sample( logger.debug("Did not extract weights from Delphes file") # Sanity checks - n_events = self._check_sample_observations(this_observations) + n_events = self._check_sample_elements(this_observations, None) # Find weights in LHE file if lhe_file_for_weights is not None: @@ -793,7 +835,7 @@ def _analyse_delphes_sample( raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.") # Sanity checks - n_events = self._check_sample_weights(n_events, this_weights) + n_events = self._check_sample_elements(this_weights, n_events) # k factors if k_factor is not None: @@ -817,74 +859,6 @@ def _analyse_delphes_sample( return this_observations, this_weights, n_events - def _check_python_syntax(self, expression): - """ - Evaluates a Python expression to check for syntax errors - - Parameters - ---------- - expression : str - Python expression to be evaluated. 
The evaluation raises either SyntaxError or NameError - - Returns - ------- - None - """ - - try: - eval(expression) - except SyntaxError: - raise ValueError("The provided Python expression is invalid") - except NameError: - pass - - def _check_sample_observations(self, this_observations): - """ Sanity checks """ - # Check number of events in observables - n_events = None - for key, obs in this_observations.items(): - this_n_events = len(obs) - if n_events is None: - n_events = this_n_events - logger.debug("Found %s events", n_events) - - if this_n_events != n_events: - raise RuntimeError( - f"Mismatching number of events in Delphes observations for {key}:" - f"{n_events} vs {this_n_events}" - ) - - if not np.issubdtype(obs.dtype, np.number): - logger.warning( - f"Observations for observable {key} have non-numeric dtype {obs.dtype}." - f"This usually means something is wrong in the observable definition." - f"Data: {obs}" - ) - return n_events - - def _check_sample_weights(self, n_events, this_weights): - """ Sanity checks """ - # Check number of events in weights - for key, weights in this_weights.items(): - this_n_events = len(weights) - if n_events is None: - n_events = this_n_events - logger.debug("Found %s events", n_events) - - if this_n_events != n_events: - raise RuntimeError( - f"Mismatching number of events in weights {key}:" - f"{n_events} vs {this_n_events}" - ) - - if not np.issubdtype(weights.dtype, np.number): - logger.warning( - f"Weights {key} have non-numeric dtype {weights.dtype}." - f"This usually means something is wrong in the observable definition." - f"Data: {weights}" - ) - return n_events - def save(self, filename_out, shuffle=True): """ Saves the observable definitions, observable values, and event weights in a MadMiner file. 
The parameter, From f34ab126ab2b6f530e712d378b885c8d972b2608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 15:05:20 -0400 Subject: [PATCH 22/65] src: fisher module f-strings --- madminer/fisherinformation/geometry.py | 3 ++- madminer/fisherinformation/information.py | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/madminer/fisherinformation/geometry.py b/madminer/fisherinformation/geometry.py index 218c798bb..0f670b88d 100755 --- a/madminer/fisherinformation/geometry.py +++ b/madminer/fisherinformation/geometry.py @@ -359,7 +359,8 @@ def distance_contours( dth0 = np.array([np.cos(angle), np.sin(angle)]) else: dth0 = np.array([random.uniform(-1, 1) for _ in range(self.dimension)]) - logger.debug("Calculate Trajectory Number %s with dtheta0=%s", i, dth0) + + logger.debug(f"Calculate Trajectory Number {i} with dtheta0={dth0}") ths, ds = self.find_trajectory(theta0, dth0, limits, stepsize) for th in ths: thetas.append(th) diff --git a/madminer/fisherinformation/information.py b/madminer/fisherinformation/information.py index 1a32c690b..e28328281 100755 --- a/madminer/fisherinformation/information.py +++ b/madminer/fisherinformation/information.py @@ -200,8 +200,8 @@ def full_information( """ # Check input - if mode not in ["score", "information", "modified_score"]: - raise ValueError("Unknown mode {}, has to be 'score', 'modified_score', or 'information'!".format(mode)) + if mode not in {"score", "information", "modified_score"}: + raise ValueError(f"Unknown mode {mode}") # Load Estimator model if os.path.isdir(model_file) and os.path.exists(model_file + "/ensemble.json"): @@ -671,8 +671,8 @@ def histo_information_2d( i_bins1 = np.searchsorted(bin1_boundaries, histo1_observables) i_bins2 = np.searchsorted(bin2_boundaries, histo2_observables) - assert ((0 <= i_bins1) & (i_bins1 < n_bins1_total)).all(), "Wrong bin {}".format(i_bins1) - assert ((0 <= i_bins2) & (i_bins2 < n_bins1_total)).all(), 
"Wrong bin {}".format(i_bins2) + assert ((0 <= i_bins1) & (i_bins1 < n_bins1_total)).all(), f"Wrong bin {i_bins1}" + assert ((0 <= i_bins2) & (i_bins2 < n_bins1_total)).all(), f"Wrong bin {i_bins2}" # Add up for i in range(n_bins1_total): @@ -823,7 +823,7 @@ def histogram_of_information( # Find bins bins = np.searchsorted(bin_boundaries, histo_observables) - assert ((0 <= bins) & (bins < n_bins_total)).all(), "Wrong bin {}".format(bins) + assert ((0 <= bins) & (bins < n_bins_total)).all(), f"Wrong bin {bins}" # Add up for i in range(n_bins_total): @@ -834,7 +834,7 @@ def histogram_of_information( # ML case else: # Load SALLY model - if os.path.isdir(model_file) and os.path.exists(model_file + "/ensemble.json"): + if os.path.isdir(model_file) and os.path.exists(f"{model_file}/ensemble.json"): model_is_ensemble = True model = Ensemble() model.load(model_file) @@ -946,7 +946,7 @@ def histogram_of_information( # Find bins bins = np.searchsorted(bin_boundaries, histo_observables) - assert ((0 <= bins) & (bins < n_bins_total)).all(), "Wrong bin {}".format(bins) + assert ((0 <= bins) & (bins < n_bins_total)).all(), f"Wrong bin {bins}" # Add up for i in range(n_bins_total): From fca8db549d0a03414232cbaadb86acff5e20b1ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 15:17:59 -0400 Subject: [PATCH 23/65] src: fisher module improve fmt --- madminer/fisherinformation/geometry.py | 11 +-- madminer/fisherinformation/information.py | 102 ++++++++++++++++------ madminer/fisherinformation/manipulate.py | 4 +- 3 files changed, 81 insertions(+), 36 deletions(-) diff --git a/madminer/fisherinformation/geometry.py b/madminer/fisherinformation/geometry.py index 0f670b88d..b477ba2bc 100755 --- a/madminer/fisherinformation/geometry.py +++ b/madminer/fisherinformation/geometry.py @@ -20,7 +20,7 @@ class InformationGeometry: one of the following functions * `InformationGeometry.information_from_formula()` defines the Fisher Information - explicitly 
as function of the theory paramters `theta`. + explicitly as function of the theory parameters `theta`. * `InformationGeometry.information_from_grid()` loads a grid of Fisher Informations which is then interpolated. @@ -38,7 +38,7 @@ def __init__(self): def information_from_formula(self, formula, dimension): """ Explicitly defines the Fisher Information as function of the theory parameter `theta` - through a formula that can be avaulated using `eval()`. + through a formula that can be evaluated using `eval()`. Parameters ---------- @@ -300,7 +300,7 @@ def distance_contours( grid_ranges : list of (tuple of float) Specifies the boundaries of the parameter grid in which the trajectory - is evaulated. It should be `[[min, max], [min, max], ..., [min, max]`, + is evaluated. It should be `[[min, max], [min, max], ..., [min, max]`, where the list goes over all parameters and `min` and `max` are float. grid_resolutions : list of int @@ -342,12 +342,13 @@ def distance_contours( """ # automatic setting of stepsize and ntrajectories - if stepsize == None: + if stepsize is None: stepsize = min([(limit[1] - limit[0]) / 20.0 for limit in grid_ranges]) - if ntrajectories == None: + if ntrajectories is None: ntrajectories = 20 * self.dimension if self.dimension is not 2: continous_sampling = False + limits = (1.0 + 2.0 * stepsize) * np.array(grid_ranges) # determine trajectories diff --git a/madminer/fisherinformation/information.py b/madminer/fisherinformation/information.py index e28328281..f1b4d45bc 100755 --- a/madminer/fisherinformation/information.py +++ b/madminer/fisherinformation/information.py @@ -16,7 +16,7 @@ class FisherInformation(DataAnalyzer): """ Functions to calculate expected Fisher information matrices. 
- After inializing a `FisherInformation` instance with the filename of a MadMiner file, different information matrices + After initializing a `FisherInformation` instance with the filename of a MadMiner file, different information matrices can be calculated: * `FisherInformation.truth_information()` calculates the full truth-level Fisher information. @@ -51,7 +51,12 @@ def __init__(self, filename, include_nuisance_parameters=True): super(FisherInformation, self).__init__(filename, False, include_nuisance_parameters) def truth_information( - self, theta, luminosity=300000.0, cuts=None, efficiency_functions=None, include_nuisance_parameters=True + self, + theta, + luminosity=300000.0, + cuts=None, + efficiency_functions=None, + include_nuisance_parameters=True, ): """ Calculates the full Fisher information at parton / truth level. This is the information in an idealized @@ -299,7 +304,9 @@ def full_information( for i_batch, (observations, weights_benchmarks) in enumerate( self.event_loader( - batch_size=batch_size, start=start_event, include_nuisance_parameters=include_nuisance_parameters + batch_size=batch_size, + start=start_event, + include_nuisance_parameters=include_nuisance_parameters, ) ): if (i_batch + 1) % n_batches_verbose == 0: @@ -329,6 +336,7 @@ def full_information( n_events=luminosity * total_xsec * np.sum(weights_theta) / total_sum_weights_theta, ) this_covariance = None + # Sum up results if fisher_info_kin is None: fisher_info_kin = this_fisher_info @@ -371,7 +379,12 @@ def full_information( return fisher_info_rate + fisher_info_kin, rate_covariance def rate_information( - self, theta, luminosity, cuts=None, efficiency_functions=None, include_nuisance_parameters=True + self, + theta, + luminosity, + cuts=None, + efficiency_functions=None, + include_nuisance_parameters=True, ): """ Calculates the Fisher information in a measurement of the total cross section (without any kinematic @@ -527,7 +540,7 @@ def histo_information( # Find bins i_bins = 
np.searchsorted(bin_boundaries, histo_observables) - assert ((0 <= i_bins) & (i_bins < n_bins_total)).all(), "Wrong bin {}".format(i_bins) + assert ((0 <= i_bins) & (i_bins < n_bins_total)).all(), f"Wrong bin {i_bins}" # Add up for i in range(n_bins_total): @@ -637,10 +650,23 @@ def histo_information_2d( # Binning bin1_boundaries, n_bins1_total = self._calculate_binning( - bins1, cuts, efficiency_functions, histrange1, n_events_dynamic_binning, observable1, theta + bins1, + cuts, + efficiency_functions, + histrange1, + n_events_dynamic_binning, + observable1, + theta, ) + bin2_boundaries, n_bins2_total = self._calculate_binning( - bins2, cuts, efficiency_functions, histrange2, n_events_dynamic_binning, observable2, theta + bins2, + cuts, + efficiency_functions, + histrange2, + n_events_dynamic_binning, + observable2, + theta, ) # Loop over batches @@ -879,11 +905,6 @@ def histogram_of_information( else: start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1 - if start_event > 0: - total_sum_weights_theta = self._calculate_xsec(theta=theta, start_event=start_event) - else: - total_sum_weights_theta = total_xsec - # Number of batches n_batches = int(np.ceil((self.n_samples - start_event) / batch_size)) n_batches_verbose = max(int(round(n_batches / 10, 0)), 1) @@ -978,36 +999,36 @@ def histogram_of_sigma_dsigma(self, theta, observable, nbins, histrange, cuts=No Parameters ---------- theta : ndarray - Parameter point `theta` at which the Fisher information matrix `I_ij(theta)` is evaluated. + Parameter point `theta` at which the Fisher information matrix `I_ij(theta)` is evaluated. observable : str - Expression for the observable to be sliced. The str will be parsed by Python's `eval()` function - and can use the names of the observables in the MadMiner files. + Expression for the observable to be sliced. The str will be parsed by Python's `eval()` function + and can use the names of the observables in the MadMiner files. 
nbins : int - Number of bins in the slicing, excluding overflow bins. + Number of bins in the slicing, excluding overflow bins. histrange : tuple of float - Minimum and maximum value of the slicing in the form `(min, max)`. Overflow bins are always added. + Minimum and maximum value of the slicing in the form `(min, max)`. Overflow bins are always added. cuts : None or list of str, optional - Cuts. Each entry is a parseable Python expression that returns a bool (True if the event should pass a cut, - False otherwise). Default value: None. + Cuts. Each entry is a parseable Python expression that returns a bool (True if the event should pass a cut, + False otherwise). Default value: None. efficiency_functions : list of str or None - Efficiencies. Each entry is a parseable Python expression that returns a float for the efficiency of one - component. Default value: None. + Efficiencies. Each entry is a parseable Python expression that returns a float for the efficiency of one + component. Default value: None. Returns ------- bin_boundaries : ndarray - Observable slice boundaries. + Observable slice boundaries. sigma_bins : ndarray - Cross section in pb in each of the slices. + Cross section in pb in each of the slices. dsigma_bins : ndarray - Cross section in pb in each of the slices. + Cross section in pb in each of the slices. 
""" # Input @@ -1071,11 +1092,20 @@ def histogram_of_sigma_dsigma(self, theta, observable, nbins, histrange, cuts=No def nuisance_constraint_information(self): """ Builds the Fisher information term representing the Gaussian constraints on the nuisance parameters """ - diagonal = np.array([0.0 for _ in range(self.n_parameters)] + [1.0 for _ in range(self.n_nuisance_parameters)]) - return np.diag(diagonal) + return np.diag( + np.array( + [0.0 for _ in range(self.n_parameters)] + + [1.0 for _ in range(self.n_nuisance_parameters)] + ) + ) def _check_binning_stats( - self, weights_benchmarks, weights_benchmark_uncertainties, theta, report=5, n_bins_last_axis=None + self, + weights_benchmarks, + weights_benchmark_uncertainties, + theta, + report=5, + n_bins_last_axis=None, ): theta_matrix = self._get_theta_benchmark_matrix(theta, zero_pad=False) # (n_benchmarks_phys,) sigma = mdot(theta_matrix, weights_benchmarks) # Shape (n_bins,) @@ -1098,7 +1128,14 @@ def _check_binning_stats( ) def _calculate_binning( - self, bins, cuts, efficiency_functions, histrange, n_events_dynamic_binning, observable, theta + self, + bins, + cuts, + efficiency_functions, + histrange, + n_events_dynamic_binning, + observable, + theta, ): dynamic_binning = histrange is None and isinstance(bins, int) if dynamic_binning: @@ -1113,6 +1150,7 @@ def _calculate_binning( else: bin_boundaries = bins n_bins_total = len(bins) + 1 + return bin_boundaries, n_bins_total def _calculate_fisher_information( @@ -1485,7 +1523,13 @@ def _calculate_xsec( return xsec def _calculate_dynamic_binning( - self, observable, theta, n_bins, n_events=None, cuts=None, efficiency_functions=None + self, + observable, + theta, + n_bins, + n_events=None, + cuts=None, + efficiency_functions=None, ): if cuts is None: diff --git a/madminer/fisherinformation/manipulate.py b/madminer/fisherinformation/manipulate.py index d7922f2f8..2c1ed8b0d 100755 --- a/madminer/fisherinformation/manipulate.py +++ 
b/madminer/fisherinformation/manipulate.py @@ -18,7 +18,7 @@ def project_information(fisher_information, remaining_components, covariance=Non Original n x n Fisher information. remaining_components : list of int - List with m entries, each an int with 0 <= remaining_compoinents[i] < n. Denotes which parameters are kept, and + List with m entries, each an int with 0 <= remaining_components[i] < n. Denotes which parameters are kept, and their new order. All other parameters or projected out. covariance : ndarray or None, optional @@ -74,7 +74,7 @@ def profile_information( Original n x n Fisher information. remaining_components : list of int - List with m entries, each an int with 0 <= remaining_compoinents[i] < n. Denotes which parameters are kept, and + List with m entries, each an int with 0 <= remaining_components[i] < n. Denotes which parameters are kept, and their new order. All other parameters or profiled out. covariance : ndarray or None, optional From 6fb6279a51e5755c9b7c4c7e14f310dd0ac7b72a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 15:19:45 -0400 Subject: [PATCH 24/65] src: fisher module remove 'future' --- madminer/fisherinformation/geometry.py | 3 +-- madminer/fisherinformation/information.py | 2 -- madminer/fisherinformation/manipulate.py | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/madminer/fisherinformation/geometry.py b/madminer/fisherinformation/geometry.py index b477ba2bc..ad41512d9 100755 --- a/madminer/fisherinformation/geometry.py +++ b/madminer/fisherinformation/geometry.py @@ -1,8 +1,7 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np import random + from scipy.interpolate import griddata, LinearNDInterpolator, CloughTocher2DInterpolator from scipy.stats import chi2 diff --git a/madminer/fisherinformation/information.py b/madminer/fisherinformation/information.py index f1b4d45bc..4e33e27fe 100755 --- 
a/madminer/fisherinformation/information.py +++ b/madminer/fisherinformation/information.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np import os diff --git a/madminer/fisherinformation/manipulate.py b/madminer/fisherinformation/manipulate.py index 2c1ed8b0d..9cf331e41 100755 --- a/madminer/fisherinformation/manipulate.py +++ b/madminer/fisherinformation/manipulate.py @@ -1,7 +1,6 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np + from ..utils.various import separate_information_blocks logger = logging.getLogger(__name__) From aa0104fdf6bbeea0fc62b8460c0e9adb9f057ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 15:58:40 -0400 Subject: [PATCH 25/65] src: LHE module f-strings --- madminer/lhe/lhe_reader.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py index 48daa9548..154c6187e 100644 --- a/madminer/lhe/lhe_reader.py +++ b/madminer/lhe/lhe_reader.py @@ -406,25 +406,25 @@ def add_default_observables( [n_leptons_max, n_photons_max, n_jets_max], ["l", "a", "j"], [False, False, include_charge] ): if include_numbers: - self.add_observable("n_{}s".format(symbol), "len({})".format(symbol), required=True) + self.add_observable(f"n_{symbol}s", f"len({symbol})", required=True) for i in range(n): self.add_observable( - "e_{}{}".format(symbol, i + 1), "{}[{}].e".format(symbol, i), required=False, default=0.0 + f"e_{symbol}{i+1}", f"{symbol}[{i}].e", required=False, default=0.0 ) self.add_observable( - "pt_{}{}".format(symbol, i + 1), "{}[{}].pt".format(symbol, i), required=False, default=0.0 + f"pt_{symbol}{i+1}", f"{symbol}[{i}].pt", required=False, default=0.0 ) self.add_observable( - "eta_{}{}".format(symbol, i + 1), "{}[{}].eta".format(symbol, i), required=False, 
default=0.0 + f"eta_{symbol}{i+1}", f"{symbol}[{i}].eta", required=False, default=0.0 ) self.add_observable( - "phi_{}{}".format(symbol, i + 1), "{}[{}].phi()".format(symbol, i), required=False, default=0.0 + f"phi_{symbol}{i+1}", f"{symbol}[{i}].phi()", required=False, default=0.0 ) if include_this_charge and symbol == "l": self.add_observable( - "charge_{}{}".format(symbol, i + 1), - "{}[{}].charge".format(symbol, i), + f"charge_{symbol}{i+1}", + f"{symbol}[{i}].charge", required=False, default=0.0, ) @@ -605,9 +605,8 @@ def analyse_samples(self, reference_benchmark=None, parse_events_as_xml=True): # Following results: check consistency with previous results if len(self.observations) != len(this_observations): raise ValueError( - "Number of observations in different Delphes files incompatible: {} vs {}".format( - len(self.observations), len(this_observations) - ) + f"Number of observations in different Delphes files incompatible: " + f"{len(self.observations)} vs {len(this_observations)}" ) # Merge weights with previous @@ -631,7 +630,7 @@ def analyse_samples(self, reference_benchmark=None, parse_events_as_xml=True): # Merge observations with previous (should always be the same observables) for key in self.observations: - assert key in this_observations, "Observable {} not found in Delphes sample!".format(key) + assert key in this_observations, f"Observable {key} not found in Delphes sample!" self.observations[key] = np.hstack([self.observations[key], this_observations[key]]) self.events_sampling_benchmark_ids = np.hstack( @@ -680,11 +679,9 @@ def _parse_sample( and (systematics_name, benchmark0, benchmark1) != self.nuisance_parameters[nuisance_parameter_name] ): raise RuntimeError( - "Inconsistent information for same nuisance parameter {}. Old: {}. 
New: {}.".format( - nuisance_parameter_name, - self.nuisance_parameters[nuisance_parameter_name], - (systematics_name, benchmark0, benchmark1), - ) + f"Inconsistent information for same nuisance parameter {nuisance_parameter_name}. " + f"Old: {self.nuisance_parameters[nuisance_parameter_name]}. " + f"New: {(systematics_name, benchmark0, benchmark1)}." ) self.nuisance_parameters[nuisance_parameter_name] = (systematics_name, benchmark0, benchmark1) From 1c664c3c111f0f0fae5f359ce6bde535e852ce3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:04:09 -0400 Subject: [PATCH 26/65] src: LHE module improve fmt --- madminer/lhe/lhe_reader.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py index 154c6187e..bea20ad1e 100644 --- a/madminer/lhe/lhe_reader.py +++ b/madminer/lhe/lhe_reader.py @@ -111,7 +111,14 @@ def __init__(self, filename): # Initialize nuisance parameters self.nuisance_parameters = OrderedDict() - def add_sample(self, lhe_filename, sampled_from_benchmark, is_background=False, k_factor=1.0, systematics=None): + def add_sample( + self, + lhe_filename, + sampled_from_benchmark, + is_background=False, + k_factor=1.0, + systematics=None, + ): """ Adds an LHE sample of simulated events. 
@@ -665,6 +672,7 @@ def _parse_sample( logger.debug("Extracting nuisance parameter definitions from LHE file") systematics_dict = extract_nuisance_parameters_from_lhe_file(lhe_file, systematics_used) logger.debug("systematics_dict: %s", systematics_dict) + # systematics_dict has structure # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}} From dfe9ad316a32370af260b4999df4de7a9a424929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:06:32 -0400 Subject: [PATCH 27/65] src: LHE module remove 'six' --- madminer/lhe/lhe_reader.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py index bea20ad1e..2ba34cc34 100644 --- a/madminer/lhe/lhe_reader.py +++ b/madminer/lhe/lhe_reader.py @@ -1,9 +1,6 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six -from collections import OrderedDict -import numpy as np import logging +import numpy as np +from collections import OrderedDict from madminer.utils.interfaces.madminer_hdf5 import ( save_events_to_madminer_file, @@ -677,10 +674,8 @@ def _parse_sample( # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}} # Store nuisance parameters - for systematics_name, nuisance_info in six.iteritems(systematics_dict): - for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in six.iteritems( - nuisance_info - ): + for systematics_name, nuisance_info in systematics_dict.items(): + for nuisance_parameter_name, ((benchmark0, weight0), (benchmark1, weight1), _) in nuisance_info.items(): if ( self.nuisance_parameters is not None and nuisance_parameter_name in self.nuisance_parameters @@ -739,7 +734,7 @@ def _check_sample_observations_and_weights(this_observations, this_weights): # Check number of events in observables, and their dtype 
n_events = None - for key, obs in six.iteritems(this_observations): + for key, obs in this_observations.items(): this_n_events = len(obs) if n_events is None: n_events = this_n_events @@ -761,7 +756,7 @@ def _check_sample_observations_and_weights(this_observations, this_weights): obs, ) # Check number of events in weights, and thier dtype - for key, weights in six.iteritems(this_weights): + for key, weights in this_weights.items(): this_n_events = len(weights) if n_events is None: n_events = this_n_events From c100335f57900186229562f2459b3b358f383828 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:08:48 -0400 Subject: [PATCH 28/65] src: LHE module combine funcs --- madminer/lhe/lhe_reader.py | 74 ++++++++++++-------------------------- 1 file changed, 23 insertions(+), 51 deletions(-) diff --git a/madminer/lhe/lhe_reader.py b/madminer/lhe/lhe_reader.py index 2ba34cc34..d8c091ce7 100644 --- a/madminer/lhe/lhe_reader.py +++ b/madminer/lhe/lhe_reader.py @@ -108,6 +108,27 @@ def __init__(self, filename): # Initialize nuisance parameters self.nuisance_parameters = OrderedDict() + @staticmethod + def _check_sample_elements(this_elements, n_events=None): + """ Sanity checks """ + + # Check number of events in observables + for key, elems in this_elements.items(): + this_n_events = len(elems) + + if n_events is None: + n_events = this_n_events + logger.debug(f"Found {n_events} events") + + if this_n_events != n_events: + raise RuntimeError( + f"Mismatching number of events for {key}: "f"{n_events} vs {this_n_events}") + + if not np.issubdtype(elems.dtype, np.number): + logger.warning(f"For key {key} have non-numeric dtype {elems.dtype}.") + + return n_events + def add_sample( self, lhe_filename, @@ -716,8 +737,8 @@ def _parse_sample( return None, None logger.debug("Found weights %s in LHE file", list(this_weights.keys())) - # Sanity checks - n_events = self._check_sample_observations_and_weights(this_observations, this_weights) + 
n_events = self._check_sample_elements(this_observations, None) + n_events = self._check_sample_elements(this_weights, None) # Rescale nuisance parameters to reference benchmark reference_weights = this_weights[reference_benchmark] @@ -728,55 +749,6 @@ def _parse_sample( return this_observations, this_weights, n_events - @staticmethod - def _check_sample_observations_and_weights(this_observations, this_weights): - """ Sanity checks """ - - # Check number of events in observables, and their dtype - n_events = None - for key, obs in this_observations.items(): - this_n_events = len(obs) - if n_events is None: - n_events = this_n_events - logger.debug("Found %s events", n_events) - - if this_n_events != n_events: - raise RuntimeError( - "Mismatching number of events in LHE observations for {}: {} vs {}".format( - key, n_events, this_n_events - ) - ) - - if not np.issubdtype(obs.dtype, np.number): - logger.warning( - "Observations for observable %s have non-numeric dtype %s. This usually means something " - "is wrong in the definition of the observable. Data: %s", - key, - obs.dtype, - obs, - ) - # Check number of events in weights, and thier dtype - for key, weights in this_weights.items(): - this_n_events = len(weights) - if n_events is None: - n_events = this_n_events - logger.debug("Found %s events", n_events) - - if this_n_events != n_events: - raise RuntimeError( - "Mismatching number of events in weights {}: {} vs {}".format(key, n_events, this_n_events) - ) - - if not np.issubdtype(weights.dtype, np.number): - logger.warning( - "Weights %s have non-numeric dtype %s. This usually means something " - "is wrong in the definition of the observable. Data: %s", - key, - weights.dtype, - weights, - ) - return n_events - def save(self, filename_out, shuffle=True): """ Saves the observable definitions, observable values, and event weights in a MadMiner file. 
The parameter, From 54b42d8cf635ffc7db0e482506ea1db31ea79e3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:24:56 -0400 Subject: [PATCH 29/65] src: likelihood module f-strings --- madminer/likelihood/histo.py | 9 +++------ madminer/likelihood/manipulate.py | 7 +++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/madminer/likelihood/histo.py b/madminer/likelihood/histo.py index 32cf8b0b8..fc3676e3c 100644 --- a/madminer/likelihood/histo.py +++ b/madminer/likelihood/histo.py @@ -114,13 +114,10 @@ def create_negative_log_likelihood( if n_observed is None: n_observed = len(x_observed) - supported_modes = ["sampled", "weighted", "histo"] - if mode not in supported_modes: - raise ValueError("Mode %s unknown. Choose one of the following methods: %s", mode, supported_modes) + if mode not in {"sampled", "weighted", "histo"}: + raise ValueError(f"Mode {mode} unknown.") if mode == "histo" and self.n_nuisance_parameters > 0: - raise ValueError( - "Mode histo is currently not supported in the presence of nuisance parameters. Please use mode weighted or sampled." 
- ) + raise ValueError("Mode histo is currently not supported in the presence of nuisance parameters") # Load model - nothing interesting if score_components != []: diff --git a/madminer/likelihood/manipulate.py b/madminer/likelihood/manipulate.py index 2804392ed..b9c8fd4a6 100644 --- a/madminer/likelihood/manipulate.py +++ b/madminer/likelihood/manipulate.py @@ -49,7 +49,7 @@ def constrained_nll(params): if len(theta) != len(fixed_components): raise ValueError("Length of fixed_components and theta should be the same") if len(params) + len(fixed_components) != n_dimension: - raise ValueError("Length of params should be %s", n_dimension - len(fixed_components)) + raise ValueError(f"Length of params should be {n_dimension-len(fixed_components)}") # Initialize full paramaters params_full = np.zeros(n_dimension) @@ -275,9 +275,8 @@ def profile_log_likelihood( dof = m_paramaters # Method - supported_methods = ["TNC", " L-BFGS-B"] - if method not in supported_methods: - raise ValueError("Method %s unknown. 
Choose one of the following methods: %s", method, supported_methods) + if method not in {"TNC", " L-BFGS-B"}: + raise ValueError(f"Method {method} unknown.") # Initial guess for theta if theta_start is None: From c47843a726522382ba362bab29ea7c3172f39255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:32:34 -0400 Subject: [PATCH 30/65] src: likelihood module improve fmt --- madminer/likelihood/base.py | 29 ++++++++++++++++++------- madminer/likelihood/histo.py | 35 +++++++++++++++---------------- madminer/likelihood/manipulate.py | 8 +++---- madminer/likelihood/neural.py | 1 + 4 files changed, 43 insertions(+), 30 deletions(-) diff --git a/madminer/likelihood/base.py b/madminer/likelihood/base.py index 3a53b6580..4275195c4 100644 --- a/madminer/likelihood/base.py +++ b/madminer/likelihood/base.py @@ -11,13 +11,20 @@ class BaseLikelihood(DataAnalyzer): + def create_negative_log_likelihood(self, *args, **kwargs): - raise NotImplementedError + raise NotImplementedError() def create_expected_negative_log_likelihood(self, *args, **kwargs): - raise NotImplementedError - - def _asimov_data(self, theta, test_split=0.2, sample_only_from_closest_benchmark=True, n_asimov=None): + raise NotImplementedError() + + def _asimov_data( + self, + theta, + test_split=0.2, + sample_only_from_closest_benchmark=True, + n_asimov=None, + ): # get data start_event, end_event, correction_factor = self._train_test_split(False, test_split) @@ -39,13 +46,19 @@ def _asimov_data(self, theta, test_split=0.2, sample_only_from_closest_benchmark return x, weights_theta def _log_likelihood(self, *args, **kwargs): - raise NotImplementedError + raise NotImplementedError() def _log_likelihood_kinematic(self, *args, **kwargs): - raise NotImplementedError + raise NotImplementedError() def _log_likelihood_poisson( - self, n_observed, theta, nu, luminosity=300000.0, weights_benchmarks=None, total_weights=None + self, + n_observed, + theta, + nu, + 
luminosity=300000.0, + weights_benchmarks=None, + total_weights=None, ): if total_weights is not None and nu is None: # `histo` mode: Efficient morphing of whole cross section for the case without nuisance parameters @@ -72,8 +85,8 @@ def _log_likelihood_poisson( if xsec < 0: logger.warning("Total cross section is negative (%s pb) at theta=%s)", xsec, theta) n_predicted = 10 ** -5 - n_observed_rounded = int(np.round(n_observed, 0)) + n_observed_rounded = int(np.round(n_observed, 0)) log_likelihood = poisson.logpmf(k=n_observed_rounded, mu=n_predicted) logger.debug( diff --git a/madminer/likelihood/histo.py b/madminer/likelihood/histo.py index fc3676e3c..1e1822d70 100644 --- a/madminer/likelihood/histo.py +++ b/madminer/likelihood/histo.py @@ -15,6 +15,7 @@ class HistoLikelihood(BaseLikelihood): + def create_negative_log_likelihood( self, x_observed, @@ -33,7 +34,7 @@ def create_negative_log_likelihood( ): """ Returns a function which calculates the negative log likelihood for a given - parameter point, evaulated with a dataset (x_observed,n_observed,x_observed_weights). + parameter point, evaluated with a dataset (x_observed,n_observed,x_observed_weights). Parameters ---------- @@ -64,7 +65,7 @@ def create_negative_log_likelihood( Integrated luminosity in pb^{-1} assumed in the analysis. Default value: 300000. mode : {"weighted" , "sampled", "histo"} , optional - If "sampled", for each evaulation of the likelihood function, a separate + If "sampled", for each evaluation of the likelihood function, a separate set of events are sampled and histogram is created to construct the likelihood function. If "weighted", first a set of weighted events is sampled which is then used to create histograms. 
Default value: "sampled" @@ -120,7 +121,7 @@ def create_negative_log_likelihood( raise ValueError("Mode histo is currently not supported in the presence of nuisance parameters") # Load model - nothing interesting - if score_components != []: + if score_components: assert all([isinstance(score_component, int) for score_component in score_components]) if model_file is None: raise ValueError("You need to provide a model_file!") @@ -131,7 +132,7 @@ def create_negative_log_likelihood( # Create summary function logger.info("Setting up standard summary statistics") summary_function = None - if observables != []: + if observables: summary_function = self._make_summary_statistic_function(observables=observables, model=model) # Weighted sampled @@ -234,15 +235,15 @@ def create_expected_negative_log_likelihood( ): """ Returns a function which calculates the expected negative log likelihood for a given - parameter point, evaulated with test data sampled according to theta_true. + parameter point, evaluated with test data sampled according to theta_true. Parameters ---------- theta_true : ndarray - Specifies the physical paramaters according to which the test data is sampled. + Specifies the physical parameters according to which the test data is sampled. nu_true : ndarray - Specifies the nuisance paramaters according to which the test data is sampled. + Specifies the nuisance parameters according to which the test data is sampled. observables : list of str or None , optional Kinematic variables used in the histograms. The names are the same as @@ -263,7 +264,7 @@ def create_expected_negative_log_likelihood( file are used. Default value: None. mode : {"weighted" , "sampled"} , optional - If "sampled", for each evaulation of the likelihood function, a separate + If "sampled", for each evaluation of the likelihood function, a separate set of events are sampled and histogram is created to construct the likelihood function. 
If "weighted", first a set of weighted events is sampled which is then used to create histograms. Default value: "sampled" @@ -417,9 +418,7 @@ def _log_likelihood_kinematic( histo = self._histogram_morphing(theta, benchmark_histograms, hist_bins, bin_centers) # calculate log-likelihood from histogram - log_p = histo.log_likelihood(summary_stats) - - return log_p + return histo.log_likelihood(summary_stats) def _make_summary_statistic_function(self, observables=None, model=None): """ @@ -434,7 +433,7 @@ def summary_function(xs): if not "score" in x_indices and not "function" in x_indices: return xs[:, x_indices] - # evaulate some observables using eval() - more slow + # evaluate some observables using eval() - more slow data_events = [] for x in xs: data_event = [] @@ -476,6 +475,7 @@ def _find_x_indices(self, observables): x_indices.append(x_names.index(obs)) except: x_indices.append("function") + logger.debug("Using x indices %s", x_indices) return x_indices @@ -495,9 +495,7 @@ def _make_histo_data_sampled(self, summary_function, theta, n_histo_toys=1000): ) # Calculate summary stats - data = summary_function(x) - - return data + return summary_function(x) def _make_histo_data_weighted(self, summary_function, n_toys, test_split=None): """ @@ -530,6 +528,7 @@ def _find_bins(self, hist_bins, n_summary_stats): elif isinstance(hist_bins, int): # hist_bins = tuple([hist_bins] * n_summary_stats) hist_bins = [hist_bins for _ in range(n_summary_stats)] + return hist_bins def _fixed_adaptive_binning( @@ -549,7 +548,9 @@ def _fixed_adaptive_binning( # Get weighted data if data is None: data, weights_benchmarks = self._make_histo_data_weighted( - summary_function=summary_function, n_toys=n_toys, test_split=test_split + summary_function=summary_function, + n_toys=n_toys, + test_split=test_split, ) # Calculate weights for thetas @@ -584,8 +585,6 @@ def _histogram_morphing(self, theta, histogram_benchmarks, hist_bins, bin_center """ Low-level function that morphes histograms 
""" - # get binning - hist_nbins = [len(bins) - 1 for bins in hist_bins] # get array of flattened histograms flattened_histo_weights = [] diff --git a/madminer/likelihood/manipulate.py b/madminer/likelihood/manipulate.py index b9c8fd4a6..d9a1f81f3 100644 --- a/madminer/likelihood/manipulate.py +++ b/madminer/likelihood/manipulate.py @@ -27,7 +27,7 @@ def fix_params(negative_log_likelihood, theta, fixed_components=None): fixed_components : list of int or None, optional. m-dimensional vector of coordinate indices provided in theta. `fixed_components=[0,1]` will fix the 1st and 2nd - component of the paramater point. If None, uses [0, ..., m-1]. + component of the parameter point. If None, uses [0, ..., m-1]. Returns ------- @@ -51,7 +51,7 @@ def constrained_nll(params): if len(params) + len(fixed_components) != n_dimension: raise ValueError(f"Length of params should be {n_dimension-len(fixed_components)}") - # Initialize full paramaters + # Initialize full parameters params_full = np.zeros(n_dimension) # fill fixed components @@ -83,7 +83,7 @@ def project_log_likelihood( """ Takes a likelihood function depending on N parameters, and evaluates for a set of M-dimensional parameter points (either grid or explicitly specified) - while the remaining N-M paramters are set to zero. + while the remaining N-M parameters are set to zero. Parameters ---------- @@ -199,7 +199,7 @@ def profile_log_likelihood( """ Takes a likelihood function depending on N parameters, and evaluates for a set of M-dimensional parameter points (either grid or explicitly specified) - while the remaining N-M paramters are profiled over. + while the remaining N-M parameters are profiled over. 
Parameters ---------- diff --git a/madminer/likelihood/neural.py b/madminer/likelihood/neural.py index 49aa89691..e07b5c66b 100644 --- a/madminer/likelihood/neural.py +++ b/madminer/likelihood/neural.py @@ -11,6 +11,7 @@ class NeuralLikelihood(BaseLikelihood): + def create_negative_log_likelihood( self, model_file, From 4deeb3801a3aab0ca42660eb6ffb3bfdad30d9f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:35:25 -0400 Subject: [PATCH 31/65] src: likelihood module remove 'future' --- madminer/likelihood/base.py | 2 -- madminer/likelihood/histo.py | 8 +++----- madminer/likelihood/manipulate.py | 2 -- madminer/likelihood/neural.py | 6 ++---- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/madminer/likelihood/base.py b/madminer/likelihood/base.py index 4275195c4..42b518ab1 100644 --- a/madminer/likelihood/base.py +++ b/madminer/likelihood/base.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np from scipy.stats import poisson, norm diff --git a/madminer/likelihood/histo.py b/madminer/likelihood/histo.py index 1e1822d70..9c37c6fea 100644 --- a/madminer/likelihood/histo.py +++ b/madminer/likelihood/histo.py @@ -1,15 +1,13 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np from itertools import product -from ..utils.various import mdot, less_logging, math_commands +from .base import BaseLikelihood +from .. import sampling from ..ml import ScoreEstimator, Ensemble, load_estimator from ..utils.histo import Histo +from ..utils.various import mdot, less_logging, math_commands from ..sampling import SampleAugmenter -from .. 
import sampling -from .base import BaseLikelihood logger = logging.getLogger(__name__) diff --git a/madminer/likelihood/manipulate.py b/madminer/likelihood/manipulate.py index d9a1f81f3..aeb8ae252 100644 --- a/madminer/likelihood/manipulate.py +++ b/madminer/likelihood/manipulate.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np import time diff --git a/madminer/likelihood/neural.py b/madminer/likelihood/neural.py index e07b5c66b..fcfa08893 100644 --- a/madminer/likelihood/neural.py +++ b/madminer/likelihood/neural.py @@ -1,11 +1,9 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np -from ..utils.various import less_logging -from ..ml import ParameterizedRatioEstimator, Ensemble, LikelihoodEstimator, load_estimator from .base import BaseLikelihood +from ..ml import ParameterizedRatioEstimator, Ensemble, LikelihoodEstimator, load_estimator +from ..utils.various import less_logging logger = logging.getLogger(__name__) From 4117194e56898e01bf6f80580272ed4d2aa2d011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:41:40 -0400 Subject: [PATCH 32/65] src: limits module f-strings --- madminer/limits/asymptotic_limits.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py index 21ac66eb0..72829f9f5 100644 --- a/madminer/limits/asymptotic_limits.py +++ b/madminer/limits/asymptotic_limits.py @@ -690,7 +690,7 @@ def _analyse( log_r_kin = n_events * np.sum(log_r_kin * obs_weights[np.newaxis, :], axis=1) else: - raise ValueError("Unknown mode {}, has to be 'ml' or 'histo' or 'xsec'".format(mode)) + raise ValueError(f"Unknown mode {mode}") # xsec part if include_xsec: @@ -783,7 +783,7 @@ def summary_function(x): return score else: - raise RuntimeError("Unknown mode {}, has to be 
'observables' or 'sally'".format(mode)) + raise RuntimeError(f"Unknown mode {mode}") return summary_function @@ -855,7 +855,7 @@ def processor(scores, theta): return postprocessing(h) else: - raise RuntimeError("Unknown score processing mode {} for summary stats dimension {}".format(mode, dim)) + raise RuntimeError(f"Unknown score processing mode {mode} for summary stats dimension {dim}") return processor @@ -1052,7 +1052,7 @@ def _find_x_indices(self, observables): try: x_indices.append(x_names.index(obs)) except ValueError: - raise RuntimeError("Unknown observable {}, has to be one of {}".format(obs, x_names)) + raise RuntimeError(f"Unknown observable {obs}") logger.debug("Using x indices %s", x_indices) return x_indices From 237d42589ad1726f10248c151381e4d5e67f43bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:47:18 -0400 Subject: [PATCH 33/65] src: limits module improve fmt --- madminer/limits/asymptotic_limits.py | 29 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py index 72829f9f5..426ceee7f 100644 --- a/madminer/limits/asymptotic_limits.py +++ b/madminer/limits/asymptotic_limits.py @@ -84,7 +84,7 @@ def observed_limits( `x_observed` specifies the observed data as an array of observables, using the same observables and their order as used throughout the MadMiner workflow. - The p-values with frequentist hypothesis tests using the likelihood ratio as test statistic. The asymptotic + The p-values with frequentist hypothesis tests using the likelihood ratio as test statistic. The asymptotic approximation is used, see https://arxiv.org/abs/1007.1727.
Depending on the keyword `mode`, the likelihood ratio is calculated with one of several different methods: @@ -243,7 +243,8 @@ def observed_limits( """ if n_observed is None: n_observed = len(x_observed) - results = self._analyse( + + return self._analyse( n_observed, x_observed, grid_ranges, @@ -269,7 +270,6 @@ def observed_limits( n_binning_toys=n_binning_toys, thetas_eval=thetas_eval, ) - return results def expected_limits( self, @@ -472,9 +472,11 @@ def expected_limits( test_split=test_split, n_asimov=n_asimov, ) + n_observed = luminosity * self._calculate_xsecs([theta_true])[0] logger.info("Expected events: %s", n_observed) - results = self._analyse( + + return self._analyse( n_observed, x_asimov, grid_ranges, @@ -501,7 +503,6 @@ def expected_limits( n_binning_toys=n_binning_toys, thetas_eval=thetas_eval, ) - return results def asymptotic_p_value(self, log_likelihood_ratio, dof=None): """ @@ -525,9 +526,9 @@ def asymptotic_p_value(self, log_likelihood_ratio, dof=None): """ if dof is None: dof = self.n_parameters + q = -2.0 * log_likelihood_ratio - p_value = chi2.sf(x=q, df=dof) - return p_value + return chi2.sf(x=q, df=dof) def _analyse( self, @@ -716,6 +717,7 @@ def _analyse( histo_data = (histos, processed_summary_stats, obs_weights) elif return_histos: histo_data = histos + return theta_grid, p_values, i_ml, log_r_kin, log_p_xsec, histo_data def _find_bins(self, mode, hist_bins, summary_stats): @@ -724,6 +726,7 @@ def _find_bins(self, mode, hist_bins, summary_stats): n_summary_stats += 1 elif mode == "sallino": n_summary_stats = 1 + # Bin numbers if hist_bins is None: if mode == "adaptive-sally" and n_summary_stats == 2: @@ -749,13 +752,13 @@ def _find_bins(self, mode, hist_bins, summary_stats): else: n_bins_each = [n_bins if isinstance(n_bins, int) else len(n_bins) - 1 for n_bins in hist_bins] total_n_bins = np.prod(n_bins_each) + return hist_bins, n_bins_each, n_summary_stats, total_n_bins def _make_summary_statistic_function(self, mode, model=None, 
observables=None): if mode == "observables": assert observables is not None x_indices = self._find_x_indices(observables) - logger.debug("Preparing observables %s as summary statistic function", x_indices) def summary_function(x): @@ -1000,17 +1003,21 @@ def _make_histos( def _fixed_adaptive_binning(self, n_toys, processor, summary_function, test_split, thetas_binning, x_bins): summary_stats, all_weights = self._make_weighted_histo_data( - summary_function, thetas_binning, n_toys, test_split=test_split + summary_function=summary_function, + thetas=thetas_binning, + n_toys=n_toys, + test_split=test_split, ) + all_weights = np.asarray(all_weights) weights = np.mean(all_weights, axis=0) if processor is None: data = summary_stats else: data = processor(summary_stats, thetas_binning) + histo = Histo(data, weights, x_bins, epsilon=1.0e-12) - x_bins = histo.edges - return x_bins + return histo.edges def _make_weighted_histo_data(self, summary_function, thetas, n_toys, test_split=0.2): # Get weighted events From 314714a4200932fb0a3d3f579a670b8ea4076980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 21 Sep 2020 16:48:35 -0400 Subject: [PATCH 34/65] src: limits module remove 'future' --- madminer/limits/asymptotic_limits.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py index 426ceee7f..b3c46da91 100644 --- a/madminer/limits/asymptotic_limits.py +++ b/madminer/limits/asymptotic_limits.py @@ -1,15 +1,13 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np from scipy.stats import chi2, poisson +from madminer import sampling from madminer.analysis import DataAnalyzer -from madminer.utils.various import mdot, less_logging from madminer.ml import ParameterizedRatioEstimator, Ensemble, ScoreEstimator, LikelihoodEstimator, load_estimator from madminer.utils.histo import Histo +from 
madminer.utils.various import mdot, less_logging from madminer.sampling import SampleAugmenter -from madminer import sampling logger = logging.getLogger(__name__) From 8b9d2e0857016d5ce3428d33822ea1dade915915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 10:15:20 -0400 Subject: [PATCH 35/65] src: ML module f-strings --- madminer/ml/double_parameterized_ratio.py | 14 +++++-------- madminer/ml/ensemble.py | 14 ++++++------- madminer/ml/likelihood.py | 12 ++++------- madminer/ml/lookup.py | 4 ++-- madminer/ml/parameterized_ratio.py | 20 ++++++++---------- madminer/ml/score.py | 25 +++++++++-------------- 6 files changed, 36 insertions(+), 53 deletions(-) diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py index 2d99b6227..ec3cf197e 100644 --- a/madminer/ml/double_parameterized_ratio.py +++ b/madminer/ml/double_parameterized_ratio.py @@ -309,13 +309,9 @@ def train( self.n_parameters = n_parameters if n_parameters != self.n_parameters: - raise RuntimeError( - "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) - ) + raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}") if n_observables != self.n_observables: - raise RuntimeError( - "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables) - ) + raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}") # Data data = self._package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1) @@ -499,9 +495,9 @@ def _create_model(self): @staticmethod def _check_required_data(method, r_xz, t_xz0, t_xz1): if method in ["cascal", "alices", "rascal"] and (t_xz0 is None or t_xz1 is None): - raise RuntimeError("Method {} requires joint score information".format(method)) + raise RuntimeError(f"Method {method} requires joint score information") if method in ["rolr", "alice", 
"alices", "rascal"] and r_xz is None: - raise RuntimeError("Method {} requires joint likelihood ratio information".format(method)) + raise RuntimeError(f"Method {method} requires joint likelihood ratio information") @staticmethod def _package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1): @@ -527,4 +523,4 @@ def _unwrap_settings(self, settings): estimator_type = str(settings["estimator_type"]) if estimator_type != "double_parameterized_ratio": - raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.") diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py index 3ad09d94a..c30f11b8a 100644 --- a/madminer/ml/ensemble.py +++ b/madminer/ml/ensemble.py @@ -144,7 +144,7 @@ def train_all(self, **kwargs): if not isinstance(value, list): kwargs[key] = [value for _ in range(self.n_estimators)] - assert len(kwargs[key]) == self.n_estimators, "Keyword {} has wrong length {}".format(key, len(value)) + assert len(kwargs[key]) == self.n_estimators, f"Keyword {key} has wrong length {len(value)}" for i, estimator in enumerate(self.estimators): kwargs_this_estimator = {} @@ -407,7 +407,7 @@ def calculate_fisher_information( # Check input if mode not in ["score", "information"]: - raise ValueError("Unknown mode {}, has to be 'score' or 'information'!".format(mode)) + raise ValueError(f"Unknown mode {mode}!") # Calculate estimator_weights of each estimator in vote if estimator_weights is None: @@ -604,12 +604,12 @@ def save(self, folder, save_model=False): logger.debug("Saving ensemble setup to %s/ensemble.json", folder) settings = {"estimator_type": self.estimator_type, "n_estimators": self.n_estimators} - with open(folder + "/ensemble.json", "w") as f: + with open(f"{folder}/ensemble.json", "w") as f: json.dump(settings, f) # Save estimators for i, estimator in enumerate(self.estimators): - estimator.save(folder + "/estimator_" + 
str(i), save_model=save_model) + estimator.save(f"{folder}/estimator_{i}", save_model=save_model) def load(self, folder): """ @@ -627,7 +627,7 @@ def load(self, folder): """ # Load ensemble settings logger.debug("Loading ensemble setup from %s/ensemble.json", folder) - with open(folder + "/ensemble.json", "r") as f: + with open(f"{folder}/ensemble.json", "r") as f: settings = json.load(f) self.n_estimators = int(settings["n_estimators"]) @@ -644,7 +644,7 @@ def load(self, folder): self.estimators = [] for i in range(self.n_estimators): estimator = self._get_estimator_class(estimator_type)() - estimator.load(folder + "/estimator_" + str(i)) + estimator.load(f"{folder}/estimator_{i}") self.estimators.append(estimator) self._check_consistency() @@ -725,4 +725,4 @@ def _get_estimator_class(estimator_type): elif estimator_type == "likelihood": return LikelihoodEstimator else: - raise RuntimeError("Unknown estimator type {}!".format(estimator_type)) + raise RuntimeError(f"Unknown estimator type {estimator_type}!") diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py index dd81547fd..0567323b2 100644 --- a/madminer/ml/likelihood.py +++ b/madminer/ml/likelihood.py @@ -271,13 +271,9 @@ def train( self.n_parameters = n_parameters if n_parameters != self.n_parameters: - raise RuntimeError( - "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) - ) + raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}") if n_observables != self.n_observables: - raise RuntimeError( - "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables) - ) + raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}") # Data data = self._package_training_data(method, x, theta, t_xz) @@ -515,7 +511,7 @@ def _create_model(self): @staticmethod def _check_required_data(method, t_xz): if method == ["scandal"] and t_xz is None: - raise 
RuntimeError("Method {} requires joint score information".format(method)) + raise RuntimeError(f"Method {method} requires joint score information") @staticmethod def _package_training_data(method, x, theta, t_xz): @@ -539,7 +535,7 @@ def _unwrap_settings(self, settings): estimator_type = str(settings["estimator_type"]) if estimator_type != "likelihood": - raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.") self.n_components = int(settings["n_components"]) self.n_mades = int(settings["n_mades"]) diff --git a/madminer/ml/lookup.py b/madminer/ml/lookup.py index 061a5afce..4d16cc44c 100644 --- a/madminer/ml/lookup.py +++ b/madminer/ml/lookup.py @@ -16,7 +16,7 @@ def load_estimator(filename): model.load(filename) else: - with open(filename + "_settings.json", "r") as f: + with open(f"{filename}_settings.json", "r") as f: settings = json.load(f) try: estimator_type = settings["estimator_type"] @@ -32,7 +32,7 @@ def load_estimator(filename): elif estimator_type == "likelihood": model = LikelihoodEstimator() else: - raise RuntimeError("Unknown estimator type {}!".format(estimator_type)) + raise RuntimeError(f"Unknown estimator type {estimator_type}!") model.load(filename) diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py index 99844a844..63a994f1a 100644 --- a/madminer/ml/parameterized_ratio.py +++ b/madminer/ml/parameterized_ratio.py @@ -283,13 +283,9 @@ def train( self.n_parameters = n_parameters if n_parameters != self.n_parameters: - raise RuntimeError( - "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) - ) + raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}") if n_observables != self.n_observables: - raise RuntimeError( - "Number of observables does not match model: {} vs {}".format(n_observables, 
self.n_observables) - ) + raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}") # Data data = self._package_training_data(method, x, theta, y, r_xz, t_xz) @@ -503,7 +499,7 @@ def evaluate_score(self, x, theta, nuisance_mode="keep"): if nuisance_mode == "keep": logger.debug("Keeping nuisance parameter in score") else: - raise ValueError("Unknown nuisance_mode {}".format(nuisance_mode)) + raise ValueError(f"Unknown nuisance_mode {nuisance_mode}") _, all_t_hat = self.evaluate_log_likelihood_ratio(x, theta, test_all_combinations=False, evaluate_score=True) return all_t_hat @@ -528,10 +524,10 @@ def _create_model(self): @staticmethod def _check_required_data(method, r_xz, t_xz): - if method in ["cascal", "alices", "rascal"] and t_xz is None: - raise RuntimeError("Method {} requires joint score information".format(method)) - if method in ["rolr", "alice", "alices", "rascal"] and r_xz is None: - raise RuntimeError("Method {} requires joint likelihood ratio information".format(method)) + if method in {"cascal", "alices", "rascal"} and t_xz is None: + raise RuntimeError(f"Method {method} requires joint score information") + if method in {"rolr", "alice", "alices", "rascal"} and r_xz is None: + raise RuntimeError(f"Method {method} requires joint likelihood ratio information") @staticmethod def _package_training_data(method, x, theta, y, r_xz, t_xz): @@ -555,4 +551,4 @@ def _unwrap_settings(self, settings): estimator_type = str(settings["estimator_type"]) if estimator_type != "parameterized_ratio": - raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.") diff --git a/madminer/ml/score.py b/madminer/ml/score.py index 19de558ee..40e5300e8 100644 --- a/madminer/ml/score.py +++ b/madminer/ml/score.py @@ -215,13 +215,9 @@ def train( self.n_parameters = n_parameters if n_parameters != self.n_parameters: - 
raise RuntimeError( - "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) - ) + raise RuntimeError(f"Number of parameters does not match: {n_parameters} vs {self.n_parameters}") if n_observables != self.n_observables: - raise RuntimeError( - "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables) - ) + raise RuntimeError(f"Number of observables does not match: {n_observables} vs {self.n_observables}") # Data data = self._package_training_data(x, t_xz) @@ -284,9 +280,8 @@ def set_nuisance(self, fisher_information, parameters_of_interest): """ if fisher_information.shape != (self.n_parameters, self.n_parameters): raise ValueError( - "Fisher information has wrong shape {}, expected {}".format( - fisher_information.shape, (self.n_parameters, self.n_parameters) - ) + f"Fisher information has wrong shape {fisher_information.shape}. " + f"Expected {(self.n_parameters, self.n_parameters)}" ) n_parameters_of_interest = len(parameters_of_interest) @@ -386,7 +381,7 @@ def evaluate_score(self, x, theta=None, nuisance_mode="auto"): t_hat = np.einsum("ij,xj->xi", self.nuisance_profile_matrix, t_hat) else: - raise ValueError("Unknown nuisance_mode {}".format(nuisance_mode)) + raise ValueError(f"Unknown nuisance_mode {nuisance_mode}") return t_hat @@ -416,16 +411,16 @@ def save(self, filename, save_model=False): filename, filename, ) - np.save(filename + "_nuisance_profile_matrix.npy", self.nuisance_profile_matrix) - np.save(filename + "_nuisance_project_matrix.npy", self.nuisance_project_matrix) + np.save(f"{filename}_nuisance_profile_matrix.npy", self.nuisance_profile_matrix) + np.save(f"{filename}_nuisance_project_matrix.npy", self.nuisance_project_matrix) def load(self, filename): super(ScoreEstimator, self).load(filename) # Load scaling try: - self.nuisance_profile_matrix = np.load(filename + "_nuisance_profile_matrix.npy") - self.nuisance_project_matrix = np.load(filename + 
"_nuisance_project_matrix.npy") + self.nuisance_profile_matrix = np.load(f"{filename}_nuisance_profile_matrix.npy") + self.nuisance_project_matrix = np.load(f"{filename}_nuisance_project_matrix.npy") logger.debug( " Found nuisance profiling / projection matrices:\nProfiling:\n%s\nProjection:\n%s", self.nuisance_profile_matrix, @@ -463,7 +458,7 @@ def _unwrap_settings(self, settings): estimator_type = str(settings["estimator_type"]) if estimator_type != "score": - raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + raise RuntimeError(f"Saved model is an incompatible estimator type {estimator_type}.") try: self.nuisance_mode_default = str(settings["nuisance_mode_default"]) From 2454945656c4696757ccfbdd38b2b7cc37dab72d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 10:31:39 -0400 Subject: [PATCH 36/65] src: ML module improve fmt --- madminer/ml/base.py | 15 ++++++--------- madminer/ml/double_parameterized_ratio.py | 13 +++++++++++-- madminer/ml/ensemble.py | 2 +- madminer/ml/likelihood.py | 4 ++-- madminer/ml/parameterized_ratio.py | 14 ++++++-------- madminer/ml/score.py | 14 ++++++-------- 6 files changed, 32 insertions(+), 30 deletions(-) diff --git a/madminer/ml/base.py b/madminer/ml/base.py index 7f84a4d97..bd79c44ea 100644 --- a/madminer/ml/base.py +++ b/madminer/ml/base.py @@ -46,34 +46,31 @@ def __init__(self, features=None, n_hidden=(100,), activation="tanh", dropout_pr self.x_scaling_stds = None def train(self, *args, **kwargs): - raise NotImplementedError + raise NotImplementedError() def evaluate_log_likelihood(self, *args, **kwargs): """ Log likelihood estimation. Signature depends on the type of estimator. The first returned value is the log likelihood with shape `(n_thetas, n_x)`. """ - raise NotImplementedError + raise NotImplementedError() def evaluate_log_likelihood_ratio(self, *args, **kwargs): """ Log likelihood ratio estimation. 
Signature depends on the type of estimator. The first returned value is the log likelihood ratio with shape `(n_thetas, n_x)` or `(n_x)`. """ - raise NotImplementedError + raise NotImplementedError() def evaluate_score(self, *args, **kwargs): """ Score estimation. Signature depends on the type of estimator. The only returned value is the score with shape `(n_x)`. """ - raise NotImplementedError + raise NotImplementedError() def evaluate(self, *args, **kwargs): - raise NotImplementedError - - def calculate_fisher_information(self, *args, **kwargs): - raise NotImplementedError + raise NotImplementedError() def save(self, filename, save_model=False): @@ -237,7 +234,7 @@ def _unwrap_settings(self, settings): ) def _create_model(self): - raise NotImplementedError + raise NotImplementedError() def calculate_fisher_information(self, x, theta=None, weights=None, n_events=1, sum_events=True): """ diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py index ec3cf197e..5ac36e090 100644 --- a/madminer/ml/double_parameterized_ratio.py +++ b/madminer/ml/double_parameterized_ratio.py @@ -90,7 +90,7 @@ def train( Observations, or filename of a pickled numpy array. y : ndarray or str - Class labels (0 = numeerator, 1 = denominator), or filename of a pickled numpy array. + Class labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array. theta0 : ndarray or str Numerator parameter point, or filename of a pickled numpy array. @@ -259,6 +259,7 @@ def train( assert x_val.shape[1] == n_observables assert theta0_val.shape[1] == n_parameters assert theta1_val.shape[1] == n_parameters + if r_xz is not None: assert r_xz_val is not None, "When providing r_xz and sep. 
validation data, also provide r_xz_val" if t_xz0 is not None: @@ -315,9 +316,17 @@ def train( # Data data = self._package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1) + if external_validation: data_val = self._package_training_data( - method, x_val, theta0_val, theta1_val, y_val, r_xz_val, t_xz0_val, t_xz1_val + method, + x_val, + theta0_val, + theta1_val, + y_val, + r_xz_val, + t_xz0_val, + t_xz1_val, ) else: data_val = None diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py index c30f11b8a..0269912f1 100644 --- a/madminer/ml/ensemble.py +++ b/madminer/ml/ensemble.py @@ -679,7 +679,7 @@ def _check_consistency(self): " or local score estimators. Found types " + ", ".join(all_types) + "." ) - # Check consistency of parameter and observable numnbers + # Check consistency of parameter and observable numbers self.n_parameters = None self.n_observables = None diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py index 0567323b2..4d9b7a35d 100644 --- a/madminer/ml/likelihood.py +++ b/madminer/ml/likelihood.py @@ -45,7 +45,7 @@ class LikelihoodEstimator(ConditionalEstimator): activation : {'tanh', 'sigmoid', 'relu'}, optional Activation function. Default value: 'tanh'. - batch_norm : None or floar, optional + batch_norm : None or float, optional If not None, batch normalization is used, where this value sets the alpha parameter in the calculation of the running average of the mean and variance. Default value: None. 
@@ -365,7 +365,7 @@ def evaluate_log_likelihood(self, x, theta, test_all_combinations=True, evaluate # Scale observables x = self._transform_inputs(x) - # Restrict featuers + # Restrict features if self.features is not None: x = x[:, self.features] diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py index 63a994f1a..f63f9f183 100644 --- a/madminer/ml/parameterized_ratio.py +++ b/madminer/ml/parameterized_ratio.py @@ -88,7 +88,7 @@ def train( Observations, or filename of a pickled numpy array. y : ndarray or str - Class labels (0 = numeerator, 1 = denominator), or filename of a pickled numpy array. + Class labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array. theta : ndarray or str Numerator parameter point, or filename of a pickled numpy array. @@ -330,7 +330,7 @@ def train( def evaluate_log_likelihood_ratio(self, x, theta, test_all_combinations=True, evaluate_score=False): """ - Evaluates the log likelihood ratio for given observations x betwen the given parameter point theta and the + Evaluates the log likelihood ratio for given observations x between the given parameter point theta and the reference hypothesis. Parameters @@ -421,7 +421,7 @@ def evaluate_log_likelihood_ratio(self, x, theta, test_all_combinations=True, ev def evaluate_log_likelihood_ratio_torch(self, x, theta, test_all_combinations=True): """ - Evaluates the log likelihood ratio for given observations x betwen the given parameter point theta and the + Evaluates the log likelihood ratio for given observations x between the given parameter point theta and the reference hypothesis. Parameters @@ -474,7 +474,7 @@ def evaluate_log_likelihood(self, *args, **kwargs): def evaluate_score(self, x, theta, nuisance_mode="keep"): """ - Evaluates the scores for given observations x betwen at a given parameter point theta. + Evaluates the scores for given observations x between at a given parameter point theta. 
Parameters ---------- @@ -505,10 +505,8 @@ def evaluate_score(self, x, theta, nuisance_mode="keep"): return all_t_hat def calculate_fisher_information(self, x, theta, weights=None, n_events=1, sum_events=True): - fisher_information = super(ParameterizedRatioEstimator, self).calculate_fisher_information( - x, theta, weights, n_events, sum_events - ) - return fisher_information + return super(ParameterizedRatioEstimator, self) \ + .calculate_fisher_information(x, theta, weights, n_events, sum_events) def evaluate(self, *args, **kwargs): return self.evaluate_log_likelihood_ratio(*args, **kwargs) diff --git a/madminer/ml/score.py b/madminer/ml/score.py index 40e5300e8..03142db16 100644 --- a/madminer/ml/score.py +++ b/madminer/ml/score.py @@ -267,10 +267,10 @@ def set_nuisance(self, fisher_information, parameters_of_interest): Parameters ---------- fisher_information : ndarray - Fisher informatioin with shape `(n_parameters, n_parameters)`. + Fisher information with shape `(n_parameters, n_parameters)`. parameters_of_interest : list of int - List of int, with 0 <= remaining_compoinents[i] < n_parameters. Denotes which parameters are kept in the + List of int, with 0 <= remaining_components[i] < n_parameters. Denotes which parameters are kept in the profiling, and their new order. 
Returns @@ -350,7 +350,7 @@ def evaluate_score(self, x, theta=None, nuisance_mode="auto"): # Scale observables x = self._transform_inputs(x) - # Restrict featuers + # Restrict features if self.features is not None: x = x[:, self.features] @@ -358,7 +358,7 @@ def evaluate_score(self, x, theta=None, nuisance_mode="auto"): logger.debug("Starting score evaluation") t_hat = evaluate_local_score_model(model=self.model, xs=x) - # Treatment of nuisance paramters + # Treatment of nuisance parameters if nuisance_mode == "keep": logger.debug("Keeping nuisance parameter in score") @@ -395,10 +395,8 @@ def evaluate(self, *args, **kwargs): return self.evaluate_score(*args, **kwargs) def calculate_fisher_information(self, x, theta=None, weights=None, n_events=1, sum_events=True): - fisher_information = super(ScoreEstimator, self).calculate_fisher_information( - x, theta, weights, n_events, sum_events - ) - return fisher_information + return super(ScoreEstimator, self) \ + .calculate_fisher_information(x, theta, weights, n_events, sum_events) def save(self, filename, save_model=False): super(ScoreEstimator, self).save(filename, save_model) From 8a104cca7c660b58361fd4daf62b2336053a735f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 10:37:17 -0400 Subject: [PATCH 37/65] src: ML module remove 'six' --- madminer/ml/base.py | 6 ++---- madminer/ml/double_parameterized_ratio.py | 8 +++----- madminer/ml/ensemble.py | 15 ++++++--------- madminer/ml/likelihood.py | 8 +++----- madminer/ml/lookup.py | 4 +--- madminer/ml/morphing_aware.py | 6 ++---- madminer/ml/parameterized_ratio.py | 10 ++++------ madminer/ml/score.py | 11 ++++------- 8 files changed, 25 insertions(+), 43 deletions(-) diff --git a/madminer/ml/base.py b/madminer/ml/base.py index bd79c44ea..56c646861 100644 --- a/madminer/ml/base.py +++ b/madminer/ml/base.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import, division, print_function - -import logging -import os import json +import 
logging import numpy as np +import os import torch from ..utils.various import create_missing_folders, load_and_check diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py index 5ac36e090..bcb4f6151 100644 --- a/madminer/ml/double_parameterized_ratio.py +++ b/madminer/ml/double_parameterized_ratio.py @@ -1,15 +1,13 @@ -from __future__ import absolute_import, division, print_function - import logging import numpy as np from collections import OrderedDict -from ..utils.ml.models.ratio import DenseDoublyParameterizedRatioModel +from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork from ..utils.ml.eval import evaluate_ratio_model +from ..utils.ml.models.ratio import DenseDoublyParameterizedRatioModel +from ..utils.ml.trainer import DoubleParameterizedRatioTrainer from ..utils.ml.utils import get_optimizer, get_loss from ..utils.various import load_and_check, shuffle, restrict_samplesize -from ..utils.ml.trainer import DoubleParameterizedRatioTrainer -from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork try: FileNotFoundError diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py index 0269912f1..eb997fa9c 100644 --- a/madminer/ml/ensemble.py +++ b/madminer/ml/ensemble.py @@ -1,11 +1,8 @@ -from __future__ import absolute_import, division, print_function - -import six -import logging import json +import logging import numpy as np - from madminer.utils.various import create_missing_folders, load_and_check + from .base import Estimator from .double_parameterized_ratio import DoubleParameterizedRatioEstimator from .likelihood import LikelihoodEstimator @@ -140,7 +137,7 @@ def train_all(self, **kwargs): """ logger.info("Training %s estimators in ensemble", self.n_estimators) - for key, value in six.iteritems(kwargs): + for key, value in kwargs.items(): if not isinstance(value, list): kwargs[key] = [value for _ in range(self.n_estimators)] @@ -148,7 +145,7 @@ def train_all(self, 
**kwargs): for i, estimator in enumerate(self.estimators): kwargs_this_estimator = {} - for key, value in six.iteritems(kwargs): + for key, value in kwargs.items(): kwargs_this_estimator[key] = value[i] logger.info("Training estimator %s / %s in ensemble", i + 1, self.n_estimators) @@ -448,7 +445,7 @@ def calculate_fisher_information( # "modified_score" mode: elif mode == "modified_score": # Load training data - if isinstance(x, six.string_types): + if isinstance(x, str): x = load_and_check(x) n_samples = x.shape[0] @@ -511,7 +508,7 @@ def calculate_fisher_information( # "score" mode: elif mode == "score": # Load training data - if isinstance(x, six.string_types): + if isinstance(x, str): x = load_and_check(x) n_samples = x.shape[0] diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py index 4d9b7a35d..4a405f96c 100644 --- a/madminer/ml/likelihood.py +++ b/madminer/ml/likelihood.py @@ -1,16 +1,14 @@ -from __future__ import absolute_import, division, print_function - import logging import numpy as np from collections import OrderedDict +from .base import ConditionalEstimator +from ..utils.ml.eval import evaluate_flow_model from ..utils.ml.models.maf import ConditionalMaskedAutoregressiveFlow from ..utils.ml.models.maf_mog import ConditionalMixtureMaskedAutoregressiveFlow -from ..utils.ml.eval import evaluate_flow_model +from ..utils.ml.trainer import FlowTrainer from ..utils.ml.utils import get_optimizer, get_loss from ..utils.various import load_and_check, shuffle, restrict_samplesize -from ..utils.ml.trainer import FlowTrainer -from .base import ConditionalEstimator try: FileNotFoundError diff --git a/madminer/ml/lookup.py b/madminer/ml/lookup.py index 4d16cc44c..8a965775d 100644 --- a/madminer/ml/lookup.py +++ b/madminer/ml/lookup.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, print_function - -import os import json +import os from .ensemble import Ensemble from .double_parameterized_ratio import 
DoubleParameterizedRatioEstimator diff --git a/madminer/ml/morphing_aware.py b/madminer/ml/morphing_aware.py index 76eaa0d17..5c33780fa 100644 --- a/madminer/ml/morphing_aware.py +++ b/madminer/ml/morphing_aware.py @@ -1,12 +1,10 @@ -from __future__ import absolute_import, division, print_function - import logging import numpy as np -from ..utils.ml.models.ratio import DenseMorphingAwareRatioModel, DenseQuadraticMorphingAwareRatioModel +from .parameterized_ratio import ParameterizedRatioEstimator from ..utils.interfaces.madminer_hdf5 import load_madminer_settings +from ..utils.ml.models.ratio import DenseMorphingAwareRatioModel, DenseQuadraticMorphingAwareRatioModel from ..utils.morphing import PhysicsMorpher -from .parameterized_ratio import ParameterizedRatioEstimator try: FileNotFoundError diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py index f63f9f183..a78f80bd1 100644 --- a/madminer/ml/parameterized_ratio.py +++ b/madminer/ml/parameterized_ratio.py @@ -1,16 +1,14 @@ -from __future__ import absolute_import, division, print_function - import logging import numpy as np -from collections import OrderedDict import torch +from collections import OrderedDict -from ..utils.ml.models.ratio import DenseSingleParameterizedRatioModel +from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork from ..utils.ml.eval import evaluate_ratio_model +from ..utils.ml.models.ratio import DenseSingleParameterizedRatioModel +from ..utils.ml.trainer import SingleParameterizedRatioTrainer from ..utils.ml.utils import get_optimizer, get_loss from ..utils.various import load_and_check, shuffle, restrict_samplesize -from ..utils.ml.trainer import SingleParameterizedRatioTrainer -from .base import ConditionalEstimator, TheresAGoodReasonThisDoesntWork try: FileNotFoundError diff --git a/madminer/ml/score.py b/madminer/ml/score.py index 03142db16..e420c4948 100644 --- a/madminer/ml/score.py +++ b/madminer/ml/score.py @@ -1,16 +1,13 @@ -from 
__future__ import absolute_import, division, print_function - import logging import numpy as np from collections import OrderedDict -from ..utils.ml.models.score import DenseLocalScoreModel +from .base import Estimator, TheresAGoodReasonThisDoesntWork from ..utils.ml.eval import evaluate_local_score_model -from ..utils.ml.utils import get_optimizer, get_loss -from ..utils.various import load_and_check, shuffle, restrict_samplesize -from ..utils.various import separate_information_blocks +from ..utils.ml.models.score import DenseLocalScoreModel from ..utils.ml.trainer import LocalScoreTrainer -from .base import Estimator, TheresAGoodReasonThisDoesntWork +from ..utils.ml.utils import get_optimizer, get_loss +from ..utils.various import load_and_check, shuffle, restrict_samplesize, separate_information_blocks try: FileNotFoundError From 3b53a5e763f1fc2a145ad2e783ee754666b447ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 10:40:51 -0400 Subject: [PATCH 38/65] src: ML module legacy exceptions --- madminer/ml/base.py | 4 ---- madminer/ml/double_parameterized_ratio.py | 4 ---- madminer/ml/ensemble.py | 5 ----- madminer/ml/likelihood.py | 4 ---- madminer/ml/morphing_aware.py | 4 ---- madminer/ml/parameterized_ratio.py | 4 ---- madminer/ml/score.py | 4 ---- 7 files changed, 29 deletions(-) diff --git a/madminer/ml/base.py b/madminer/ml/base.py index 56c646861..feb5698ce 100644 --- a/madminer/ml/base.py +++ b/madminer/ml/base.py @@ -6,10 +6,6 @@ from ..utils.various import create_missing_folders, load_and_check -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError logger = logging.getLogger(__name__) diff --git a/madminer/ml/double_parameterized_ratio.py b/madminer/ml/double_parameterized_ratio.py index bcb4f6151..d0415d67b 100644 --- a/madminer/ml/double_parameterized_ratio.py +++ b/madminer/ml/double_parameterized_ratio.py @@ -9,10 +9,6 @@ from ..utils.ml.utils import get_optimizer, get_loss from 
..utils.various import load_and_check, shuffle, restrict_samplesize -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError logger = logging.getLogger(__name__) diff --git a/madminer/ml/ensemble.py b/madminer/ml/ensemble.py index eb997fa9c..415ac51db 100644 --- a/madminer/ml/ensemble.py +++ b/madminer/ml/ensemble.py @@ -10,11 +10,6 @@ from .score import ScoreEstimator -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError - logger = logging.getLogger(__name__) diff --git a/madminer/ml/likelihood.py b/madminer/ml/likelihood.py index 4a405f96c..59e4dcf0d 100644 --- a/madminer/ml/likelihood.py +++ b/madminer/ml/likelihood.py @@ -10,10 +10,6 @@ from ..utils.ml.utils import get_optimizer, get_loss from ..utils.various import load_and_check, shuffle, restrict_samplesize -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError logger = logging.getLogger(__name__) diff --git a/madminer/ml/morphing_aware.py b/madminer/ml/morphing_aware.py index 5c33780fa..4cb947a12 100644 --- a/madminer/ml/morphing_aware.py +++ b/madminer/ml/morphing_aware.py @@ -6,10 +6,6 @@ from ..utils.ml.models.ratio import DenseMorphingAwareRatioModel, DenseQuadraticMorphingAwareRatioModel from ..utils.morphing import PhysicsMorpher -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError logger = logging.getLogger(__name__) diff --git a/madminer/ml/parameterized_ratio.py b/madminer/ml/parameterized_ratio.py index a78f80bd1..a189a9512 100644 --- a/madminer/ml/parameterized_ratio.py +++ b/madminer/ml/parameterized_ratio.py @@ -10,10 +10,6 @@ from ..utils.ml.utils import get_optimizer, get_loss from ..utils.various import load_and_check, shuffle, restrict_samplesize -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError logger = logging.getLogger(__name__) diff --git a/madminer/ml/score.py b/madminer/ml/score.py index e420c4948..e199c7031 100644 --- a/madminer/ml/score.py +++ b/madminer/ml/score.py @@ -9,10 
+9,6 @@ from ..utils.ml.utils import get_optimizer, get_loss from ..utils.various import load_and_check, shuffle, restrict_samplesize, separate_information_blocks -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError logger = logging.getLogger(__name__) From 70030fd8ab97c8db3b65a7bdf799efdcb1f76ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 10:59:48 -0400 Subject: [PATCH 39/65] src: plotting module f-strings --- madminer/plotting/distributions.py | 8 ++++---- madminer/plotting/fisherinformation.py | 5 +++-- madminer/plotting/limits.py | 12 ++++++------ madminer/plotting/uncertainties.py | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/madminer/plotting/distributions.py b/madminer/plotting/distributions.py index 7bc64f001..68584dcfc 100644 --- a/madminer/plotting/distributions.py +++ b/madminer/plotting/distributions.py @@ -148,7 +148,7 @@ def plot_distributions( n_parameter_points = len(parameter_points) if colors is None: - colors = ["C" + str(i) for i in range(10)] * (n_parameter_points // 10 + 1) + colors = [f"C{i}" for i in range(10)] * (n_parameter_points // 10 + 1) elif not isinstance(colors, list): colors = [colors for _ in range(n_parameter_points)] @@ -421,11 +421,11 @@ def plot_histograms( # Basic setup n_histos = len(histos) dim = len(histos[0].edges) - assert dim in [1, 2], "Only 1- or 2-dimensional histograms are supported, but found {} dimensions".format(dim) + assert dim in [1, 2], f"Only 1- or 2-dimensional histograms are supported, but found {dim} dimensions" # Defaults if colors is None: - colors = ["C" + str(i) for i in range(10)] * (n_histos // 10 + 1) + colors = [f"C{i}" for i in range(10)] * (n_histos // 10 + 1) elif not isinstance(colors, list): colors = [colors for _ in range(n_histos)] if linestyles is None: @@ -435,7 +435,7 @@ def plot_histograms( if not isinstance(linewidths, list): linewidths = [linewidths for _ in range(n_histos)] if histo_labels is 
None: - histo_labels = ["Histogram {}".format(i + 1) for i in range(n_histos)] + histo_labels = [f"Histogram {i+1}" for i in range(n_histos)] # 1D plot if dim == 1: diff --git a/madminer/plotting/fisherinformation.py b/madminer/plotting/fisherinformation.py index 705e32729..eaf8ed91e 100644 --- a/madminer/plotting/fisherinformation.py +++ b/madminer/plotting/fisherinformation.py @@ -105,7 +105,8 @@ def plot_fisher_information_contours_2d( if fisher_information_matrices.shape != (n_matrices, 2, 2): raise RuntimeError( - "Fisher information matrices have shape {}, not (n, 2,2)!".format(fisher_information_matrices.shape) + f"Fisher information matrices have shape {fisher_information_matrices.shape}. " + f"Not (n, 2,2)!" ) if fisher_information_covariances is None: @@ -301,7 +302,7 @@ def plot_fisherinfo_barplot( bar_colors_light = bar_colors if eigenvalue_colors is None: - eigenvalue_colors = ["C{}".format(str(i)) for i in range(10)] + eigenvalue_colors = [f"C{i}" for i in range(10)] eigenvalue_linewidth = 1.5 # Upper plot diff --git a/madminer/plotting/limits.py b/madminer/plotting/limits.py index 4acc2797f..6d78d7524 100644 --- a/madminer/plotting/limits.py +++ b/madminer/plotting/limits.py @@ -134,20 +134,20 @@ def plot_pvalue_limits( cmap="Greys_r", ) cbar = fig.colorbar(pcm, ax=ax, extend="both") - cbar.set_label("Expected p-value ({})".format(labels[show_index])) + cbar.set_label(f"Expected p-value ({labels[show_index]})") for ipanel in range(len(p_values)): ax.contour( xcenters, ycenters, p_values[ipanel].reshape((grid_resolutions[0], grid_resolutions[1])).T, levels=levels, - colors="C{}".format(ipanel), + colors=f"C{ipanel}", ) ax.scatter( theta_grid[best_fits[ipanel]][0], theta_grid[best_fits[ipanel]][1], s=80.0, - color="C{}".format(ipanel), + color=f"C{ipanel}", marker="*", label=labels[ipanel], ) @@ -167,19 +167,19 @@ def plot_pvalue_limits( cmap="Greys_r", ) cbar = fig.colorbar(pcm, ax=ax, extend="both") - cbar.set_label("Expected p-value 
({})".format(labels[ipanel])) + cbar.set_label(f"Expected p-value ({labels[ipanel]})") ax.contour( xcenters, ycenters, p_values[ipanel].reshape((grid_resolutions[0], grid_resolutions[1])).T, levels=levels, - colors="C{}".format(ipanel), + colors=f"C{ipanel}", ) ax.scatter( theta_grid[best_fits[ipanel]][0], theta_grid[best_fits[ipanel]][1], s=80.0, - color="C{}".format(ipanel), + color=f"C{ipanel}", marker="*", label=labels[ipanel], ) diff --git a/madminer/plotting/uncertainties.py b/madminer/plotting/uncertainties.py index bb66e9a8d..247ff78f3 100644 --- a/madminer/plotting/uncertainties.py +++ b/madminer/plotting/uncertainties.py @@ -307,7 +307,7 @@ def plot_systematics( # Colors if bandcolors is None: - bandcolors = ["C{}".format(i) for i in range(10)] + bandcolors = [f"C{i}" for i in range(10)] # Load data sa = SampleAugmenter(filename, include_nuisance_parameters=True) From a7173779f0255ee1ff46bdffc70d966d0b7e9557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 11:04:27 -0400 Subject: [PATCH 40/65] src: plotting module improve fmt --- madminer/plotting/__init__.py | 2 +- madminer/plotting/distributions.py | 2 +- madminer/plotting/fisherinformation.py | 7 ++++++- madminer/plotting/uncertainties.py | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/madminer/plotting/__init__.py b/madminer/plotting/__init__.py index 6d52e9102..cef287c8e 100644 --- a/madminer/plotting/__init__.py +++ b/madminer/plotting/__init__.py @@ -1,9 +1,9 @@ from .distributions import plot_distributions, plot_histograms from .morphing import ( + plot_1d_morphing_basis, plot_2d_morphing_basis, plot_nd_morphing_basis_scatter, plot_nd_morphing_basis_slices, - plot_1d_morphing_basis, ) from .fisherinformation import ( plot_fisherinfo_barplot, diff --git a/madminer/plotting/distributions.py b/madminer/plotting/distributions.py index 68584dcfc..cda4e5b89 100644 --- a/madminer/plotting/distributions.py +++ b/madminer/plotting/distributions.py 
@@ -69,7 +69,7 @@ def plot_distributions( Whether the distribution is normalized to the total cross section. Default value: False. log : bool, optional - Whether to draw the y axes on a logarithmic scale. Defaul value: False. + Whether to draw the y axes on a logarithmic scale. Default value: False. observable_labels : None or list of (str or None), optional x-axis labels naming the observables. If None, the observable names from the MadMiner file are used. Default diff --git a/madminer/plotting/fisherinformation.py b/madminer/plotting/fisherinformation.py index eaf8ed91e..73f6091e4 100644 --- a/madminer/plotting/fisherinformation.py +++ b/madminer/plotting/fisherinformation.py @@ -92,6 +92,7 @@ def plot_fisher_information_contours_2d( ax: axes or None, optional Predefined axes as part of figure instead of standalone figure. Default: None + Returns ------- figure : Figure @@ -220,7 +221,11 @@ def plot_fisher_information_contours_2d( def plot_fisherinfo_barplot( - fisher_information_matrices, labels, determinant_indices=None, eigenvalue_colors=None, bar_colors=None + fisher_information_matrices, + labels, + determinant_indices=None, + eigenvalue_colors=None, + bar_colors=None, ): """ diff --git a/madminer/plotting/uncertainties.py b/madminer/plotting/uncertainties.py index 247ff78f3..f1a8f31b5 100644 --- a/madminer/plotting/uncertainties.py +++ b/madminer/plotting/uncertainties.py @@ -295,7 +295,7 @@ def plot_systematics( bandcolors : None or list of str, optional Error band colors. Default value: None. - ratio_range : tuple of two floar + ratio_range : tuple of two float y-axis range for the plots of the ratio to the central prediction. Default value: (0.8, 1.2). 
Returns From d3f1e970e6e2d1f0c0c3506eb166b5484da82003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 11:07:52 -0400 Subject: [PATCH 41/65] src: plotting module remove 'six' --- madminer/plotting/distributions.py | 4 +--- madminer/plotting/fisherinformation.py | 2 -- madminer/plotting/limits.py | 2 -- madminer/plotting/morphing.py | 2 -- madminer/plotting/uncertainties.py | 15 ++++++--------- 5 files changed, 7 insertions(+), 18 deletions(-) diff --git a/madminer/plotting/distributions.py b/madminer/plotting/distributions.py index cda4e5b89..24b1b5451 100644 --- a/madminer/plotting/distributions.py +++ b/madminer/plotting/distributions.py @@ -1,13 +1,11 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import matplotlib import numpy as np from matplotlib import pyplot as plt +from ..sampling import SampleAugmenter from ..utils.morphing import NuisanceMorpher from ..utils.various import shuffle, sanitize_array, mdot, weighted_quantile -from ..sampling import SampleAugmenter logger = logging.getLogger(__name__) diff --git a/madminer/plotting/fisherinformation.py b/madminer/plotting/fisherinformation.py index 73f6091e4..a73f1b24e 100644 --- a/madminer/plotting/fisherinformation.py +++ b/madminer/plotting/fisherinformation.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import matplotlib import numpy as np diff --git a/madminer/plotting/limits.py b/madminer/plotting/limits.py index 6d78d7524..8c8a9083a 100644 --- a/madminer/plotting/limits.py +++ b/madminer/plotting/limits.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import matplotlib import numpy as np diff --git a/madminer/plotting/morphing.py b/madminer/plotting/morphing.py index 408013274..43c4dba56 100644 --- a/madminer/plotting/morphing.py +++ b/madminer/plotting/morphing.py 
@@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import matplotlib import numpy as np diff --git a/madminer/plotting/uncertainties.py b/madminer/plotting/uncertainties.py index f1a8f31b5..30f4b7e44 100644 --- a/madminer/plotting/uncertainties.py +++ b/madminer/plotting/uncertainties.py @@ -1,13 +1,10 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six import logging import numpy as np from matplotlib import pyplot as plt, gridspec +from ..sampling import SampleAugmenter from ..utils.morphing import NuisanceMorpher from ..utils.various import mdot, shuffle, sanitize_array -from ..sampling import SampleAugmenter logger = logging.getLogger(__name__) @@ -120,7 +117,7 @@ def plot_uncertainty( # Restrict nuisance parameters if systematics is not None: nuisance_parameters = [] - for npar, (npar_syst, _, _) in six.iteritems(sa.nuisance_parameters): + for npar, (npar_syst, _, _) in sa.nuisance_parameters.items(): if npar_syst in systematics: nuisance_parameters.append(npar) @@ -342,7 +339,7 @@ def plot_systematics( # Systematics n_systematics = len(sa.systematics) + 1 - labels = list(six.iterkeys(sa.systematics)) + ["combined"] + labels = list(sa.systematics.keys()) + ["combined"] # Nuisance parameters n_nuisance_params = sa.n_nuisance_parameters @@ -351,11 +348,11 @@ def plot_systematics( nuisance_toys = nuisance_toys.reshape(n_systematics, n_toys, n_nuisance_params) # Restrict nuisance parameters - all_nuisance_parameters = list(six.iterkeys(sa.nuisance_parameters)) - for i_syst, syst_name in enumerate(six.iterkeys(sa.systematics)): + all_nuisance_parameters = list(sa.nuisance_parameters.keys()) + for i_syst, syst_name in enumerate(sa.systematics.keys()): n_used = n_nuisance_params used_nuisance_parameters = [] - for npar, (npar_syst, _, _) in six.iteritems(sa.nuisance_parameters): + for npar, (npar_syst, _, _) in sa.nuisance_parameters.items(): if 
npar_syst == syst_name: used_nuisance_parameters.append(npar) From 96fd583bd3e358059e0f2a4a5ec3ce189e5c269a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 11:24:09 -0400 Subject: [PATCH 42/65] src: sampling module f-strings --- madminer/sampling/combine.py | 3 +- madminer/sampling/sampleaugmenter.py | 74 ++++++++++++++-------------- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/madminer/sampling/combine.py b/madminer/sampling/combine.py index 3b4281227..4efc53b5b 100644 --- a/madminer/sampling/combine.py +++ b/madminer/sampling/combine.py @@ -78,7 +78,8 @@ def combine_and_shuffle( if len(input_filenames) != len(k_factors): raise RuntimeError( - "Inconsistent length of input filenames and k factors: %s vs %s", len(input_filenames), len(k_factors) + f"Inconsistent length of input filenames and k factors: " + f"{len(input_filenames)} vs {len(k_factors)}" ) # Copy first file to output_filename diff --git a/madminer/sampling/sampleaugmenter.py b/madminer/sampling/sampleaugmenter.py index e16a0d228..6ed33a403 100644 --- a/madminer/sampling/sampleaugmenter.py +++ b/madminer/sampling/sampleaugmenter.py @@ -182,8 +182,8 @@ def sample_train_plain( # Save data if filename is not None and folder is not None: - np.save(folder + "/theta_" + filename + ".npy", theta) - np.save(folder + "/x_" + filename + ".npy", x) + np.save(f"{folder}/theta_{filename}.npy", theta) + np.save(f"{folder}/x_{filename}.npy", x) return x, theta, min(effective_n_samples) @@ -327,9 +327,9 @@ def sample_train_local( # Save data if filename is not None and folder is not None: - np.save(folder + "/theta_" + filename + ".npy", theta) - np.save(folder + "/x_" + filename + ".npy", x) - np.save(folder + "/t_xz_" + filename + ".npy", t_xz) + np.save(f"{folder}/theta_{filename}.npy", theta) + np.save(f"{folder}/x_{filename}.npy", x) + np.save(f"{folder}/t_xz_{filename}.npy", t_xz) return x, theta, t_xz, min(effective_n_samples) @@ -699,13 +699,13 @@ def 
sample_train_ratio( # Save data if filename is not None and folder is not None: - np.save(folder + "/theta0_" + filename + ".npy", theta0) - np.save(folder + "/theta1_" + filename + ".npy", theta1) - np.save(folder + "/x_" + filename + ".npy", x) - np.save(folder + "/y_" + filename + ".npy", y) - np.save(folder + "/r_xz_" + filename + ".npy", r_xz) + np.save(f"{folder}/theta0_{filename}.npy", theta0) + np.save(f"{folder}/theta1_{filename}.npy", theta1) + np.save(f"{folder}/x_{filename}.npy", x) + np.save(f"{folder}/y_{filename}.npy", y) + np.save(f"{folder}/r_xz_{filename}.npy", r_xz) if self.morpher is not None: - np.save(folder + "/t_xz_" + filename + ".npy", t_xz) + np.save(f"{folder}/t_xz_{filename}.npy", t_xz) if not return_individual_n_effective: n_effective = np.min(n_effective) @@ -1029,13 +1029,13 @@ def sample_train_more_ratios( # Save data if filename is not None and folder is not None: - np.save(folder + "/theta0_" + filename + ".npy", theta0) - np.save(folder + "/theta1_" + filename + ".npy", theta1) - np.save(folder + "/x_" + filename + ".npy", x) - np.save(folder + "/y_" + filename + ".npy", y) - np.save(folder + "/r_xz_" + filename + ".npy", r_xz) - np.save(folder + "/t_xz0_" + filename + ".npy", t_xz0) - np.save(folder + "/t_xz1_" + filename + ".npy", t_xz1) + np.save(f"{folder}/theta0_{filename}.npy", theta0) + np.save(f"{folder}/theta1_{filename}.npy", theta1) + np.save(f"{folder}/x_{filename}.npy", x) + np.save(f"{folder}/y_{filename}.npy", y) + np.save(f"{folder}/r_xz_{filename}.npy", r_xz) + np.save(f"{folder}/t_xz0_{filename}.npy", t_xz0) + np.save(f"{folder}/t_xz1_{filename}.npy", t_xz1) return x, theta0, theta1, y, r_xz, t_xz0, t_xz1, min(min(n_effective_samples_0), min(n_effective_samples_1)) @@ -1146,8 +1146,8 @@ def sample_test( # Save data if filename is not None and folder is not None: - np.save(folder + "/theta_" + filename + ".npy", theta) - np.save(folder + "/x_" + filename + ".npy", x) + np.save(f"{folder}/theta_{filename}.npy", 
theta) + np.save(f"{folder}/x_{filename}.npy", x) return x, theta, min(n_effective_samples) @@ -1675,9 +1675,8 @@ def _sample_set( # Check that we got 'em all, otherwise repeat if not np.all(done): logger.debug( - " After full pass through event files, {} / {} samples not found, with u = {}".format( - np.sum(np.invert(done)), done.size, u[np.invert(done)] - ) + f" After full pass through event files, {np.sum(np.invert(done))} / {done.size} " + f"samples not found, with u = {u[np.invert(done)]}" ) n_eff_samples = 1.0 / max(1.0e-12, largest_event_probability) @@ -1716,7 +1715,7 @@ def _calculate_augmented_data( score = score.T # (n_samples, n_gradients) augmented_data.append(score) else: - raise ValueError("Unknown augmented data type {}".format(definition[0])) + raise ValueError(f"Unknown augmented data type {definition[0]}") return augmented_data @@ -1815,11 +1814,11 @@ def _parse_theta(theta, n_samples): prior_std = prior[2] thetas_out.append(np.random.normal(loc=prior_mean, scale=prior_std, size=n_benchmarks)) else: - raise ValueError("Unknown prior {}".format(prior)) + raise ValueError(f"Unknown prior {prior}") thetas_out = np.array(thetas_out).T else: - raise ValueError("Unknown theta specification {}".format(theta)) + raise ValueError(f"Unknown theta specification {theta}") return thetas_out, n_samples_per_theta @@ -1861,18 +1860,18 @@ def _parse_nu(self, nu, n_thetas): prior_std = prior[2] nu_out.append(np.random.normal(loc=prior_mean, scale=prior_std, size=n_thetas)) else: - raise ValueError("Unknown prior {}".format(prior)) + raise ValueError(f"Unknown prior {prior}") nu_out = np.array(nu_out).T else: - raise ValueError("Unknown nu specification {}".format(nu)) + raise ValueError(f"Unknown nu specification {nu}") return nu_out @staticmethod def _build_sets(thetas, nus): if len(nus) != len(thetas): - raise RuntimeError("Mismatching thetas and nus: {} vs {}".format(len(thetas), len(nus))) + raise RuntimeError(f"Mismatching thetas and nus: {len(thetas)} vs 
{len(nus)}") n_sets = max([len(param) for param in thetas + nus]) sets = [[] for _ in range(n_sets)] @@ -1883,9 +1882,8 @@ def _build_sets(thetas, nus): if n_theta_sets_before <= 0 or n_nu_sets_before <= 0: raise RuntimeError( - ( - "Inconsistent number of sets in _build_sets: thetas = {}, nus = {}, theta = {}, " "nu = {}" - ).format(thetas, nus, theta, nu) + f"Inconsistent number of sets in _build_sets: " + f"thetas = {thetas}, nus = {nus}, theta = {theta}, nu = {nu}" ) for i_set in range(n_sets): @@ -1900,19 +1898,19 @@ def _format_sampling(theta): elif theta[0] == "morphing_point": return str(theta[1]) elif theta[0] == "benchmarks": - return "{} benchmarks, starting with {}".format(len(theta[1]), theta[1][:3]) + return f"{len(theta[1])} benchmarks, starting with {theta[1][:3]}" elif theta[0] == "morphing_points": - return "{} morphing points, starting with {}".format(len(theta[1]), theta[1][:3]) + return f"{len(theta[1])} morphing points, starting with {theta[1][:3]}" elif theta[0] == "random_morphing_points": prior_str = "" for i, (type_, arg0, arg1) in enumerate(theta[1][1]): prior_str += "\n" if type_ == "gaussian": - prior_str += " theta_{} ~ Gaussian with mean {} and std {}".format(i, arg0, arg1) + prior_str += f" theta_{i} ~ Gaussian with mean {arg0} and std {arg1}" elif type_ == "flat": - prior_str += " theta_{} ~ flat from {} to {}".format(i, arg0, arg1) + prior_str += f" theta_{i} ~ flat from {arg0} to {arg1}" if theta[1][0] is None: - return "Maximally many random morphing points, drawn from the following priors:{}".format(prior_str) + return f"Maximally many random morphing points, drawn from the following priors: {prior_str}" else: - return "{} random morphing points, drawn from the following priors:{}".format(theta[1][0], prior_str) + return f"{theta[1][0]} random morphing points, drawn from the following priors: {prior_str}" From d12dd2e96c4d36a0edddbbe466ac85f328fa2f85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 
Sep 2020 11:34:31 -0400 Subject: [PATCH 43/65] src: sampling module improve fmt --- madminer/sampling/combine.py | 8 +++++++- madminer/sampling/parameters.py | 4 ++-- madminer/sampling/sampleaugmenter.py | 27 ++++++++++++++++----------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/madminer/sampling/combine.py b/madminer/sampling/combine.py index 4efc53b5b..fdce943e1 100644 --- a/madminer/sampling/combine.py +++ b/madminer/sampling/combine.py @@ -14,15 +14,21 @@ def _calculate_n_events(sampling_ids, n_benchmarks): if sampling_ids is None: return None, None + unique, counts = np.unique(sampling_ids, return_counts=True) results = dict(zip(unique, counts)) + n_events_backgrounds = results.get(-1, 0) n_events_signal_per_benchmark = np.array([results.get(i, 0) for i in range(n_benchmarks)], dtype=np.int) return n_events_signal_per_benchmark, n_events_backgrounds def combine_and_shuffle( - input_filenames, output_filename, k_factors=None, overwrite_existing_file=True, recalculate_header=True + input_filenames, + output_filename, + k_factors=None, + overwrite_existing_file=True, + recalculate_header=True, ): """ Combines multiple MadMiner files into one, and shuffles the order of the events. diff --git a/madminer/sampling/parameters.py b/madminer/sampling/parameters.py index 4a693e914..3e495ac76 100644 --- a/madminer/sampling/parameters.py +++ b/madminer/sampling/parameters.py @@ -109,7 +109,7 @@ def random_morphing_points(n_thetas, priors): def iid_nuisance_parameters(shape="gaussian", param0=0.0, param1=1.0): """ Utility function to be used as input to various SampleAugmenter functions, specifying that nuisance parameters are - fixed at their nominal valuees. + fixed at their nominal values. 
Parameters ---------- @@ -134,7 +134,7 @@ def iid_nuisance_parameters(shape="gaussian", param0=0.0, param1=1.0): def nominal_nuisance_parameters(): """ Utility function to be used as input to various SampleAugmenter functions, specifying that nuisance parameters are - fixed at their nominal valuees. + fixed at their nominal values. Returns ------- diff --git a/madminer/sampling/sampleaugmenter.py b/madminer/sampling/sampleaugmenter.py index 6ed33a403..0f272fc2c 100644 --- a/madminer/sampling/sampleaugmenter.py +++ b/madminer/sampling/sampleaugmenter.py @@ -22,8 +22,8 @@ class SampleAugmenter(DataAnalyzer): (related) tasks: unweighting, i.e. the creation of samples that do not carry individual weights but follow some distribution, and the extraction of the joint likelihood ratio and / or joint score (the "augmented data"). - After inializing `SampleAugmenter` with the filename of a MadMiner file, this is done with a single function call. - Depending on the downstream infference algorithm, there are different possibilities: + After initializing `SampleAugmenter` with the filename of a MadMiner file, this is done with a single function call. + Depending on the downstream inference algorithm, there are different possibilities: * `SampleAugmenter.sample_train_plain()` creates plain training samples without augmented data. * `SampleAugmenter.sample_train_local()` creates training samples for local methods based on the score, @@ -685,6 +685,7 @@ def sample_train_ratio( t_xz = np.vstack([t_xz0, t_xz1]) else: t_xz = None + theta0 = np.vstack([theta0_0, theta0_1]) theta1 = np.vstack([theta1_0, theta1_1]) y = np.zeros(x.shape[0]) @@ -1168,7 +1169,7 @@ def cross_sections(self, theta, nu=None): Tuple (type, value) that defines the nuisance parameter point or prior over nuisance parameter points at which the cross section is calculated. Pass the output of the functions `benchmark()`, `benchmarks()`, `morphing_point()`, `morphing_points()`, or - `random_morphing_points()`. 
Default valuee: None. + `random_morphing_points()`. Default value: None. Returns ------- @@ -1247,17 +1248,20 @@ def _sample( If True, any joint score in the augmented data definitions is also calculated with respect to the nuisance parameters. Default value: True. - use_train_events : bool, optional - Decides whether to use the train or test split of the events. Default value: True. + partition : {"train", "test", "validation", "all"}, optional + Which event partition to use. Default value: "train". test_split : float or None, optional Fraction of events reserved for the evaluation sample (that will not be used for any training samples). Default value: 0.2. + validation_split : float or None, optional + Fraction of events reserved for validation. Default value: 0.2. + n_processes : None or int, optional If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, - n_workers sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: - 1. + n_workers sets the number of jobs running in parallel, and None will use the number of CPUs. + Default value: 1. update_patience : float, optional Wait time (in s) between log update checks if n_workers > 1 (or None). Default value: 0.01 @@ -1271,6 +1275,9 @@ def _sample( and ignore other events. This can help to reduce statistical effects caused by a small number of events with very large weights obtained by the morphing procedure. Default value: None + double_precision : bool, optional + Use double floating-point precision. Default value: False.
+ Returns ------- x : ndarray @@ -1720,8 +1727,7 @@ def _calculate_augmented_data( return augmented_data def _combine_thetas_nus(self, all_thetas, all_nus): - n_thetas = len(all_thetas) - assert n_thetas == len(all_nus) + assert len(all_thetas) == len(all_nus) # all_nus is a list of a list of (None or ndarray) # Figure out if there's anything nontrivial in there @@ -1870,8 +1876,7 @@ def _parse_nu(self, nu, n_thetas): @staticmethod def _build_sets(thetas, nus): - if len(nus) != len(thetas): - raise RuntimeError(f"Mismatching thetas and nus: {len(thetas)} vs {len(nus)}") + assert len(thetas) == len(nus) n_sets = max([len(param) for param in thetas + nus]) sets = [[] for _ in range(n_sets)] From 60d4607c67dc44c67ebe5236f556e370dd9da696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 11:35:32 -0400 Subject: [PATCH 44/65] src: sampling module remove 'future' --- madminer/sampling/combine.py | 2 -- madminer/sampling/parameters.py | 2 -- madminer/sampling/sampleaugmenter.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/madminer/sampling/combine.py b/madminer/sampling/combine.py index fdce943e1..45f120a0c 100644 --- a/madminer/sampling/combine.py +++ b/madminer/sampling/combine.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np diff --git a/madminer/sampling/parameters.py b/madminer/sampling/parameters.py index 3e495ac76..226e18ad2 100644 --- a/madminer/sampling/parameters.py +++ b/madminer/sampling/parameters.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np diff --git a/madminer/sampling/sampleaugmenter.py b/madminer/sampling/sampleaugmenter.py index 0f272fc2c..0db78af4b 100644 --- a/madminer/sampling/sampleaugmenter.py +++ b/madminer/sampling/sampleaugmenter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, 
unicode_literals - import time import logging import numpy as np From 68fc6fd52317104f5e10671456ad2bfba2b155ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 11:17:59 -0400 Subject: [PATCH 45/65] src: utils module f-strings --- madminer/utils/histo.py | 5 ++--- madminer/utils/morphing.py | 5 ++--- madminer/utils/various.py | 14 +++++++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/madminer/utils/histo.py b/madminer/utils/histo.py index 83ff2ba7a..78c870420 100644 --- a/madminer/utils/histo.py +++ b/madminer/utils/histo.py @@ -36,9 +36,8 @@ def __init__(self, x, weights=None, bins=20, epsilon=0.0): if weights is not None: weights = weights.flatten() - assert weights.shape == (self.n_samples,), "Inconsistent weight shape {} should be {}".format( - weights.shape, (self.n_samples,) - ) + assert weights.shape == (self.n_samples,), \ + f"Inconsistent weight shape {weights.shape} should be {(self.n_samples,)}" logger.debug("Creating histogram:") logger.debug(" Samples: %s", self.n_samples) diff --git a/madminer/utils/morphing.py b/madminer/utils/morphing.py index f685de47c..e3f268c83 100644 --- a/madminer/utils/morphing.py +++ b/madminer/utils/morphing.py @@ -142,9 +142,8 @@ def find_components(self, max_overall_power=4): for max_power in self.parameter_max_power: if n_regions != len(max_power): raise RuntimeError( - "Parameters have different number of partitions of max powers: {} {}".format( - max_overall_power, self.parameter_max_power - ) + f"Parameters have different number of partitions of max powers: " + f"{max_overall_power} vs {self.parameter_max_power}" ) # Go through regions and finds components for each diff --git a/madminer/utils/various.py b/madminer/utils/various.py index 9c2b4a308..7fb8387ff 100644 --- a/madminer/utils/various.py +++ b/madminer/utils/various.py @@ -25,7 +25,8 @@ def call_command(cmd, log_file=None, return_std=False): if exitcode != 0: raise RuntimeError( - "Calling command 
{} returned exit code {}. Output in file {}.".format(cmd, exitcode, log_file) + f"Calling command {cmd} returned exit code {exitcode}. " + f"Output in file {log_file}." ) else: proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) @@ -34,9 +35,12 @@ def call_command(cmd, log_file=None, return_std=False): if exitcode != 0: raise RuntimeError( - "Calling command {} returned exit code {}.\n\nStd output:\n\n{}Error output:\n\n{}".format( - cmd, exitcode, out, err - ) + f"Calling command {cmd} returned exit code {exitcode}." + f"\n\n" + f"Std output: {out}" + f"\n\n" + f"Error output: {err}" + f"\n\n" ) if return_std: @@ -63,7 +67,7 @@ def create_missing_folders(folders): os.makedirs(folder) elif not os.path.isdir(folder): - raise OSError("Path {} exists, but is no directory!".format(folder)) + raise OSError(f"Path {folder} exists, but is no directory!") def format_benchmark(parameters, precision=2): From 962d978b204da151e82c99e480a4e2935c73c7e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 11:25:41 -0400 Subject: [PATCH 46/65] src: utils module improve fmt --- madminer/utils/histo.py | 21 ++++++++++++++++++--- madminer/utils/morphing.py | 20 ++++++-------------- madminer/utils/various.py | 4 +++- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/madminer/utils/histo.py b/madminer/utils/histo.py index 78c870420..449b4032b 100644 --- a/madminer/utils/histo.py +++ b/madminer/utils/histo.py @@ -123,9 +123,20 @@ def _adaptive_binning(x, n_bins, weights=None, lower_cutoff_percentile=0.1, uppe def _fit(self, x, weights=None, epsilon=0.0): # Fill histograms ranges = [(edges[0], edges[-1]) for edges in self.edges] - histo, _ = np.histogramdd(x, bins=self.edges, range=ranges, normed=False, weights=weights) + + histo, _ = np.histogramdd( + x, + bins=self.edges, + range=ranges, + normed=False, + weights=weights, + ) histo_w2, _ = np.histogramdd( - x, bins=self.edges, range=ranges, normed=False, weights=None if weights is 
None else weights ** 2 + x, + bins=self.edges, + range=ranges, + normed=False, + weights=None if weights is None else weights ** 2, ) # Uncertainties @@ -154,10 +165,12 @@ def _fit(self, x, weights=None, epsilon=0.0): axis_edges[-1], axis_edges[-1] + 2.0 * (axis_edges[-1] - axis_edges[-2]) ) # Last bin is treated as at most twice as big as second-to-last modified_histo_edges.append(axis_edges) + # Calculate cell volumes bin_widths = [axis_edges[1:] - axis_edges[:-1] for axis_edges in modified_histo_edges] shape = tuple(self.n_bins) volumes = np.ones(shape) + for obs in range(self.n_observables): # Broadcast bin widths to array with shape like volumes bin_widths_broadcasted = np.ones(shape) @@ -183,7 +196,9 @@ def _report_binning(self): def _report_uncertainties(self): rel_uncertainties = np.where( - self.histo.flatten() > 0.0, self.histo_uncertainties.flatten() / self.histo.flatten(), np.nan + self.histo.flatten() > 0.0, + self.histo_uncertainties.flatten() / self.histo.flatten(), + np.nan, ) if np.nanmax(rel_uncertainties) > 0.5: logger.debug( diff --git a/madminer/utils/morphing.py b/madminer/utils/morphing.py index e3f268c83..58f83af53 100644 --- a/madminer/utils/morphing.py +++ b/madminer/utils/morphing.py @@ -103,7 +103,6 @@ def set_components(self, components): scales a given component. For instance, a typical signal, interference, background situation with one parameter might be described by the components [[2], [1], [0]]. - Returns ------- None @@ -227,7 +226,7 @@ def optimize_basis( ): """ - Optimizes the morphing basis. If either fixed_benchmarks_from_maxminer or fixed_benchmarks_numpy are not + Optimizes the morphing basis. If either fixed_benchmarks_from_madminer or fixed_benchmarks_numpy are not None, then these will be used as fixed basis points and only the remaining part of the basis will be optimized. 
Parameters @@ -345,7 +344,6 @@ def calculate_morphing_matrix(self, basis=None): morphing_matrix : ndarray Morphing matrix with shape `(n_basis_benchmarks, n_components)` - """ # Check all data is there @@ -394,9 +392,7 @@ def calculate_morphing_matrix(self, basis=None): morphing_submatrix = morphing_submatrix.T morphing_matrix[i * n_benchmarks_this_basis : (i + 1) * n_benchmarks_this_basis] = morphing_submatrix - morphing_matrix = morphing_matrix.T - - return morphing_matrix + return morphing_matrix.T def calculate_morphing_weights(self, theta, basis=None, morphing_matrix=None): @@ -454,9 +450,7 @@ def calculate_morphing_weights(self, theta, basis=None, morphing_matrix=None): component_weights = np.array(component_weights) # Transform to basis weights - weights = morphing_matrix.T.dot(component_weights) - - return weights + return morphing_matrix.T.dot(component_weights) def calculate_morphing_weight_gradient(self, theta, basis=None, morphing_matrix=None): @@ -522,11 +516,8 @@ def calculate_morphing_weight_gradient(self, theta, basis=None, morphing_matrix= component_weight_gradients[c, i] = factor # Transform to basis weights - weight_gradients = morphing_matrix.T.dot( - component_weight_gradients - ).T # Shape (n_parameters, n_benchmarks_phys) - - return weight_gradients + # Shape (n_parameters, n_benchmarks_phys) + return morphing_matrix.T.dot(component_weight_gradients).T def evaluate_morphing(self, basis=None, morphing_matrix=None, n_test_thetas=100, return_weights_and_thetas=False): @@ -663,6 +654,7 @@ def __init__(self, nuisance_parameters_from_madminer, benchmark_names, reference self.i_benchmarks_pos = [] self.i_benchmarks_neg = [] self.degrees = [] + for key, value in six.iteritems(self.nuisance_parameters): self.i_benchmarks_pos.append(benchmark_names.index(value[1])) if value[2] is None: diff --git a/madminer/utils/various.py b/madminer/utils/various.py index 7fb8387ff..62abb0763 100644 --- a/madminer/utils/various.py +++ b/madminer/utils/various.py @@ 
-112,13 +112,13 @@ def shuffle(*arrays): shuffled_a = a[permutation] shuffled_arrays.append(shuffled_a) - a = None return shuffled_arrays def restrict_samplesize(n, *arrays): restricted_arrays = [] + for i, a in enumerate(arrays): if a is None: restricted_arrays.append(None) @@ -265,8 +265,10 @@ def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False # Input values = np.array(values, dtype=np.float64) quantiles = np.array(quantiles) + if sample_weight is None: sample_weight = np.ones(len(values)) + sample_weight = np.array(sample_weight, dtype=np.float64) assert np.all(quantiles >= 0.0) and np.all(quantiles <= 1.0), "quantiles should be in [0, 1]" From 7e27064bd46bfb63e6584787a54cdd56d28590ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 11:28:55 -0400 Subject: [PATCH 47/65] src: utils module remove 'six' --- madminer/utils/histo.py | 4 +--- madminer/utils/morphing.py | 13 +++++-------- madminer/utils/particle.py | 4 +--- madminer/utils/various.py | 19 ++++++++----------- 4 files changed, 15 insertions(+), 25 deletions(-) diff --git a/madminer/utils/histo.py b/madminer/utils/histo.py index 449b4032b..b68f341de 100644 --- a/madminer/utils/histo.py +++ b/madminer/utils/histo.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import numpy as np import logging +import numpy as np from madminer.utils.various import weighted_quantile logger = logging.getLogger(__name__) diff --git a/madminer/utils/morphing.py b/madminer/utils/morphing.py index 58f83af53..3538a39e8 100644 --- a/madminer/utils/morphing.py +++ b/madminer/utils/morphing.py @@ -1,11 +1,8 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six +import itertools import logging import numpy as np -from collections import OrderedDict -import itertools +from collections import OrderedDict from madminer.utils.various import sanitize_array logger = 
logging.getLogger(__name__) @@ -200,7 +197,7 @@ def set_basis(self, basis_from_madminer=None, basis_numpy=None, morphing_matrix= if basis_from_madminer is not None: self.basis = [] - for bname, benchmark_in in six.iteritems(basis_from_madminer): + for bname, benchmark_in in basis_from_madminer.items(): self.basis.append([benchmark_in[key] for key in self.parameter_names]) self.basis = np.array(self.basis) elif basis_numpy is not None: @@ -268,7 +265,7 @@ def optimize_basis( if fixed_benchmarks_from_madminer is not None: fixed_benchmarks = [] fixed_benchmark_names = [] - for bname, benchmark_in in six.iteritems(fixed_benchmarks_from_madminer): + for bname, benchmark_in in fixed_benchmarks_from_madminer.items(): fixed_benchmark_names.append(bname) fixed_benchmarks.append([benchmark_in[key] for key in self.parameter_names]) fixed_benchmarks = np.array(fixed_benchmarks) @@ -655,7 +652,7 @@ def __init__(self, nuisance_parameters_from_madminer, benchmark_names, reference self.i_benchmarks_neg = [] self.degrees = [] - for key, value in six.iteritems(self.nuisance_parameters): + for key, value in self.nuisance_parameters.items(): self.i_benchmarks_pos.append(benchmark_names.index(value[1])) if value[2] is None: self.degrees.append(1) diff --git a/madminer/utils/particle.py b/madminer/utils/particle.py index 56812cbe6..504674a56 100644 --- a/madminer/utils/particle.py +++ b/madminer/utils/particle.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -from skhep.math.vectors import LorentzVector import logging +from skhep.math.vectors import LorentzVector logger = logging.getLogger(__name__) diff --git a/madminer/utils/various.py b/madminer/utils/various.py index 62abb0763..b01f7bfc8 100644 --- a/madminer/utils/various.py +++ b/madminer/utils/various.py @@ -1,15 +1,12 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six import logging -import os -import stat -from subprocess 
import Popen, PIPE -import io +import gzip import numpy as np +import os import shutil +import stat + from contextlib import contextmanager -import gzip +from subprocess import Popen, PIPE logger = logging.getLogger(__name__) @@ -18,7 +15,7 @@ def call_command(cmd, log_file=None, return_std=False): if log_file is not None: - with io.open(log_file, "wb") as log: + with open(log_file, "wb") as log: proc = Popen(cmd, stdout=log, stderr=log, shell=True) _ = proc.communicate() exitcode = proc.returncode @@ -73,7 +70,7 @@ def create_missing_folders(folders): def format_benchmark(parameters, precision=2): output = "" - for i, (key, value) in enumerate(six.iteritems(parameters)): + for i, (key, value) in enumerate(parameters.items()): if i > 0: output += ", " @@ -160,7 +157,7 @@ def load_and_check(filename, warning_threshold=1.0e9, memmap_files_larger_than_g if filename is None: return None - if not isinstance(filename, six.string_types): + if not isinstance(filename, str): data = filename memmap = False else: From 51865398d49121f017b1a6ca249f8689d4123d3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 11:41:34 -0400 Subject: [PATCH 48/65] src: utils/interfaces module f-strings --- madminer/utils/interfaces/delphes.py | 15 +- madminer/utils/interfaces/delphes_root.py | 8 +- madminer/utils/interfaces/hepmc.py | 2 +- madminer/utils/interfaces/lhe.py | 18 +-- madminer/utils/interfaces/madminer_hdf5.py | 2 +- madminer/utils/interfaces/mg.py | 161 +++++++++------------ madminer/utils/interfaces/mg_cards.py | 43 +++--- 7 files changed, 109 insertions(+), 140 deletions(-) diff --git a/madminer/utils/interfaces/delphes.py b/madminer/utils/interfaces/delphes.py index c75c408a0..aeacc7d15 100644 --- a/madminer/utils/interfaces/delphes.py +++ b/madminer/utils/interfaces/delphes.py @@ -20,7 +20,7 @@ def run_delphes( ): """ Runs Delphes on a HepMC sample """ - # Untar event file + # Unzip event file filename, extension = 
os.path.splitext(hepmc_sample_filename) to_delete = None if extension == ".gz": @@ -38,9 +38,9 @@ def run_delphes( for i in range(1, 1000): if i == 1: - filename_candidate = filename_prefix + "_delphes.root" + filename_candidate = f"{filename_prefix}_delphes.root" else: - filename_candidate = filename_prefix + "_delphes_" + str(i) + ".root" + filename_candidate = f"{filename_prefix}_delphes_{i}.root" if not os.path.exists(filename_candidate): delphes_sample_filename = filename_candidate @@ -61,13 +61,14 @@ def run_delphes( # Call Delphes _ = call_command( - "{}{}/DelphesHepMC {} {} {}".format( - initial_command, delphes_directory, delphes_card_filename, delphes_sample_filename, hepmc_sample_filename - ), + f"{initial_command}{delphes_directory}/DelphesHepMC " + f"{delphes_card_filename} " + f"{delphes_sample_filename} " + f"{hepmc_sample_filename}", log_file=log_file, ) - # Delete untarred file + # Delete unzipped file if to_delete is not None: logger.debug("Deleting %s", to_delete) os.remove(to_delete) diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py index f8d15dfa8..ec5083985 100644 --- a/madminer/utils/interfaces/delphes_root.py +++ b/madminer/utils/interfaces/delphes_root.py @@ -285,10 +285,10 @@ def _get_particles_truth(tree, pt_min, eta_max, included_pdgids=None): def _get_particles_charged(tree, name, mass, pdgid_positive_charge, pt_min, eta_max): - pts = tree.array(name + ".PT") - etas = tree.array(name + ".Eta") - phis = tree.array(name + ".Phi") - charges = tree.array(name + ".Charge") + pts = tree.array(f"{name}.PT") + etas = tree.array(f"{name}.Eta") + phis = tree.array(f"{name}.Phi") + charges = tree.array(f"{name}.Charge") all_particles = [] diff --git a/madminer/utils/interfaces/hepmc.py b/madminer/utils/interfaces/hepmc.py index 338a46b3d..0a52c9611 100644 --- a/madminer/utils/interfaces/hepmc.py +++ b/madminer/utils/interfaces/hepmc.py @@ -9,7 +9,7 @@ def extract_weight_order(filename, 
default_weight_label=None): - # Untar event file + # Unzip event file new_filename, extension = os.path.splitext(filename) if extension == ".gz": if not os.path.exists(new_filename): diff --git a/madminer/utils/interfaces/lhe.py b/madminer/utils/interfaces/lhe.py index 4575b101e..c4733f584 100644 --- a/madminer/utils/interfaces/lhe.py +++ b/madminer/utils/interfaces/lhe.py @@ -273,7 +273,7 @@ def parse_lhe_file( elif isinstance(processing, float): output_weights[nuisance_benchmark0] = processing * weights_all_events[weight_name0] else: - raise RuntimeError("Unknown nuisance processiing {}".format(processing)) + raise RuntimeError(f"Unknown nuisance processing {processing}") # Store second benchmark associated with nuisance param if nuisance_benchmark1 is None or weight_name1 is None: @@ -283,7 +283,7 @@ def parse_lhe_file( elif isinstance(processing, float): output_weights[nuisance_benchmark1] = processing * weights_all_events[weight_name1] else: - raise RuntimeError("Unknown nuisance processing {}".format(processing)) + raise RuntimeError(f"Unknown nuisance processing {processing}") return observations_dict, output_weights @@ -532,8 +532,8 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de syst_type = systematics_definition[0] if syst_type == "norm": - nuisance_param_name = "{}_nuisance_param_0".format(systematics_name) - benchmark_name = "{}_benchmark_0".format(nuisance_param_name) + nuisance_param_name = f"{systematics_name}_nuisance_param_0" + benchmark_name = f"{nuisance_param_name}_benchmark_0" nuisance_param_definition = (benchmark_name, None), (None, None), systematics_definition[1] return {nuisance_param_name: nuisance_param_definition} @@ -596,7 +596,7 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de # Matching time! 
if approx_equal(weight_mur, mur) and approx_equal(weight_muf, muf): - benchmark_name = "{}_nuisance_param_0_benchmark_{}".format(systematics_name, k) + benchmark_name = f"{systematics_name}_nuisance_param_0_benchmark_{k}" nuisance_param_definition_parts.append((benchmark_name, weight_id)) break @@ -614,7 +614,7 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de return {} else: # Output - nuisance_param_name = "{}_nuisance_param_0".format(systematics_name) + nuisance_param_name = f"{systematics_name}_nuisance_param_0" if len(nuisance_param_definition_parts) > 1: nuisance_dict = { nuisance_param_name: (nuisance_param_definition_parts[0], nuisance_param_definition_parts[1], None) @@ -657,8 +657,8 @@ def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_de logger.debug("Found PDF weight %s / %s", weight_id, weight_pdf) # Add every PDF Hessian direction to nuisance parameters - nuisance_param_name = "{}_nuisance_param_{}".format(systematics_name, i) - benchmark_name = "{}_benchmark_0".format(nuisance_param_name) + nuisance_param_name = f"{systematics_name}_nuisance_param_{i}" + benchmark_name = f"{nuisance_param_name}_benchmark_0" nuisance_dict[nuisance_param_name] = (benchmark_name, weight_id), (None, None), None # Check that everything was found @@ -854,7 +854,7 @@ def _parse_lhe_file_with_bad_chars(filename): def _untar_and_parse_lhe_file(filename, tags=None): - # Untar event file + # Unzip event file new_filename, extension = os.path.splitext(filename) if extension == ".gz": if not os.path.exists(new_filename): diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py index 325243ce9..35f8ac32b 100644 --- a/madminer/utils/interfaces/madminer_hdf5.py +++ b/madminer/utils/interfaces/madminer_hdf5.py @@ -711,7 +711,7 @@ def _load_systematics(filename): syst_data[1] = str(syst_data[1]) syst_data[2] = str(syst_data[2]) else: - raise RuntimeError("Error while reading 
systematics from HDF5 file: {}".format(syst_data)) + raise RuntimeError(f"Error while reading systematics from HDF5 file: {syst_data}") systematics[name] = tuple(syst_data) except KeyError: diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py index 85f067ac9..b0489ac43 100644 --- a/madminer/utils/interfaces/mg.py +++ b/madminer/utils/interfaces/mg.py @@ -70,31 +70,27 @@ def generate_mg_process( copy_ufo_model(ufo_model_directory, mg_directory) # MG commands - temp_proc_card_file = temp_directory + "/generate.mg5" + temp_proc_card_file = f"{temp_directory}/generate.mg5" shutil.copyfile(proc_card_file, temp_proc_card_file) with open(temp_proc_card_file, "a") as myfile: - myfile.write("\n\noutput " + mg_process_directory) + myfile.write(f"\n") + myfile.write(f"\n") + myfile.write(f"output {mg_process_directory}") # Call MG5 - if initial_command is None: - initial_command = "" - else: - initial_command = initial_command + "; " + initial_command = f"{initial_command}; " if initial_command else "" # Explicitly call Python 2 if necessary if explicit_python_call: - python_call = python_executable + " " if python_executable is not None else "python2.7 " + python_call = f"{python_executable} " if python_executable is not None else "python2.7 " else: python_call = "" - logger.info( - "Calling MadGraph: %s", initial_command + python_call + mg_directory + "/bin/mg5_aMC " + temp_proc_card_file - ) + command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {temp_proc_card_file}" + logger.info(f"Calling MadGraph: {command}") - _ = call_command( - initial_command + python_call + mg_directory + "/bin/mg5_aMC " + temp_proc_card_file, log_file=log_file - ) + _ = call_command(cmd=command, log_file=log_file) def setup_mg_with_scripts( @@ -198,19 +194,19 @@ def setup_mg_with_scripts( # Find filenames for process card and script if proc_card_filename_from_mgprocdir is None: for i in range(1000): - proc_card_filename_from_mgprocdir = 
"/Cards/start_event_generation_{}.mg5".format(i) - if not os.path.isfile(mg_process_directory + "/" + proc_card_filename_from_mgprocdir): + proc_card_filename_from_mgprocdir = f"/Cards/start_event_generation_{i}.mg5" + if not os.path.isfile(f"{mg_process_directory}/{proc_card_filename_from_mgprocdir}"): break else: proc_card_filename = mg_process_directory + "/" + proc_card_filename_from_mgprocdir if script_file_from_mgprocdir is None: for i in range(1000): - script_file = mg_process_directory + "/madminer/scripts/madminer_run_{}.sh".format(i) + script_file = f"{mg_process_directory}/madminer/scripts/madminer_run_{i}.sh" if not os.path.isfile(script_file): break else: - script_file = mg_process_directory + "/" + script_file_from_mgprocdir + script_file = f"{mg_process_directory}/{script_file_from_mgprocdir}" script_filename = os.path.basename(script_file) @@ -243,48 +239,34 @@ def setup_mg_with_scripts( # Card copying commands copy_commands = "" if run_card_file_from_mgprocdir is not None: - copy_commands += "cp {}/{} {}{}\n".format( - mg_process_directory_placeholder, - run_card_file_from_mgprocdir, - mg_process_directory_placeholder, - "/Cards/run_card.dat", - ) + copy_commands += f"cp " \ + f"{mg_process_directory_placeholder}/{run_card_file_from_mgprocdir} " \ + f"{mg_process_directory_placeholder}/Cards/run_card.dat\n" + if param_card_file_from_mgprocdir is not None: - copy_commands += "cp {}/{} {}{}\n".format( - mg_process_directory_placeholder, - param_card_file_from_mgprocdir, - mg_process_directory_placeholder, - "/Cards/param_card.dat", - ) + copy_commands += f"cp " \ + f"{mg_process_directory_placeholder}/{param_card_file_from_mgprocdir} " \ + f"{mg_process_directory_placeholder}/Cards/param_card.dat\n" + if reweight_card_file_from_mgprocdir is not None and not is_background: - copy_commands += "cp {}/{} {}{}\n".format( - mg_process_directory_placeholder, - reweight_card_file_from_mgprocdir, - mg_process_directory_placeholder, - 
"/Cards/reweight_card.dat", - ) + copy_commands += f"cp " \ + f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \ + f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n" + if pythia8_card_file_from_mgprocdir is not None and order == "LO": - copy_commands += "cp {}/{} {}{}\n".format( - mg_process_directory_placeholder, - pythia8_card_file_from_mgprocdir, - mg_process_directory_placeholder, - "/Cards/pythia8_card.dat", - ) - elif pythia8_card_file_from_mgprocdir is not None and order == "NLO": - copy_commands += "cp {}/{} {}{}\n".format( - mg_process_directory_placeholder, - pythia8_card_file_from_mgprocdir, - mg_process_directory_placeholder, - "/Cards/shower_card.dat", - ) + copy_commands += f"cp " \ + f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \ + f"{mg_process_directory_placeholder}/Cards/pythia8_card.dat\n" + + if pythia8_card_file_from_mgprocdir is not None and order == "NLO": + copy_commands += f"cp " \ + f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \ + f"{mg_process_directory_placeholder}/Cards/shower_card.dat\n" if configuration_file_from_mgprocdir is not None: - copy_commands += "cp {}/{} {}{}\n".format( - mg_process_directory_placeholder, - configuration_file_from_mgprocdir, - mg_process_directory_placeholder, - "/Cards/me5_configuration.txt", - ) + copy_commands += f"cp " \ + f"{mg_process_directory_placeholder}/{configuration_file_from_mgprocdir} " \ + f"{mg_process_directory_placeholder}/Cards/me5_configuration.txt\n" # Replace environment variable in proc card replacement_command = """sed -e 's@\$mgprocdir@'"$mgprocdir"'@' {}/{} > {}/{}""".format( @@ -323,13 +305,11 @@ def setup_mg_with_scripts( make_file_executable(script_file) # How to call it from master script - call_placeholder = "{}/{} {} {} {}".format( - mg_process_directory_placeholder, - script_file_from_mgprocdir, - mg_directory_placeholder, - mg_process_directory_placeholder, - log_dir_placeholder, 
- ) + call_placeholder = \ + f"{mg_process_directory_placeholder}/{script_file_from_mgprocdir} " \ + f"{mg_directory_placeholder} " \ + f"{mg_process_directory_placeholder} " \ + f"{log_dir_placeholder}" return call_placeholder @@ -415,22 +395,22 @@ def run_mg( # Copy cards if run_card_file is not None: - shutil.copyfile(run_card_file, mg_process_directory + "/Cards/run_card.dat") + shutil.copyfile(run_card_file, f"{mg_process_directory}/Cards/run_card.dat") if param_card_file is not None: - shutil.copyfile(param_card_file, mg_process_directory + "/Cards/param_card.dat") + shutil.copyfile(param_card_file, f"{mg_process_directory}/Cards/param_card.dat") if reweight_card_file is not None and not is_background: - shutil.copyfile(reweight_card_file, mg_process_directory + "/Cards/reweight_card.dat") + shutil.copyfile(reweight_card_file, f"{mg_process_directory}/Cards/reweight_card.dat") if pythia8_card_file is not None and order == "LO": - shutil.copyfile(pythia8_card_file, mg_process_directory + "/Cards/pythia8_card.dat") + shutil.copyfile(pythia8_card_file, f"{mg_process_directory}/Cards/pythia8_card.dat") if pythia8_card_file is not None and order == "NLO": - shutil.copyfile(pythia8_card_file, mg_process_directory + "/Cards/shower_card.dat") + shutil.copyfile(pythia8_card_file, f"{mg_process_directory}/Cards/shower_card.dat") if configuration_card_file is not None: - shutil.copyfile(configuration_card_file, mg_process_directory + "/Cards/me5_configuration.txt") + shutil.copyfile(configuration_card_file, f"{mg_process_directory}/Cards/me5_configuration.txt") # Find filenames for process card and script if proc_card_filename is None: for i in range(1000): - proc_card_filename = mg_process_directory + "/Cards/start_event_generation_{}.mg5".format(i) + proc_card_filename = f"{mg_process_directory}/Cards/start_event_generation_{i}.mg5" if not os.path.isfile(proc_card_filename): break @@ -454,20 +434,18 @@ def run_mg( file.write(mg_commands) # Call MG5 - if 
initial_command is None: - initial_command = "" - else: - initial_command = initial_command + "; " + initial_command = f"{initial_command}; " if initial_command else "" # Explicitly call Python 2 if necessary if explicit_python_call: - python_call = python_executable + " " if python_executable is not None else "python2.7 " + python_call = f"{python_executable} " if python_executable is not None else "python2.7 " else: python_call = "" - _ = call_command( - initial_command + python_call + mg_directory + "/bin/mg5_aMC " + proc_card_filename, log_file=log_file - ) + command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {proc_card_filename}" + logger.info(f"Calling MadGraph: {command}") + + _ = call_command(cmd=command, log_file=log_file) def setup_mg_reweighting_with_scripts( @@ -525,10 +503,11 @@ def setup_mg_reweighting_with_scripts( log_dir_placeholder = "$mmlogdir" placeholder_definition = r"mgprocdir=${1:-" + mg_process_directory + r"}" + "\n" placeholder_definition += r"mmlogdir=${2:-" + log_dir + r"}" + if script_file_from_mgprocdir is None: - script_file = mg_process_directory + "/madminer/scripts/madminer_reweight_{}.sh".format(run_name) + script_file = f"{mg_process_directory}/madminer/scripts/madminer_reweight_{run_name}.sh" else: - script_file = mg_process_directory + "/" + script_file_from_mgprocdir + script_file = f"{mg_process_directory}/{script_file_from_mgprocdir}" script_filename = os.path.basename(script_file) @@ -541,12 +520,9 @@ def setup_mg_reweighting_with_scripts( # Card copying commands if reweight_card_file_from_mgprocdir is not None: - copy_commands = "cp {}/{} {}{}\n".format( - mg_process_directory_placeholder, - reweight_card_file_from_mgprocdir, - mg_process_directory_placeholder, - "/Cards/reweight_card.dat", - ) + copy_commands = f"cp " \ + f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \ + f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n" else: copy_commands = "" @@ -570,9 +546,9 
@@ def setup_mg_reweighting_with_scripts( make_file_executable(script_file) # How to call it from master script - call_instruction = "{}/{} [MG_process_directory] [log_directory]".format( - mg_process_directory, script_file_from_mgprocdir - ) + call_instruction = \ + f"{mg_process_directory}/{script_file_from_mgprocdir} " \ + f"[MG_process_directory] [log_directory]" return call_instruction @@ -618,19 +594,17 @@ def run_mg_reweighting(mg_process_directory, run_name, reweight_card_file=None, shutil.copyfile(reweight_card_file, mg_process_directory + "/Cards/reweight_card.dat") # Call MG5 reweight feature - if initial_command is None: - initial_command = "" - else: - initial_command = initial_command + "; " + initial_command = f"{initial_command}; " if initial_command else "" _ = call_command( - "{}{}/bin/madevent reweight {} -f".format(initial_command, mg_process_directory, run_name), log_file=log_file + cmd=f"{initial_command}{mg_process_directory}/bin/madevent reweight {run_name} -f", + log_file=log_file, ) def copy_ufo_model(ufo_directory, mg_directory): _, model_name = os.path.split(ufo_directory) - destination = mg_directory + "/models/" + model_name + destination = f"{mg_directory}/models/{model_name}" if os.path.isdir(destination): return @@ -648,6 +622,7 @@ def create_master_script(log_directory, master_script_filename, mg_directory, mg + "# Usage: run.sh [MG_directory] [MG_process_directory] [log_directory]\n\n" + "{}\n\n{}" ).format(placeholder_definition, commands) + with open(master_script_filename, "w") as file: file.write(script) make_file_executable(master_script_filename) diff --git a/madminer/utils/interfaces/mg_cards.py b/madminer/utils/interfaces/mg_cards.py index 39bfbab18..708f655d0 100644 --- a/madminer/utils/interfaces/mg_cards.py +++ b/madminer/utils/interfaces/mg_cards.py @@ -49,7 +49,7 @@ def export_param_card(benchmark, parameters, param_card_template_file, mg_proces continue if lha_id == parameter_lha_id: - lines[i] = " " + 
str(parameter_lha_id) + " " + str(parameter_value) + " # MadMiner" + lines[i] = f" {parameter_lha_id} {parameter_value} # MadMiner" changed_line = True break @@ -61,25 +61,18 @@ def export_param_card(benchmark, parameters, param_card_template_file, mg_proces current_block = None if lha_id == parameter_lha_id: - lines[i] = ( - str(parameter_lha_block) - + " " - + str(parameter_lha_id) - + " " - + str(parameter_value) - + " # MadMiner" - ) + lines[i] = f"{parameter_lha_block} {parameter_lha_id} {parameter_value} # MadMiner" changed_line = True break if not changed_line: - raise ValueError("Could not find LHA ID {0} in param_card template!".format(parameter_lha_id)) + raise ValueError(f"Could not find LHA ID {parameter_lha_id} in param_card template!") param_card = "\n".join(lines) # Output filename if param_card_filename is None: - param_card_filename = mg_process_directory + "/Cards/param_card.dat" + param_card_filename = f"{mg_process_directory}/Cards/param_card.dat" # Save param_card.dat with open(param_card_filename, "w") as file: @@ -114,7 +107,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di variables = {"theta": parameter_value} parameter_value = eval(parameter_transform, variables) - lines.append(" set {0} {1} {2}".format(parameter_lha_block, parameter_lha_id, parameter_value)) + lines.append(f" set {parameter_lha_block} {parameter_lha_id} {parameter_value}") lines.append("") @@ -122,7 +115,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di # Output filename if reweight_card_filename is None: - reweight_card_filename = mg_process_directory + "/Cards/reweight_card.dat" + reweight_card_filename = f"{mg_process_directory}/Cards/reweight_card.dat" # Save param_card.dat with open(reweight_card_filename, "w") as file: @@ -182,7 +175,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde line_key = elements[-1].strip() if line_key in entries_to_comment_out: - 
run_card_lines[i] = "# {} # Commented out by MadMiner".format(line) + run_card_lines[i] = f"# {line} # Commented out by MadMiner" continue # Add new entries - sytematics @@ -192,7 +185,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde run_card_lines.append("# MadMiner systematics setup *") run_card_lines.append("#*********************************************************************") for key, value in six.iteritems(settings): - run_card_lines.append("{} = {}".format(value, key)) + run_card_lines.append(f"{value} = {key}") run_card_lines.append("") # Write new run card @@ -217,31 +210,31 @@ def create_systematics_arguments(systematics): if value[0] == "scale" and value[1] == "mu": if mur_done or muf_done: raise ValueError("Multiple nuisance parameter for scale variation!") - systematics_arguments.append("'--mur={}'".format(value[2])) - systematics_arguments.append("'--muf={}'".format(value[2])) - systematics_arguments.append("'--together=mur,muf'") - systematics_arguments.append("'--dyn=-1'") + systematics_arguments.append(f"'--mur={value[2]}'") + systematics_arguments.append(f"'--muf={value[2]}'") + systematics_arguments.append(f"'--together=mur,muf'") + systematics_arguments.append(f"'--dyn=-1'") mur_done = True muf_done = True elif value[0] == "scale" and value[1] == "mur": if mur_done: raise ValueError("Multiple nuisance parameter for mur variation!") - systematics_arguments.append("'--mur={}'".format(value[2])) - systematics_arguments.append("'--dyn=-1'") + systematics_arguments.append(f"'--mur={value[2]}'") + systematics_arguments.append(f"'--dyn=-1'") mur_done = True elif value[0] == "scale" and value[1] == "muf": if muf_done: raise ValueError("Multiple nuisance parameter for muf variation!") - systematics_arguments.append("'--muf={}'".format(value[2])) - systematics_arguments.append("'--dyn=-1'") + systematics_arguments.append(f"'--muf={value[2]}'") + systematics_arguments.append(f"'--dyn=-1'") muf_done = True elif 
value[0] == "pdf": if pdf_done: raise ValueError("Multiple nuisance parameter for PDF variation!") - systematics_arguments.append("'--pdf={}'".format(value[1])) + systematics_arguments.append(f"'--pdf={value[1]}'") pdf_done = True if len(systematics_arguments) > 0: - return "[" + ", ".join(systematics_arguments) + "]" + return f"[{', '.join(systematics_arguments)}]" return "" From 0f157a4b2598a5fa0735952944a30ae08d916df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 13:45:07 -0400 Subject: [PATCH 49/65] src: utils/interfaces module improve fmt --- madminer/utils/interfaces/delphes_root.py | 5 +-- madminer/utils/interfaces/lhe.py | 38 +++++++++++++++++++--- madminer/utils/interfaces/madminer_hdf5.py | 28 +++++++++++++++- madminer/utils/interfaces/mg_cards.py | 2 +- 4 files changed, 62 insertions(+), 11 deletions(-) diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py index ec5083985..6e6d0b85e 100644 --- a/madminer/utils/interfaces/delphes_root.py +++ b/madminer/utils/interfaces/delphes_root.py @@ -42,15 +42,12 @@ def parse_delphes_root_file( logger.debug("Extracting weights %s", weight_labels) # Delphes ROOT file - root_file = uproot.open(str(delphes_sample_file)) - # The str() call is important when using numpy 1.16.0 and Python 2.7. In this combination of versions, a unicode - # delphes_sample_file would lead to a crash. 
+ root_file = uproot.open(delphes_sample_file) # Delphes tree tree = root_file["Delphes"] # Weights - n_weights = 0 weights = None if weight_labels is not None: try: diff --git a/madminer/utils/interfaces/lhe.py b/madminer/utils/interfaces/lhe.py index c4733f584..d76874617 100644 --- a/madminer/utils/interfaces/lhe.py +++ b/madminer/utils/interfaces/lhe.py @@ -70,7 +70,7 @@ def parse_lhe_file( if efficiencies_default_pass is None: efficiencies_default_pass = {key: 1.0 for key in six.iterkeys(efficiencies)} - # Untar and open LHE file + # Unzip and open LHE file run_card = None for elem in _untar_and_parse_lhe_file(filename): if elem.tag == "MGRunCard": @@ -305,11 +305,14 @@ def _report_parse_results( logger.info(" %s / %s events pass efficiency %s", n_pass, n_pass + n_fail, efficiency) for n_eff, efficiency, n_pass, n_fail in zip(avg_efficiencies, efficiencies, pass_efficiencies, fail_efficiencies): logger.info(" average efficiency for %s is %s", efficiency, n_eff / (n_pass + n_fail)) + n_events_pass = len(observations_all_events) + if len(cuts) > 0: logger.info(" %s events pass all cuts/efficiencies", n_events_pass) if n_events_with_negative_weights > 0: logger.warning(" %s events contain negative weights", n_events_with_negative_weights) + return n_events_pass @@ -365,14 +368,26 @@ def _parse_event( pass_all_cuts = True if pass_all_observation: pass_all_cuts = _parse_cuts( - cuts, cuts_default_pass, fail_cuts, observables, observations, pass_all_cuts, pass_cuts, variables + cuts, + cuts_default_pass, + fail_cuts, + observables, + observations, + pass_all_cuts, + pass_cuts, + variables, ) # Efficiencies pass_all_efficiencies = True if pass_all_observation and pass_all_cuts: pass_all_efficiencies, total_efficiency = _parse_efficiencies( - avg_efficiencies, efficiencies, efficiencies_default_pass, fail_efficiencies, pass_efficiencies, variables + avg_efficiencies, + efficiencies, + efficiencies_default_pass, + fail_efficiencies, + pass_efficiencies, + variables, ) 
if pass_all_efficiencies: @@ -395,18 +410,21 @@ def _parse_event( def _report_negative_weights(n_events_with_negative_weights, weights): n_negative_weights = np.sum(np.array(list(weights.values())) < 0.0) + if n_negative_weights > 0: n_events_with_negative_weights += 1 if n_events_with_negative_weights <= 3: logger.warning("Found %s negative weights in event. Weights: %s", n_negative_weights, weights) if n_events_with_negative_weights == 3: logger.warning("Skipping warnings about negative weights from now on...") + return n_events_with_negative_weights def _parse_observations(observables, observables_defaults, observables_required, variables): observations = [] pass_all_observation = True + for obs_name, obs_definition in six.iteritems(observables): if isinstance(obs_definition, six.string_types): try: @@ -434,15 +452,22 @@ def _parse_observations(observables, observables_defaults, observables_required, if default is None: default = np.nan observations.append(default) + return observations, pass_all_observation def _parse_efficiencies( - avg_efficiencies, efficiencies, efficiencies_default_pass, fail_efficiencies, pass_efficiencies, variables + avg_efficiencies, + efficiencies, + efficiencies_default_pass, + fail_efficiencies, + pass_efficiencies, + variables, ): # Apply efficiencies total_efficiency = 1.0 pass_all_efficiencies = True + for i_efficiency, (efficiency, default_pass) in enumerate(zip(efficiencies, efficiencies_default_pass)): try: efficiency_result = eval(efficiency, variables) @@ -462,6 +487,7 @@ def _parse_efficiencies( else: fail_efficiencies[i_efficiency] += 1 pass_all_efficiencies = False + return pass_all_efficiencies, total_efficiency @@ -469,6 +495,7 @@ def _parse_cuts(cuts, cuts_default_pass, fail_cuts, observables, observations, p # Objects for cuts for obs_name, obs_value in zip(observables.keys(), observations): variables[obs_name] = obs_value + # Check cuts for i_cut, (cut, default_pass) in enumerate(zip(cuts, cuts_default_pass)): try: @@ 
-485,6 +512,7 @@ def _parse_cuts(cuts, cuts_default_pass, fail_cuts, observables, observations, p else: fail_cuts[i_cut] += 1 pass_all_cuts = False + return pass_all_cuts @@ -504,7 +532,7 @@ def extract_nuisance_parameters_from_lhe_file(filename, systematics): # Parse scale factors from strings in systematics logger.debug("Systematics setup: %s", systematics) - # Untar and parse LHE file + # Unzip and parse LHE file initrwgts = _untar_and_parse_lhe_file(filename, ["initrwgt"]) # Find weight groups diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py index 35f8ac32b..0c3ef778b 100644 --- a/madminer/utils/interfaces/madminer_hdf5.py +++ b/madminer/utils/interfaces/madminer_hdf5.py @@ -58,7 +58,12 @@ def save_nuisance_setup_to_madminer_file( def save_preformatted_events_to_madminer_file( - filename, observations, weights, sampling_benchmarks, copy_setup_from, overwrite_existing_samples=True + filename, + observations, + weights, + sampling_benchmarks, + copy_setup_from, + overwrite_existing_samples=True, ): _copy_madminer_file(copy_setup_from, filename, overwrite_existing_samples) _save_events( @@ -269,6 +274,7 @@ def madminer_event_loader( def _save_parameters(filename, overwrite_existing_files, parameters): io_tag = "w" if overwrite_existing_files else "x" + with h5py.File(filename, io_tag) as f: # Prepare parameters parameter_names = [pname for pname in parameters] @@ -293,11 +299,13 @@ def _save_parameters(filename, overwrite_existing_files, parameters): f.create_dataset("parameters/max_power", data=parameter_max_power) f.create_dataset("parameters/ranges", data=parameter_ranges) f.create_dataset("parameters/transforms", (n_parameters,), dtype="S256", data=parameter_transforms) + return parameter_names def _save_benchmarks(benchmarks, benchmarks_is_nuisance, filename, parameter_names): io_tag = "a" # Read-write if file exists, otherwise create + with h5py.File(filename, io_tag) as f: # Prepare benchmarks 
benchmark_names = [bname for bname in benchmarks] @@ -320,6 +328,7 @@ def _save_benchmarks(benchmarks, benchmarks_is_nuisance, filename, parameter_nam def _save_benchmarks2(benchmark_is_nuisance, benchmark_names, benchmark_values, filename, reference_benchmark): io_tag = "a" # Read-write if file exists, otherwise create + with h5py.File(filename, io_tag) as f: # Prepare benchmarks for saving n_benchmarks = len(benchmark_names) @@ -367,6 +376,7 @@ def _save_finite_differences(filename, finite_difference_benchmarks, finite_diff def _save_morphing(filename, morphing_components, morphing_matrix): io_tag = "a" # Read-write if file exists, otherwise create + with h5py.File(filename, io_tag) as f: # Store morphing info if morphing_components is not None: @@ -377,6 +387,7 @@ def _save_morphing(filename, morphing_components, morphing_matrix): def _save_systematics(filename, systematics): io_tag = "a" + with h5py.File(filename, io_tag) as f: # Prepare and store systematics setup if systematics is not None and len(systematics) > 0: @@ -393,6 +404,7 @@ def _save_systematics(filename, systematics): def _save_nuisance_parameters(filename, nuisance_parameters, overwrite_existing_nuisance_parameters): io_tag = "a" # Read-write if file exists, otherwise create + with h5py.File(filename, io_tag) as f: # Make space for nuisance params if overwrite_existing_nuisance_parameters: @@ -441,6 +453,7 @@ def _save_nuisance_parameters(filename, nuisance_parameters, overwrite_existing_ def _save_n_events(filename, n_events_background, n_events_per_sampling_benchmark, overwrite_existing_samples): io_tag = "a" # Read-write if file exists, otherwise create + with h5py.File(filename, io_tag) as f: # Check if groups exist already if overwrite_existing_samples: @@ -493,6 +506,7 @@ def _save_events( def _save_observables(filename, observables, overwrite_existing_samples): io_tag = "a" # Read-write if file exists, otherwise create + if observables is None: return None @@ -524,6 +538,7 @@ def 
_save_observables(filename, observables, overwrite_existing_samples): def _load_parameters(filename): + with h5py.File(filename, "r") as f: # Parameters try: @@ -556,10 +571,12 @@ def _load_parameters(filename): except KeyError: raise IOError("Cannot read parameters from HDF5 file") + return parameter_names, parameters def _load_benchmarks(filename, include_nuisance_benchmarks, parameter_names, return_dict=True): + with h5py.File(filename, "r") as f: # Benchmarks try: @@ -620,6 +637,7 @@ def _load_finite_differences(filename, parameter_names): def _load_n_samples(filename): + with h5py.File(filename, "r") as f: # Number of samples try: @@ -645,6 +663,7 @@ def _load_n_samples(filename): def _load_morphing(filename): + with h5py.File(filename, "r") as f: # Morphing try: @@ -658,6 +677,7 @@ def _load_morphing(filename): def _load_nuisance_params(filename): + with h5py.File(filename, "r") as f: # Nuisance parameters try: @@ -687,10 +707,12 @@ def _load_nuisance_params(filename): except KeyError: nuisance_parameters = None + return nuisance_parameters def _load_systematics(filename): + with h5py.File(filename, "r") as f: # Systematics setup try: @@ -716,10 +738,12 @@ def _load_systematics(filename): except KeyError: systematics = OrderedDict() + return systematics def _load_observables(filename): + with h5py.File(filename, "r") as f: # Observables try: @@ -734,6 +758,7 @@ def _load_observables(filename): observables[oname] = odef except KeyError: observables = None + return observables @@ -771,6 +796,7 @@ def _sort_weights(benchmark_names, weights): except Exception as e: logger.warning("Issue matching weight names in HepMC file to benchmark names in MadMiner file:\n%s", e) weights_sorted = [weights[key] for key in weights] + return weights_sorted diff --git a/madminer/utils/interfaces/mg_cards.py b/madminer/utils/interfaces/mg_cards.py index 708f655d0..efbf58b37 100644 --- a/madminer/utils/interfaces/mg_cards.py +++ b/madminer/utils/interfaces/mg_cards.py @@ -178,7 
+178,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde run_card_lines[i] = f"# {line} # Commented out by MadMiner" continue - # Add new entries - sytematics + # Add new entries - systematics if order == "LO": run_card_lines.append("") run_card_lines.append("#*********************************************************************") From 73cf4fde14e1eb1826f4d5fa48579a7c5bb420fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Wed, 23 Sep 2020 13:58:24 -0400 Subject: [PATCH 50/65] src: utils/interfaces module remove 'six' --- madminer/utils/interfaces/delphes.py | 2 -- madminer/utils/interfaces/delphes_root.py | 15 ++++----- madminer/utils/interfaces/hepmc.py | 6 ++-- madminer/utils/interfaces/lhe.py | 38 ++++++++++------------ madminer/utils/interfaces/madminer_hdf5.py | 10 +++--- madminer/utils/interfaces/mg.py | 4 +-- madminer/utils/interfaces/mg_cards.py | 15 ++++----- 7 files changed, 36 insertions(+), 54 deletions(-) diff --git a/madminer/utils/interfaces/delphes.py b/madminer/utils/interfaces/delphes.py index aeacc7d15..81f062d83 100644 --- a/madminer/utils/interfaces/delphes.py +++ b/madminer/utils/interfaces/delphes.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import os import logging diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py index 6e6d0b85e..ba6b5cca7 100644 --- a/madminer/utils/interfaces/delphes_root.py +++ b/madminer/utils/interfaces/delphes_root.py @@ -1,11 +1,8 @@ -from __future__ import absolute_import, division, print_function, unicode_literals -import six - +import logging import numpy as np -from collections import OrderedDict -import uproot import os -import logging +import uproot +from collections import OrderedDict from madminer.utils.particle import MadMinerParticle from madminer.utils.various import math_commands @@ -124,14 +121,14 @@ def get_objects(ievent): # Observations 
observable_values = OrderedDict() - for obs_name, obs_definition in six.iteritems(observables): + for obs_name, obs_definition in observables.items(): values_this_observable = [] # Loop over events for event in range(n_events): variables = get_objects(event) - if isinstance(obs_definition, six.string_types): + if isinstance(obs_definition, str): try: values_this_observable.append(eval(obs_definition, variables)) except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError): @@ -184,7 +181,7 @@ def get_objects(ievent): # Check for existence of required observables combined_filter = None - for obs_name, obs_required in six.iteritems(observables_required): + for obs_name, obs_required in observables_required.items(): if obs_required: this_filter = np.isfinite(observable_values[obs_name]) n_pass = np.sum(this_filter) diff --git a/madminer/utils/interfaces/hepmc.py b/madminer/utils/interfaces/hepmc.py index 0a52c9611..0fdc0314f 100644 --- a/madminer/utils/interfaces/hepmc.py +++ b/madminer/utils/interfaces/hepmc.py @@ -1,8 +1,6 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import os -from io import open import logging +import os + from madminer.utils.various import unzip_file logger = logging.getLogger(__name__) diff --git a/madminer/utils/interfaces/lhe.py b/madminer/utils/interfaces/lhe.py index d76874617..f0669464d 100644 --- a/madminer/utils/interfaces/lhe.py +++ b/madminer/utils/interfaces/lhe.py @@ -1,22 +1,17 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six +import logging import numpy as np -from collections import OrderedDict import os -import logging +from collections import OrderedDict try: import xml.etree.cElementTree as ET - use_celementtree = True except ImportError: import xml.etree.ElementTree as ET - use_celementtree = False -from madminer.utils.various import unzip_file, approx_equal, math_commands from madminer.utils.particle import 
MadMinerParticle +from madminer.utils.various import unzip_file, approx_equal, math_commands logger = logging.getLogger(__name__) @@ -56,19 +51,19 @@ def parse_lhe_file( if k_factor is None: k_factor = 1.0 if observables_required is None: - observables_required = {key: False for key in six.iterkeys(observables)} + observables_required = {key: False for key in observables.keys()} if observables_defaults is None: - observables_defaults = {key: None for key in six.iterkeys(observables)} + observables_defaults = {key: None for key in observables.keys()} if is_background and benchmark_names is None: raise RuntimeError("Parsing background LHE files required benchmark names to be provided.") if cuts is None: cuts = OrderedDict() if cuts_default_pass is None: - cuts_default_pass = {key: False for key in six.iterkeys(cuts)} + cuts_default_pass = {key: False for key in cuts.keys()} if efficiencies is None: efficiencies = OrderedDict() if efficiencies_default_pass is None: - efficiencies_default_pass = {key: 1.0 for key in six.iterkeys(efficiencies)} + efficiencies_default_pass = {key: 1.0 for key in efficiencies.keys()} # Unzip and open LHE file run_card = None @@ -260,11 +255,12 @@ def parse_lhe_file( output_weights[benchmark_name] = weights_all_events[sampling_benchmark] else: output_weights[benchmark_name] = weights_all_events[benchmark_name] - for syst_name, syst_data in six.iteritems(systematics_dict): + + for syst_name, syst_data in systematics_dict.items(): for ( nuisance_param_name, ((nuisance_benchmark0, weight_name0), (nuisance_benchmark1, weight_name1), processing), - ) in six.iteritems(syst_data): + ) in syst_data.items(): # Store first benchmark associated with nuisance param if weight_name0 is None: weight_name0 = sampling_benchmark @@ -425,8 +421,8 @@ def _parse_observations(observables, observables_defaults, observables_required, observations = [] pass_all_observation = True - for obs_name, obs_definition in six.iteritems(observables): - if 
isinstance(obs_definition, six.string_types): + for obs_name, obs_definition in observables.items(): + if isinstance(obs_definition, str): try: observations.append(eval(obs_definition, variables)) except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError): @@ -547,7 +543,7 @@ def extract_nuisance_parameters_from_lhe_file(filename, systematics): logger.debug("%s weight groups", len(weight_groups)) # Loop over systematics - for syst_name, syst_value in six.iteritems(systematics): + for syst_name, syst_value in systematics.items(): nuisance_param_dict = _extract_nuisance_param_dict(weight_groups, syst_name, syst_value) systematics_dict[syst_name] = nuisance_param_dict @@ -1023,10 +1019,10 @@ def _smear_particles(particles, energy_resolutions, pt_resolutions, eta_resoluti pdgid = particle.pdgid if ( - pdgid not in six.iterkeys(energy_resolutions) - or pdgid not in six.iterkeys(pt_resolutions) - or pdgid not in six.iterkeys(eta_resolutions) - or pdgid not in six.iterkeys(phi_resolutions) + pdgid not in energy_resolutions.keys() + or pdgid not in pt_resolutions.keys() + or pdgid not in eta_resolutions.keys() + or pdgid not in phi_resolutions.keys() ): continue diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py index 0c3ef778b..cf721799d 100644 --- a/madminer/utils/interfaces/madminer_hdf5.py +++ b/madminer/utils/interfaces/madminer_hdf5.py @@ -1,11 +1,9 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six -import shutil import h5py +import logging import numpy as np +import shutil from collections import OrderedDict -import logging + logger = logging.getLogger(__name__) @@ -525,7 +523,7 @@ def _save_observables(filename, observables, overwrite_existing_samples): observable_definitions = [] for key in observable_names: definition = observables[key] - if isinstance(definition, six.string_types): + if isinstance(definition, str): 
observable_definitions.append(definition.encode("ascii", "ignore")) else: observable_definitions.append("".encode("ascii", "ignore")) diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py index b0489ac43..0f436c897 100644 --- a/madminer/utils/interfaces/mg.py +++ b/madminer/utils/interfaces/mg.py @@ -1,8 +1,6 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - +import logging import os import shutil -import logging from madminer.utils.various import call_command, make_file_executable, create_missing_folders diff --git a/madminer/utils/interfaces/mg_cards.py b/madminer/utils/interfaces/mg_cards.py index efbf58b37..25fdd0225 100644 --- a/madminer/utils/interfaces/mg_cards.py +++ b/madminer/utils/interfaces/mg_cards.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six import logging from collections import OrderedDict @@ -14,7 +11,7 @@ def export_param_card(benchmark, parameters, param_card_template_file, mg_proces lines = param_card.splitlines() # Replace parameter values - for parameter_name, parameter_value in six.iteritems(benchmark): + for parameter_name, parameter_value in benchmark.items(): parameter_lha_block = parameters[parameter_name][0] parameter_lha_id = parameters[parameter_name][1] @@ -89,7 +86,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di "change helicity False", ] - for benchmark_name, benchmark in six.iteritems(benchmarks): + for benchmark_name, benchmark in benchmarks.items(): if benchmark_name == sample_benchmark: continue @@ -97,7 +94,7 @@ def export_reweight_card(sample_benchmark, benchmarks, parameters, mg_process_di lines.append("# MadMiner benchmark " + benchmark_name) lines.append("launch --rwgt_name=" + benchmark_name) - for parameter_name, parameter_value in six.iteritems(benchmark): + for parameter_name, parameter_value in benchmark.items(): parameter_lha_block = 
parameters[parameter_name][0] parameter_lha_id = parameters[parameter_name][1] @@ -131,7 +128,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde # Do we actually have to run MadGraph's systematics feature? run_systematics = False - for value in six.itervalues(systematics): + for value in systematics.values(): if value[0] in ["pdf", "scale"]: run_systematics = True @@ -184,7 +181,7 @@ def export_run_card(template_filename, run_card_filename, systematics=None, orde run_card_lines.append("#*********************************************************************") run_card_lines.append("# MadMiner systematics setup *") run_card_lines.append("#*********************************************************************") - for key, value in six.iteritems(settings): + for key, value in settings.items(): run_card_lines.append(f"{value} = {key}") run_card_lines.append("") @@ -206,7 +203,7 @@ def create_systematics_arguments(systematics): muf_done = False pdf_done = False - for value in six.itervalues(systematics): + for value in systematics.values(): if value[0] == "scale" and value[1] == "mu": if mur_done or muf_done: raise ValueError("Multiple nuisance parameter for scale variation!") From cb7cab240c612d858a2e364d48e952b0cf04e7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 10:55:50 -0400 Subject: [PATCH 51/65] src: utils/ml module f-strings --- madminer/utils/ml/trainer.py | 18 ++++++++---------- madminer/utils/ml/utils.py | 4 ++-- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/madminer/utils/ml/trainer.py b/madminer/utils/ml/trainer.py index 2dd94ee0b..f07c444e5 100644 --- a/madminer/utils/ml/trainer.py +++ b/madminer/utils/ml/trainer.py @@ -216,7 +216,7 @@ def make_dataloaders(self, dataset, dataset_val, validation_split, batch_size): ) else: - assert 0.0 < validation_split < 1.0, "Wrong validation split: {}".format(validation_split) + assert 0.0 < validation_split < 1.0, f"Wrong 
validation split: {validation_split}" n_samples = len(dataset) indices = list(range(n_samples)) @@ -402,7 +402,7 @@ def check_early_stopping(self, best_loss, best_model, best_epoch, loss, i_epoch, @staticmethod def report_batch(i_epoch, i_batch, loss_train): if i_batch in [0, 1, 10, 100, 1000]: - logger.debug(" Epoch {:>3d}, batch {:>3d}: loss {:>8.5f}".format(i_epoch + 1, i_batch + 1, loss_train)) + logger.debug(f" Epoch {(i_epoch+1):>3d}, batch {(i_batch+1):>3d}: loss {loss_train:>8.5f}") @staticmethod def report_epoch( @@ -415,18 +415,16 @@ def contribution_summary(labels, contributions): for i, (label, value) in enumerate(zip(labels, contributions)): if i > 0: summary += ", " - summary += "{}: {:>6.3f}".format(label, value) + summary += f"{label}: {value:>6.3f}" return summary - train_report = " Epoch {:>3d}: train loss {:>8.5f} ({})".format( - i_epoch + 1, loss_train, contribution_summary(loss_labels, loss_contributions_train) - ) + summary = contribution_summary(loss_labels, loss_contributions_train) + train_report = f" Epoch {(i_epoch+1):>3d}: train loss {loss_train:>8.5f} ({summary})" logging_fn(train_report) if loss_val is not None: - val_report = " val. loss {:>8.5f} ({})".format( - loss_val, contribution_summary(loss_labels, loss_contributions_val) - ) + summary = contribution_summary(loss_labels, loss_contributions_train) + val_report = f" val. 
loss {loss_val:>8.5f} ({summary})" logging_fn(val_report) def wrap_up_early_stopping(self, best_model, currrent_loss, best_loss, best_epoch): @@ -476,7 +474,7 @@ def _timer(self, start=None, stop=None): def _report_timer(self): logger.info("Training time spend on:") for key, value in six.iteritems(self.timer): - logger.info(" {:>32s}: {:6.2f}h".format(key, value / 3600.0)) + logger.info(f" {key:>32s}: {(value/3600.0):6.2f}h") class SingleParameterizedRatioTrainer(Trainer): diff --git a/madminer/utils/ml/utils.py b/madminer/utils/ml/utils.py index 2cf223f33..851a7997d 100644 --- a/madminer/utils/ml/utils.py +++ b/madminer/utils/ml/utils.py @@ -130,7 +130,7 @@ def get_optimizer(optimizer, nesterov_momentum): if nesterov_momentum is not None: opt_kwargs = {"momentum": nesterov_momentum} else: - raise ValueError("Unknown optimizer {}".format(optimizer)) + raise ValueError(f"Unknown optimizer {optimizer}") return opt, opt_kwargs @@ -184,7 +184,7 @@ def get_loss(method, alpha): loss_weights = [1.0, alpha] loss_labels = ["nll", "mse_score"] else: - raise NotImplementedError("Unknown method {}".format(method)) + raise NotImplementedError("Unknown method {method}") return loss_functions, loss_labels, loss_weights From 34bb65d0f6a37c29a1b520a3e63c366294946228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 11:04:35 -0400 Subject: [PATCH 52/65] src: utils/ml module improve fmt --- madminer/utils/ml/eval.py | 27 ++++++++++++++++--- madminer/utils/ml/trainer.py | 52 +++++++++++++++++++++++++++++++----- madminer/utils/ml/utils.py | 16 ++++++----- 3 files changed, 77 insertions(+), 18 deletions(-) diff --git a/madminer/utils/ml/eval.py b/madminer/utils/ml/eval.py index 89d267e6d..3c013d667 100644 --- a/madminer/utils/ml/eval.py +++ b/madminer/utils/ml/eval.py @@ -114,20 +114,39 @@ def evaluate_ratio_model( if method_type == "parameterized_ratio": if return_grad_x: s_hat, log_r_hat, t_hat0, x_gradients = model( - theta0s, xs, 
return_grad_x=True, track_score=evaluate_score, create_gradient_graph=False + theta0s, + xs, + return_grad_x=True, + track_score=evaluate_score, + create_gradient_graph=False, ) else: - s_hat, log_r_hat, t_hat0 = model(theta0s, xs, track_score=evaluate_score, create_gradient_graph=False) + s_hat, log_r_hat, t_hat0 = model( + theta0s, + xs, + track_score=evaluate_score, + create_gradient_graph=False, + ) x_gradients = None t_hat1 = None + elif method_type == "double_parameterized_ratio": if return_grad_x: s_hat, log_r_hat, t_hat0, t_hat1, x_gradients = model( - theta0s, theta1s, xs, return_grad_x=True, track_score=evaluate_score, create_gradient_graph=False + theta0s, + theta1s, + xs, + return_grad_x=True, + track_score=evaluate_score, + create_gradient_graph=False, ) else: s_hat, log_r_hat, t_hat0, t_hat1 = model( - theta0s, theta1s, xs, track_score=evaluate_score, create_gradient_graph=False + theta0s, + theta1s, + xs, + track_score=evaluate_score, + create_gradient_graph=False, ) x_gradients = None else: diff --git a/madminer/utils/ml/trainer.py b/madminer/utils/ml/trainer.py index f07c444e5..542aa34bc 100644 --- a/madminer/utils/ml/trainer.py +++ b/madminer/utils/ml/trainer.py @@ -68,12 +68,15 @@ def train( logger.debug("Found external validation data set") self.check_data(data_val) self.report_data(data_val) + self._timer(stop="check data", start="make dataset") data_labels, dataset = self.make_dataset(data) + if data_val is not None: _, dataset_val = self.make_dataset(data_val) else: dataset_val = None + self._timer(stop="make dataset", start="make dataloader") train_loader, val_loader = self.make_dataloaders(dataset, dataset_val, validation_split, batch_size) @@ -84,6 +87,7 @@ def train( early_stopping = early_stopping and (validation_split is not None) and (epochs > 1) best_loss, best_model, best_epoch = None, None, None + if early_stopping and early_stopping_patience is None: logger.debug("Using early stopping with infinite patience") elif early_stopping: 
@@ -194,25 +198,39 @@ def check_data(data): def make_dataset(self, data): data_arrays = [] data_labels = [] + for key, value in six.iteritems(data): data_labels.append(key) data_arrays.append(value) dataset = NumpyDataset(*data_arrays, dtype=self.dtype) + return data_labels, dataset def make_dataloaders(self, dataset, dataset_val, validation_split, batch_size): if dataset_val is None and (validation_split is None or validation_split <= 0.0): train_loader = DataLoader( - dataset, batch_size=batch_size, shuffle=True, pin_memory=self.run_on_gpu, num_workers=self.n_workers + dataset, + batch_size=batch_size, + shuffle=True, + pin_memory=self.run_on_gpu, + num_workers=self.n_workers, ) val_loader = None elif dataset_val is not None: train_loader = DataLoader( - dataset, batch_size=batch_size, shuffle=True, pin_memory=self.run_on_gpu, num_workers=self.n_workers + dataset, + batch_size=batch_size, + shuffle=True, + pin_memory=self.run_on_gpu, + num_workers=self.n_workers, ) val_loader = DataLoader( - dataset_val, batch_size=batch_size, shuffle=True, pin_memory=self.run_on_gpu, num_workers=self.n_workers + dataset_val, + batch_size=batch_size, + shuffle=True, + pin_memory=self.run_on_gpu, + num_workers=self.n_workers, ) else: @@ -344,6 +362,7 @@ def batch_val(self, batch_data, loss_functions, loss_weights): loss = loss.item() loss_contributions = [contrib.item() for contrib in loss_contributions] self._timer(stop="validation sum losses") + return loss, loss_contributions def forward_pass(self, batch_data, loss_functions): @@ -364,13 +383,16 @@ def forward_pass(self, batch_data, loss_functions): Losses as scalar pyTorch tensors. 
""" - raise NotImplementedError + + raise NotImplementedError() @staticmethod def sum_losses(contributions, weights): loss = weights[0] * contributions[0] + for _w, _l in zip(weights[1:], contributions[1:]): loss = loss + _w * _l + return loss def optimizer_step(self, optimizer, loss, clip_gradient): @@ -379,8 +401,10 @@ def optimizer_step(self, optimizer, loss, clip_gradient): self._timer(stop="opt: zero grad", start="opt: backward") loss.backward() self._timer(start="opt: clip grad norm", stop="opt: backward") + if clip_gradient is not None: clip_grad_norm_(self.model.parameters(), clip_gradient) + self._timer(stop="opt: clip grad norm", start="opt: step") optimizer.step() self._timer(stop="opt: step") @@ -406,7 +430,13 @@ def report_batch(i_epoch, i_batch, loss_train): @staticmethod def report_epoch( - i_epoch, loss_labels, loss_train, loss_val, loss_contributions_train, loss_contributions_val, verbose=False + i_epoch, + loss_labels, + loss_train, + loss_val, + loss_contributions_train, + loss_contributions_val, + verbose=False, ): logging_fn = logger.info if verbose else logger.debug @@ -488,7 +518,7 @@ def check_data(self, data): raise ValueError("Missing required information 'x', 'theta', or 'y' in training data!") for key in data_keys: - if key not in ["x", "theta", "y", "r_xz", "t_xz"]: + if key not in {"x", "theta", "y", "r_xz", "t_xz"}: logger.warning("Unknown key %s in training data! 
Ignoring it.", key) self.calculate_model_score = "t_xz" in data_keys @@ -502,6 +532,7 @@ def forward_pass(self, batch_data, loss_functions): theta = batch_data["theta"].to(self.device, self.dtype, non_blocking=True) x = batch_data["x"].to(self.device, self.dtype, non_blocking=True) y = batch_data["y"].to(self.device, self.dtype, non_blocking=True) + try: r_xz = batch_data["r_xz"].to(self.device, self.dtype, non_blocking=True) except KeyError: @@ -510,6 +541,7 @@ def forward_pass(self, batch_data, loss_functions): t_xz = batch_data["t_xz"].to(self.device, self.dtype, non_blocking=True) except KeyError: t_xz = None + self._timer(stop="fwd: move data", start="fwd: check for nans") self._check_for_nans("Training data", theta, x, y) self._check_for_nans("Augmented training data", r_xz, t_xz) @@ -558,6 +590,7 @@ def forward_pass(self, batch_data, loss_functions): theta1 = batch_data["theta1"].to(self.device, self.dtype, non_blocking=True) x = batch_data["x"].to(self.device, self.dtype, non_blocking=True) y = batch_data["y"].to(self.device, self.dtype, non_blocking=True) + try: r_xz = batch_data["r_xz"].to(self.device, self.dtype, non_blocking=True) except KeyError: @@ -570,6 +603,7 @@ def forward_pass(self, batch_data, loss_functions): t_xz1 = batch_data["t_xz1"].to(self.device, self.dtype, non_blocking=True) except KeyError: t_xz1 = None + self._timer(stop="fwd: move data", start="fwd: check for nans") self._check_for_nans("Training data", theta0, theta1, x, y) self._check_for_nans("Augmented training data", r_xz, t_xz0, t_xz1) @@ -639,7 +673,7 @@ def check_data(self, data): raise ValueError("Missing required information 'x' or 'theta' in training data!") for key in data_keys: - if key not in ["x", "theta", "t_xz"]: + if key not in {"x", "theta", "t_xz"}: logger.warning("Unknown key %s in training data! 
Ignoring it.", key) self.calculate_model_score = "t_xz" in data_keys @@ -652,21 +686,25 @@ def forward_pass(self, batch_data, loss_functions): self._timer(start="fwd: move data") x = batch_data["x"].to(self.device, self.dtype, non_blocking=True) theta = batch_data["theta"].to(self.device, self.dtype, non_blocking=True) + try: t_xz = batch_data["t_xz"].to(self.device, self.dtype, non_blocking=True) except KeyError: t_xz = None + self._timer(stop="fwd: move data", start="fwd: check for nans") self._check_for_nans("Training data", theta, x) self._check_for_nans("Augmented training data", t_xz) self._timer(start="fwd: model.forward", stop="fwd: check for nans") + if self.calculate_model_score: theta.requires_grad = True _, log_likelihood, t_hat = self.model.log_likelihood_and_score(theta, x) else: _, log_likelihood = self.model.log_likelihood(theta, x) t_hat = None + self._timer(stop="fwd: model.forward", start="fwd: check for nans") self._check_for_nans("Model output", log_likelihood, t_hat) diff --git a/madminer/utils/ml/utils.py b/madminer/utils/ml/utils.py index 851a7997d..efe95c280 100644 --- a/madminer/utils/ml/utils.py +++ b/madminer/utils/ml/utils.py @@ -85,7 +85,7 @@ def check_required_data(method, r_xz, t_xz0, t_xz1, theta0, theta1, x, y): data_is_there = False if ( method - in [ + in { "carl", "carl2", "nde", @@ -98,23 +98,24 @@ def check_required_data(method, r_xz, t_xz0, t_xz1, theta0, theta1, x, y): "alice2", "rascal2", "alices2", - ] + } and theta0 is None ): data_is_there = False - if method in ["rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"] and r_xz is None: + if method in {"rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"} and r_xz is None: data_is_there = False if ( - method in ["carl", "carl2", "rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"] + method in {"carl", "carl2", "rolr", "alice", "rascal", "alices", "rolr2", "alice2", "rascal2", "alices2"} and y is None 
): data_is_there = False - if method in ["scandal", "rascal", "alices", "rascal2", "alices2", "sally", "sallino"] and t_xz0 is None: + if method in {"scandal", "rascal", "alices", "rascal2", "alices2", "sally", "sallino"} and t_xz0 is None: data_is_there = False - if method in ["carl2", "rolr2", "alice2", "rascal2", "alices2"] and theta1 is None: + if method in {"carl2", "rolr2", "alice2", "rascal2", "alices2"} and theta1 is None: data_is_there = False - if method in ["rascal2", "alices2"] and t_xz1 is None: + if method in {"rascal2", "alices2"} and t_xz1 is None: data_is_there = False + return data_is_there @@ -185,6 +186,7 @@ def get_loss(method, alpha): loss_labels = ["nll", "mse_score"] else: raise NotImplementedError("Unknown method {method}") + return loss_functions, loss_labels, loss_weights From fcf99f781f58abaa4002b2d5eba41a63994bd53f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 11:09:09 -0400 Subject: [PATCH 53/65] src: utils/ml module remove 'six' --- madminer/utils/ml/eval.py | 7 ++++--- madminer/utils/ml/losses.py | 4 +--- madminer/utils/ml/trainer.py | 21 ++++++++++++--------- madminer/utils/ml/utils.py | 15 +++++++-------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/madminer/utils/ml/eval.py b/madminer/utils/ml/eval.py index 3c013d667..f23c3faf2 100644 --- a/madminer/utils/ml/eval.py +++ b/madminer/utils/ml/eval.py @@ -1,11 +1,12 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging import numpy as np import torch from torch import tensor -from madminer.utils.ml.models.ratio import DenseSingleParameterizedRatioModel, DenseDoublyParameterizedRatioModel +from madminer.utils.ml.models.ratio import ( + DenseSingleParameterizedRatioModel, + DenseDoublyParameterizedRatioModel, +) logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/losses.py b/madminer/utils/ml/losses.py index 0daa3822d..64082c3f1 100644 --- 
a/madminer/utils/ml/losses.py +++ b/madminer/utils/ml/losses.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - +import logging import numpy as np import torch from torch.nn import BCELoss, MSELoss -import logging logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/trainer.py b/madminer/utils/ml/trainer.py index 542aa34bc..d1bb6fa76 100644 --- a/madminer/utils/ml/trainer.py +++ b/madminer/utils/ml/trainer.py @@ -1,16 +1,19 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import six import logging -from collections import OrderedDict import numpy as np import time import torch import torch.optim as optim -from madminer.utils.ml.utils import EarlyStoppingException, NanException, NumpyDataset + +from collections import OrderedDict +from torch.nn.utils import clip_grad_norm_ from torch.utils.data import DataLoader from torch.utils.data.sampler import SubsetRandomSampler -from torch.nn.utils import clip_grad_norm_ + +from madminer.utils.ml.utils import ( + EarlyStoppingException, + NanException, + NumpyDataset, +) logger = logging.getLogger(__name__) @@ -177,7 +180,7 @@ def train( @staticmethod def report_data(data): logger.debug("Training data:") - for key, value in six.iteritems(data): + for key, value in data.items(): if value is None: logger.debug(" %s: -", key) else: @@ -199,7 +202,7 @@ def make_dataset(self, data): data_arrays = [] data_labels = [] - for key, value in six.iteritems(data): + for key, value in data.items(): data_labels.append(key) data_arrays.append(value) dataset = NumpyDataset(*data_arrays, dtype=self.dtype) @@ -503,7 +506,7 @@ def _timer(self, start=None, stop=None): def _report_timer(self): logger.info("Training time spend on:") - for key, value in six.iteritems(self.timer): + for key, value in self.timer.items(): logger.info(f" {key:>32s}: {(value/3600.0):6.2f}h") diff --git a/madminer/utils/ml/utils.py b/madminer/utils/ml/utils.py 
index efe95c280..23d3f58ff 100644 --- a/madminer/utils/ml/utils.py +++ b/madminer/utils/ml/utils.py @@ -1,14 +1,13 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - +import logging import numpy as np import torch -from torch.nn import functional as F -import logging + from torch import optim +from torch.nn import functional as F +from torch.utils.data import Dataset -import madminer.utils from madminer.utils.ml import losses -from torch.utils.data import Dataset + logger = logging.getLogger(__name__) @@ -177,11 +176,11 @@ def get_loss(method, alpha): loss_weights = [1.0] loss_labels = ["mse_score"] elif method == "nde": - loss_functions = [madminer.utils.ml.losses.flow_nll] + loss_functions = [losses.flow_nll] loss_weights = [1.0] loss_labels = ["nll"] elif method == "scandal": - loss_functions = [madminer.utils.ml.losses.flow_nll, madminer.utils.ml.losses.flow_score_mse] + loss_functions = [losses.flow_nll, losses.flow_score_mse] loss_weights = [1.0, alpha] loss_labels = ["nll", "mse_score"] else: From 46b6d606b704b0dee999d645023d1c134b1525dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 11:39:53 -0400 Subject: [PATCH 54/65] src: utils/ml/models module improve fmt --- madminer/utils/ml/models/base.py | 11 +++++------ madminer/utils/ml/models/batch_norm.py | 9 ++------- madminer/utils/ml/models/made.py | 16 ++++++++++------ madminer/utils/ml/models/made_mog.py | 4 ++-- madminer/utils/ml/models/maf.py | 4 +--- madminer/utils/ml/models/maf_mog.py | 2 +- madminer/utils/ml/models/score.py | 10 +++++++--- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/madminer/utils/ml/models/base.py b/madminer/utils/ml/models/base.py index e06739211..3c40fd261 100644 --- a/madminer/utils/ml/models/base.py +++ b/madminer/utils/ml/models/base.py @@ -11,14 +11,13 @@ class BaseFlow(nn.Module): def __init__(self, n_inputs, **kwargs): super(BaseFlow, self).__init__() - self.n_inputs = n_inputs 
def forward(self, x, **kwargs): - raise NotImplementedError + raise NotImplementedError() def generate_samples(self, n_samples=1, u=None, **kwargs): - raise NotImplementedError + raise NotImplementedError() def log_likelihood(self, x, **kwargs): """ Calculates log p(x) with a Gaussian base density """ @@ -39,17 +38,17 @@ def log_likelihood_and_score(self, x, **kwargs): class BaseConditionalFlow(nn.Module): + def __init__(self, n_conditionals, n_inputs, **kwargs): super(BaseConditionalFlow, self).__init__() - self.n_conditionals = n_conditionals self.n_inputs = n_inputs def forward(self, theta, x, **kwargs): - raise NotImplementedError + raise NotImplementedError() def generate_samples(self, theta, u=None, **kwargs): - raise NotImplementedError + raise NotImplementedError() def log_likelihood(self, theta, x, **kwargs): """ Calculates u(x) and log p(x) with a Gaussian base density """ diff --git a/madminer/utils/ml/models/batch_norm.py b/madminer/utils/ml/models/batch_norm.py index 029929c5d..3fe14349c 100644 --- a/madminer/utils/ml/models/batch_norm.py +++ b/madminer/utils/ml/models/batch_norm.py @@ -56,20 +56,15 @@ def forward(self, x, fixed_params=False): return u, logdet def inverse(self, u): - """Calculates u -> x(u) (the approximate inverse transformation based on running mean and variance)""" - x = torch.sqrt(self.running_var) * u + self.running_mean - - return x + return torch.sqrt(self.running_var) * u + self.running_mean def generate_samples(self, n_samples=1, u=None, **kwargs): if u is None: u = tensor(rng.randn(n_samples, self.n_inputs)) - x = torch.sqrt(self.running_var) * u + self.running_mean - - return x + return torch.sqrt(self.running_var) * u + self.running_mean def to(self, *args, **kwargs): logger.debug("Transforming BatchNorm to %s", args) diff --git a/madminer/utils/ml/models/made.py b/madminer/utils/ml/models/made.py index 12add77ce..5f375bd18 100644 --- a/madminer/utils/ml/models/made.py +++ b/madminer/utils/ml/models/made.py @@ -52,7 +52,6 
@@ def forward(self, x, **kwargs): **kwargs : - Returns ------- @@ -94,7 +93,6 @@ def generate_samples(self, n_samples=1, u=None, **kwargs): (Default value = None) **kwargs : - Returns ------- @@ -131,7 +129,6 @@ def to(self, *args, **kwargs): **kwargs : - Returns ------- @@ -159,7 +156,13 @@ class ConditionalGaussianMADE(BaseConditionalFlow): """ """ def __init__( - self, n_conditionals, n_inputs, n_hiddens, activation="relu", input_order="sequential", mode="sequential" + self, + n_conditionals, + n_inputs, + n_hiddens, + activation="relu", + input_order="sequential", + mode="sequential", ): super(ConditionalGaussianMADE, self).__init__(n_conditionals, n_inputs) @@ -198,7 +201,10 @@ def forward(self, theta, x, **kwargs): x : **kwargs : +<<<<<<< HEAD +======= +>>>>>>> src: utils/ml/models module improve fmt Returns ------- @@ -267,7 +273,6 @@ def generate_samples(self, theta, u=None, **kwargs): (Default value = None) **kwargs : - Returns ------- @@ -306,7 +311,6 @@ def to(self, *args, **kwargs): **kwargs : - Returns ------- diff --git a/madminer/utils/ml/models/made_mog.py b/madminer/utils/ml/models/made_mog.py index db274fb72..4d119a788 100644 --- a/madminer/utils/ml/models/made_mog.py +++ b/madminer/utils/ml/models/made_mog.py @@ -73,7 +73,6 @@ def forward(self, theta, x, **kwargs): **kwargs : - Returns ------- @@ -174,7 +173,8 @@ def generate_samples(self, theta, u=None, **kwargs): u = u.to(*self.to_args, **self.to_kwargs) for i in range(1, self.n_inputs + 1): - self.forward(theta, x) # Sets Gaussian parameters (self.m and self.logp) and mixture coeffs (self.loga) + # Sets Gaussian parameters (self.m and self.logp) and mixture coeffs (self.loga) + self.forward(theta, x) ix = np.argwhere(self.input_order == i)[0, 0] diff --git a/madminer/utils/ml/models/maf.py b/madminer/utils/ml/models/maf.py index 5daa413f7..9df44f447 100644 --- a/madminer/utils/ml/models/maf.py +++ b/madminer/utils/ml/models/maf.py @@ -157,7 +157,7 @@ def __init__( if not 
(isinstance(input_order, str) and input_order != "random"): input_order = made.input_order[::-1] - # Batch normalizatino + # Batch normalization self.bns = None if self.batch_norm: self.bns = nn.ModuleList() @@ -220,7 +220,6 @@ def generate_samples(self, theta, u=None, **kwargs): (Default value = None) **kwargs : - Returns ------- @@ -252,7 +251,6 @@ def to(self, *args, **kwargs): **kwargs : - Returns ------- diff --git a/madminer/utils/ml/models/maf_mog.py b/madminer/utils/ml/models/maf_mog.py index be00d8a9f..78468feb7 100644 --- a/madminer/utils/ml/models/maf_mog.py +++ b/madminer/utils/ml/models/maf_mog.py @@ -65,7 +65,7 @@ def __init__( mode=mode, ) - # Batch normalizatino + # Batch normalization self.bns = None if self.batch_norm: self.bns = nn.ModuleList() diff --git a/madminer/utils/ml/models/score.py b/madminer/utils/ml/models/score.py index 3ca339ac3..02575a89d 100644 --- a/madminer/utils/ml/models/score.py +++ b/madminer/utils/ml/models/score.py @@ -53,9 +53,13 @@ def forward(self, x, return_grad_x=False): # Calculate gradient if return_grad_x: - x_gradient = grad(t_hat, x, grad_outputs=torch.ones_like(t_hat.data), only_inputs=True, create_graph=True)[ - 0 - ] + x_gradient = grad( + t_hat, + x, + grad_outputs=torch.ones_like(t_hat.data), + only_inputs=True, + create_graph=True, + )[0] return t_hat, x_gradient From b8acb4c5c9d79a6d027ab7dcbfb957fad38ce5b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Thu, 24 Sep 2020 11:44:12 -0400 Subject: [PATCH 55/65] src: utils/ml/models module remove 'future' --- madminer/utils/ml/models/base.py | 3 +-- madminer/utils/ml/models/batch_norm.py | 4 +--- madminer/utils/ml/models/made.py | 6 ++---- madminer/utils/ml/models/made_mog.py | 7 +++---- madminer/utils/ml/models/maf.py | 5 ++--- madminer/utils/ml/models/maf_mog.py | 7 +++---- madminer/utils/ml/models/masks.py | 7 +++---- madminer/utils/ml/models/ratio.py | 9 ++++----- madminer/utils/ml/models/score.py | 7 +++---- 9 files changed, 22 
insertions(+), 33 deletions(-) diff --git a/madminer/utils/ml/models/base.py b/madminer/utils/ml/models/base.py index 3c40fd261..1772c3a72 100644 --- a/madminer/utils/ml/models/base.py +++ b/madminer/utils/ml/models/base.py @@ -1,8 +1,7 @@ -from __future__ import absolute_import, division, print_function - import numpy as np import torch import torch.nn as nn + from torch.autograd import grad diff --git a/madminer/utils/ml/models/batch_norm.py b/madminer/utils/ml/models/batch_norm.py index 3fe14349c..caa39f1d6 100644 --- a/madminer/utils/ml/models/batch_norm.py +++ b/madminer/utils/ml/models/batch_norm.py @@ -1,11 +1,9 @@ -from __future__ import absolute_import, division, print_function - import logging import numpy.random as rng import torch -from torch import tensor from madminer.utils.ml.models.base import BaseFlow +from torch import tensor logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/models/made.py b/madminer/utils/ml/models/made.py index 5f375bd18..8f1145f1a 100644 --- a/madminer/utils/ml/models/made.py +++ b/madminer/utils/ml/models/made.py @@ -1,16 +1,14 @@ -from __future__ import absolute_import, division, print_function - +import logging import numpy as np import numpy.random as rng import torch -from torch import tensor import torch.nn as nn import torch.nn.functional as F from madminer.utils.ml.models.base import BaseFlow, BaseConditionalFlow from madminer.utils.ml.models.masks import create_degrees, create_masks, create_weights, create_weights_conditional from madminer.utils.ml.utils import get_activation_function -import logging +from torch import tensor logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/models/made_mog.py b/madminer/utils/ml/models/made_mog.py index 4d119a788..15fe54f73 100644 --- a/madminer/utils/ml/models/made_mog.py +++ b/madminer/utils/ml/models/made_mog.py @@ -1,14 +1,13 @@ -from __future__ import absolute_import, division, print_function - +import logging import numpy as np import 
numpy.random as rng import torch -from torch import tensor import torch.nn.functional as F + from madminer.utils.ml.models.base import BaseConditionalFlow from madminer.utils.ml.models.masks import create_degrees, create_masks, create_weights_conditional from madminer.utils.ml.utils import get_activation_function -import logging +from torch import tensor logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/models/maf.py b/madminer/utils/ml/models/maf.py index 9df44f447..22aa77141 100644 --- a/madminer/utils/ml/models/maf.py +++ b/madminer/utils/ml/models/maf.py @@ -1,12 +1,11 @@ -from __future__ import absolute_import, division, print_function - +import logging import numpy.random as rng import torch.nn as nn + from torch import tensor from madminer.utils.ml.models.base import BaseFlow, BaseConditionalFlow from madminer.utils.ml.models.made import GaussianMADE, ConditionalGaussianMADE from madminer.utils.ml.models.batch_norm import BatchNorm -import logging logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/models/maf_mog.py b/madminer/utils/ml/models/maf_mog.py index 78468feb7..73b9adf40 100644 --- a/madminer/utils/ml/models/maf_mog.py +++ b/madminer/utils/ml/models/maf_mog.py @@ -1,11 +1,10 @@ -from __future__ import absolute_import, division, print_function - +import logging import torch.nn as nn + from madminer.utils.ml.models.base import BaseConditionalFlow -from madminer.utils.ml.models.made import ConditionalGaussianMADE from madminer.utils.ml.models.batch_norm import BatchNorm +from madminer.utils.ml.models.made import ConditionalGaussianMADE from madminer.utils.ml.models.made_mog import ConditionalMixtureMADE -import logging logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/models/masks.py b/madminer/utils/ml/models/masks.py index dfa2875e6..e677794bf 100644 --- a/madminer/utils/ml/models/masks.py +++ b/madminer/utils/ml/models/masks.py @@ -1,10 +1,9 @@ -from __future__ import absolute_import, division, 
print_function - +import logging import numpy as np import numpy.random as rng -from torch import tensor import torch.nn as nn -import logging + +from torch import tensor logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/models/ratio.py b/madminer/utils/ml/models/ratio.py index a1821d5dc..bfd06df8f 100644 --- a/madminer/utils/ml/models/ratio.py +++ b/madminer/utils/ml/models/ratio.py @@ -1,11 +1,10 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - +import logging +import numpy as np import torch import torch.nn as nn -import numpy as np -from torch.autograd import grad + from madminer.utils.ml.utils import get_activation_function, check_for_nan, check_for_nonpos, NanException -import logging +from torch.autograd import grad logger = logging.getLogger(__name__) diff --git a/madminer/utils/ml/models/score.py b/madminer/utils/ml/models/score.py index 02575a89d..9d28e734d 100644 --- a/madminer/utils/ml/models/score.py +++ b/madminer/utils/ml/models/score.py @@ -1,10 +1,9 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - +import logging import torch import torch.nn as nn -from torch.autograd import grad + from madminer.utils.ml.utils import get_activation_function -import logging +from torch.autograd import grad logger = logging.getLogger(__name__) From bd06b891f58d4bee27f7d0892ae0e729a825cd09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 26 Oct 2020 15:53:32 -0400 Subject: [PATCH 56/65] fix: limits module fmt + typos --- madminer/limits/asymptotic_limits.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/madminer/limits/asymptotic_limits.py b/madminer/limits/asymptotic_limits.py index b3c46da91..4dec96b0e 100644 --- a/madminer/limits/asymptotic_limits.py +++ b/madminer/limits/asymptotic_limits.py @@ -82,7 +82,7 @@ def observed_limits( `x_observed` specifies the observed data as an array of observables, using the 
same observables and their order as used throughout the MadMiner workflow. - The p-values with frequent hypothesis tests using the likelihood ratio as test statistic. The asymptotic + The p-values with frequentist hypothesis tests using the likelihood ratio as test statistic. The asymptotic approximation is used, see https://arxiv.org/abs/1007.1727. Depending on the keyword `mode`, the likelihood ratio is calculated with one of several different methods: @@ -1089,11 +1089,17 @@ def _calculate_log_likelihood_xsec(self, n_observed, theta_grid, luminosity=3000 def _calculate_log_likelihood_ratio_kinematics(self, x_observed, theta_grid, model, theta1=None): if isinstance(model, ParameterizedRatioEstimator): log_r, _ = model.evaluate_log_likelihood_ratio( - x=x_observed, theta=theta_grid, test_all_combinations=True, evaluate_score=False + x=x_observed, + theta=theta_grid, + test_all_combinations=True, + evaluate_score=False, ) elif isinstance(model, LikelihoodEstimator): log_r, _ = model.evaluate_log_likelihood( - x=x_observed, theta=theta_grid, test_all_combinations=True, evaluate_score=False + x=x_observed, + theta=theta_grid, + test_all_combinations=True, + evaluate_score=False, ) elif isinstance(model, Ensemble) and model.estimator_type == "parameterized_ratio": log_r, _ = model.evaluate_log_likelihood_ratio( @@ -1114,7 +1120,7 @@ def _calculate_log_likelihood_ratio_kinematics(self, x_observed, theta_grid, mod else: raise NotImplementedError( "Likelihood ratio estimation is currently only implemented for " - "ParameterizedRatioEstimator and LikelihoodEstimator instancees" + "ParameterizedRatioEstimator and LikelihoodEstimator instances" ) return log_r From 4ffbb0243edbf081a3b958e7abe336bafba1efe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Mon, 26 Oct 2020 16:11:26 -0400 Subject: [PATCH 57/65] fix: utils/interfaces module multi-line cmds --- madminer/utils/interfaces/mg.py | 61 +++++++++++++++++++-------------- 1 file changed, 36 
insertions(+), 25 deletions(-) diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py index 0f436c897..2363c9c67 100644 --- a/madminer/utils/interfaces/mg.py +++ b/madminer/utils/interfaces/mg.py @@ -237,34 +237,41 @@ def setup_mg_with_scripts( # Card copying commands copy_commands = "" if run_card_file_from_mgprocdir is not None: - copy_commands += f"cp " \ - f"{mg_process_directory_placeholder}/{run_card_file_from_mgprocdir} " \ + copy_commands += ( + f"cp " + f"{mg_process_directory_placeholder}/{run_card_file_from_mgprocdir} " f"{mg_process_directory_placeholder}/Cards/run_card.dat\n" - + ) if param_card_file_from_mgprocdir is not None: - copy_commands += f"cp " \ - f"{mg_process_directory_placeholder}/{param_card_file_from_mgprocdir} " \ + copy_commands += ( + f"cp " + f"{mg_process_directory_placeholder}/{param_card_file_from_mgprocdir} " f"{mg_process_directory_placeholder}/Cards/param_card.dat\n" - + ) if reweight_card_file_from_mgprocdir is not None and not is_background: - copy_commands += f"cp " \ - f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \ + copy_commands += ( + f"cp " + f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n" - + ) if pythia8_card_file_from_mgprocdir is not None and order == "LO": - copy_commands += f"cp " \ - f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \ + copy_commands += ( + f"cp " + f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " f"{mg_process_directory_placeholder}/Cards/pythia8_card.dat\n" - + ) if pythia8_card_file_from_mgprocdir is not None and order == "NLO": - copy_commands += f"cp " \ - f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " \ + copy_commands += ( + f"cp " + f"{mg_process_directory_placeholder}/{pythia8_card_file_from_mgprocdir} " f"{mg_process_directory_placeholder}/Cards/shower_card.dat\n" - + ) 
if configuration_file_from_mgprocdir is not None: - copy_commands += f"cp " \ - f"{mg_process_directory_placeholder}/{configuration_file_from_mgprocdir} " \ + copy_commands += ( + f"cp " + f"{mg_process_directory_placeholder}/{configuration_file_from_mgprocdir} " f"{mg_process_directory_placeholder}/Cards/me5_configuration.txt\n" + ) # Replace environment variable in proc card replacement_command = """sed -e 's@\$mgprocdir@'"$mgprocdir"'@' {}/{} > {}/{}""".format( @@ -303,11 +310,12 @@ def setup_mg_with_scripts( make_file_executable(script_file) # How to call it from master script - call_placeholder = \ - f"{mg_process_directory_placeholder}/{script_file_from_mgprocdir} " \ - f"{mg_directory_placeholder} " \ - f"{mg_process_directory_placeholder} " \ + call_placeholder = ( + f"{mg_process_directory_placeholder}/{script_file_from_mgprocdir} " + f"{mg_directory_placeholder} " + f"{mg_process_directory_placeholder} " f"{log_dir_placeholder}" + ) return call_placeholder @@ -518,9 +526,11 @@ def setup_mg_reweighting_with_scripts( # Card copying commands if reweight_card_file_from_mgprocdir is not None: - copy_commands = f"cp " \ - f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " \ + copy_commands = ( + f"cp " + f"{mg_process_directory_placeholder}/{reweight_card_file_from_mgprocdir} " f"{mg_process_directory_placeholder}/Cards/reweight_card.dat\n" + ) else: copy_commands = "" @@ -544,9 +554,10 @@ def setup_mg_reweighting_with_scripts( make_file_executable(script_file) # How to call it from master script - call_instruction = \ - f"{mg_process_directory}/{script_file_from_mgprocdir} " \ + call_instruction = ( + f"{mg_process_directory}/{script_file_from_mgprocdir} " f"[MG_process_directory] [log_directory]" + ) return call_instruction From 636bb3df14b2b069c2adec3cec684b6056cea02a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 20 Apr 2021 09:50:05 -0400 Subject: [PATCH 58/65] fix: utils/interfaces module remove python2 
--- madminer/utils/interfaces/mg.py | 53 +++++++++------------------------ 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/madminer/utils/interfaces/mg.py b/madminer/utils/interfaces/mg.py index 2363c9c67..92d2ef73d 100644 --- a/madminer/utils/interfaces/mg.py +++ b/madminer/utils/interfaces/mg.py @@ -15,7 +15,6 @@ def generate_mg_process( ufo_model_directory=None, log_file=None, initial_command=None, - explicit_python_call=False, python_executable=None, ): @@ -47,9 +46,6 @@ def generate_mg_process( log_file : str or None, optional Path to a log file in which the MadGraph output is saved. Default value: None. - explicit_python_call : bool, optional - Calls `python2.7` instead of `python`. - python_executable : None or str, optional Overwrites the default Python executable @@ -76,16 +72,11 @@ def generate_mg_process( myfile.write(f"\n") myfile.write(f"output {mg_process_directory}") - # Call MG5 - initial_command = f"{initial_command}; " if initial_command else "" - - # Explicitly call Python 2 if necessary - if explicit_python_call: - python_call = f"{python_executable} " if python_executable is not None else "python2.7 " - else: - python_call = "" + # Call specific initial command and Python binary + initial_command = f"{initial_command}; " if initial_command is not None else "" + python_binary = f"{python_executable} " if python_executable is not None else "" - command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {temp_proc_card_file}" + command = f"{initial_command}{python_binary}{mg_directory}/bin/mg5_aMC {temp_proc_card_file}" logger.info(f"Calling MadGraph: {command}") _ = call_command(cmd=command, log_file=log_file) @@ -104,7 +95,6 @@ def setup_mg_with_scripts( initial_command=None, log_dir=None, log_file_from_logdir=None, - explicit_python_call=False, order="LO", python_executable=None, ): @@ -160,9 +150,6 @@ def setup_mg_with_scripts( Path to a log file in which the MadGraph output is saved, relative from the default log 
directory. Default value: None. - explicit_python_call : bool, optional - Calls `python2.7` instead of `python`. - python_executable : None or str, optional Overwrites the default Python executable @@ -230,10 +217,6 @@ def setup_mg_with_scripts( with open(proc_card_filename, "w") as file: file.write(mg_commands) - # Initial commands - if initial_command is None: - initial_command = "" - # Card copying commands copy_commands = "" if run_card_file_from_mgprocdir is not None: @@ -281,11 +264,9 @@ def setup_mg_with_scripts( "Cards/mg_commands.mg5", ) - # Explicitly call Python 2 if necessary - if explicit_python_call: - python_call = python_executable + " " if python_executable is not None else "python2.7 " - else: - python_call = "" + # Call specific initial command and Python binary + initial_command = f"{initial_command} " if initial_command is not None else "" + python_binary = f"{python_executable} " if python_executable is not None else "" # Put together script script = ( @@ -297,7 +278,7 @@ def setup_mg_with_scripts( placeholder_definition, copy_commands, replacement_command, - python_call, + python_binary, mg_directory_placeholder, mg_process_directory_placeholder, "Cards/mg_commands.mg5", @@ -332,7 +313,6 @@ def run_mg( is_background=False, initial_command=None, log_file=None, - explicit_python_call=False, order="LO", python_executable=None, ): @@ -382,8 +362,8 @@ def run_mg( log_file : str or None, optional Path to a log file in which the MadGraph output is saved. Default value: None. - explicit_python_call : bool, optional - Calls `python2.7` instead of `python`. 
+ python_executable : None or str, optional + Overwrites the default Python executable Returns ------- @@ -439,16 +419,11 @@ def run_mg( with open(proc_card_filename, "w") as file: file.write(mg_commands) - # Call MG5 - initial_command = f"{initial_command}; " if initial_command else "" - - # Explicitly call Python 2 if necessary - if explicit_python_call: - python_call = f"{python_executable} " if python_executable is not None else "python2.7 " - else: - python_call = "" + # Call specific initial command and Python binary + initial_command = f"{initial_command}; " if initial_command is not None else "" + python_binary = f"{python_executable} " if python_executable is not None else "" - command = f"{initial_command}{python_call}{mg_directory}/bin/mg5_aMC {proc_card_filename}" + command = f"{initial_command}{python_binary}{mg_directory}/bin/mg5_aMC {proc_card_filename}" logger.info(f"Calling MadGraph: {command}") _ = call_command(cmd=command, log_file=log_file) From 1ca1e3888896c128ffb8f66118af532723ab0e1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 20 Apr 2021 10:25:31 -0400 Subject: [PATCH 59/65] fix: utils/interfaces module remove 'six' --- madminer/utils/interfaces/madminer_hdf5.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py index cf721799d..864c4ec03 100644 --- a/madminer/utils/interfaces/madminer_hdf5.py +++ b/madminer/utils/interfaces/madminer_hdf5.py @@ -359,12 +359,12 @@ def _save_finite_differences(filename, finite_difference_benchmarks, finite_diff io_tag = "a" # Read-write if file exists, otherwise create with h5py.File(filename, io_tag) as f: n_keys = len(finite_difference_benchmarks) - n_values = len(finite_difference_benchmarks[list(six.iterkeys(finite_difference_benchmarks))[0]]) + n_values = len(finite_difference_benchmarks[list(finite_difference_benchmarks.keys())[0]]) - keys_ascii = 
[key.encode("ascii", "ignore") for key in six.iterkeys(finite_difference_benchmarks)] + keys_ascii = [key.encode("ascii", "ignore") for key in finite_difference_benchmarks.keys()] values_ascii = [ - [val.encode("ascii", "ignore") for val in six.itervalues(values)] - for values in six.itervalues(finite_difference_benchmarks) + [val.encode("ascii", "ignore") for val in values.values()] + for values in finite_difference_benchmarks.values() ] f.create_dataset("finite_differences/base_benchmarks", (n_keys,), dtype="S256", data=keys_ascii) From e898f82a3b9ea1add99e379a112fa19de7a263d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 20 Apr 2021 10:11:54 -0400 Subject: [PATCH 60/65] fix: core module remove python2 --- madminer/core/madminer.py | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py index 9cafddfc0..77ec1266b 100644 --- a/madminer/core/madminer.py +++ b/madminer/core/madminer.py @@ -716,7 +716,6 @@ def run( log_directory=None, temp_directory=None, initial_command=None, - python2_override=False, systematics=None, order="LO", python_executable=None, @@ -793,11 +792,6 @@ def run( Initial shell commands that have to be executed before MG is run (e.g. to load a virtual environment). Default value: None. - python2_override : bool, optional - If True, MadMiner explicitly calls "python2" instead of relying on the system Python version to be - Python 2.6 or Python 2.7. If you use systematics, make sure that the python interface of LHAPDF was compiled - with the Python version you are using. Default: False. - systematics : None or list of str, optional If list of str, defines which systematics are used for this run. 
@@ -832,7 +826,6 @@ def run( log_directory=log_directory, temp_directory=temp_directory, initial_command=initial_command, - python2_override=python2_override, systematics=systematics, order=order, python_executable=python_executable, @@ -854,7 +847,6 @@ def run_multiple( log_directory=None, temp_directory=None, initial_command=None, - python2_override=False, systematics=None, order="LO", python_executable=None, @@ -924,11 +916,6 @@ def run_multiple( If not specified and `python2_override` is True, it adds the user-installed Python2 binaries to the PATH. Default value: None. - python2_override : bool, optional - If True, MadMiner explicitly calls "python2" instead of relying on the system Python version to be - Python 2.6 or Python 2.7. If you use systematics, make sure that the python interface of LHAPDF was compiled - with the Python version you are using. Default: False. - systematics : None or list of str, optional If list of str, defines which systematics are used for these runs. @@ -958,19 +945,14 @@ def run_multiple( if sample_benchmarks is None: sample_benchmarks = [benchmark for benchmark in self.benchmarks] - # Python 2 override options - - # Gives 'python2_override' full power if 'initial_command' is empty. - # (Reference: https://github.com/diana-hep/madminer/issues/422) - if python2_override and initial_command is None and not python_executable: - logger.warning( - "The keyword python2_override is discouraged. " - "Instead, consider using python_executable." - ) + # This snippet is useful when using virtual envs. + # (Derives from a Python2 - Python3 issue). 
+ # Ref: https://github.com/diana-hep/madminer/issues/422 + if python_executable and initial_command is None: + logger.info(f"Adding {python_executable} bin folder to PATH") + binary_path = os.popen(f"command -v {python_executable}").read().strip() + binary_folder = os.path.dirname(binary_path) - logger.info("Adding Python2.7 bin folder to PATH") - binary_path = os.popen("command -v python2.7").read().strip() - binary_folder = os.path.dirname(os.path.realpath(binary_path)) initial_command = f"export PATH={binary_folder}:$PATH" logger.info(f"Using Python executable {binary_path}") @@ -985,7 +967,6 @@ def run_multiple( ufo_model_directory=ufo_model_directory, initial_command=initial_command, log_file=log_file_generate, - explicit_python_call=python2_override or (python_executable is not None), python_executable=python_executable, ) @@ -1089,7 +1070,6 @@ def run_multiple( initial_command=initial_command, log_dir=log_directory, log_file_from_logdir=log_file_run, - explicit_python_call=python2_override or (python_executable is not None), python_executable=python_executable, order=order, ) @@ -1107,7 +1087,6 @@ def run_multiple( is_background=is_background, initial_command=initial_command, log_file=f"{log_directory}/{log_file_run}", - explicit_python_call=python2_override or (python_executable is not None), python_executable=python_executable, order=order, ) From c31c9055b26c08704093abffc473dd438acc0f21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 20 Apr 2021 10:01:39 -0400 Subject: [PATCH 61/65] fix: core module remove 'six' --- madminer/core/madminer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/madminer/core/madminer.py b/madminer/core/madminer.py index 77ec1266b..3c5d94e22 100644 --- a/madminer/core/madminer.py +++ b/madminer/core/madminer.py @@ -398,12 +398,11 @@ def finite_differences(self, epsilon=0.01): self.finite_difference_benchmarks = OrderedDict() self.finite_difference_epsilon = epsilon - for 
benchmark_key, benchmark_spec in six.iteritems( - self.benchmarks.copy() - ): # Copy is necessary to avoid endless loop :/ + # Copy is necessary to avoid endless loop :/ + for benchmark_key, benchmark_spec in self.benchmarks.copy().items(): fd_keys = {} - for param_key, param_value in six.iteritems(benchmark_spec): + for param_key, param_value in benchmark_spec.items(): fd_key = benchmark_key + "_plus_" + param_key fd_spec = benchmark_spec.copy() fd_spec[param_key] += epsilon From 2da8c96611f8ed1e12594e0eeb735617abcdddfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 20 Apr 2021 10:21:00 -0400 Subject: [PATCH 62/65] docs: update installation guide --- docs/installation.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index b6b79ce1b..269c06bda 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -3,10 +3,8 @@ ## Simulator dependencies Make sure the following tools are installed and running: -- MadGraph (we've tested our setup with MG5_aMC v2.6.2 and v2.6.5). See - [https://launchpad.net/mg5amcnlo](https://launchpad.net/mg5amcnlo) for installation instructions. Note that MadGraph - requires a Fortran compiler as well as Python 2.6 or 2.7. (Note that you can still run most MadMiner analysis steps - with Python 3.) +- MadGraph (we've tested our setup with MG5_aMC v2.8.0+). See [https://launchpad.net/mg5amcnlo](https://launchpad.net/mg5amcnlo) + for installation instructions. Note that MadGraph requires a Fortran compiler as well as Python 3.6+. - For the analysis of systematic uncertainties, LHAPDF6 has to be installed with Python support (see also [the documentation of MadGraph's systematics tool](https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics)). 
From 5e86402b659dad3fe7f4a7910f4fe1d6a474f2df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Fri, 16 Apr 2021 09:42:47 -0400 Subject: [PATCH 63/65] Bound uproot between 3.11 and 4.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 081a28cdf..42f5b291b 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ "scipy>=1.0.0", "scikit-hep>=0.5.0, <0.6.0", "torch>=1.0.0", - "uproot", + "uproot>=3.11.0,<4.0.0", ] EXTRAS_DOCS = sorted( From 6379ad6a860e3a55e04ad95fe522c6570aa26310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Fri, 16 Apr 2021 12:48:48 -0400 Subject: [PATCH 64/65] Set uproot3 instead of uproot Co-authored-by: Matthew Feickert --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 42f5b291b..b45ea5ff2 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ "scipy>=1.0.0", "scikit-hep>=0.5.0, <0.6.0", "torch>=1.0.0", - "uproot>=3.11.0,<4.0.0", + "uproot3>=3.14.1", ] EXTRAS_DOCS = sorted( From 9cc0cac6611a8f324511c70dce734089b8cc71ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Fri, 16 Apr 2021 12:56:34 -0400 Subject: [PATCH 65/65] Use uproot3 instead of uproot (src) --- madminer/utils/interfaces/delphes_root.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py index ba6b5cca7..cca262864 100644 --- a/madminer/utils/interfaces/delphes_root.py +++ b/madminer/utils/interfaces/delphes_root.py @@ -1,7 +1,7 @@ import logging import numpy as np import os -import uproot +import uproot3 from collections import OrderedDict from madminer.utils.particle import MadMinerParticle @@ -39,7 +39,7 @@ def parse_delphes_root_file( logger.debug("Extracting weights %s", weight_labels) # Delphes ROOT file - root_file = uproot.open(delphes_sample_file) + root_file = uproot3.open(delphes_sample_file) # 
Delphes tree tree = root_file["Delphes"]