diff --git a/.github/workflows/auto-cancellation.yml b/.github/workflows/auto-cancellation.yml deleted file mode 100644 index 8c33f6c7..00000000 --- a/.github/workflows/auto-cancellation.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: auto cancellation running job -on: pull_request - -jobs: - cancel: - name: auto-cancellation-running-action - runs-on: ubuntu-latest - steps: - - uses: fauguste/auto-cancellation-running-action@0.1.4 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a7ff0069..ea6afc49 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,17 +6,27 @@ on: branches: [ "master"] release: types: - - "published" + - "published" jobs: + cancel: + name: 'Cancel Previous Runs' + runs-on: ubuntu-latest + timeout-minutes: 3 + steps: + - uses: styfle/cancel-workflow-action@0.10.0 + with: + all_but_latest: true + access_token: ${{ github.token }} + pre-commit: name: Format runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: actions/setup-python@v4 checks: runs-on: ${{ matrix.os }} @@ -24,12 +34,12 @@ jobs: fail-fast: false matrix: os: - - ubuntu-latest + - ubuntu-latest python-version: - - "3.7" - - "3.8" - - "3.9" - - "3.10" + - "3.7" + - "3.8" + - "3.9" + - "3.10" include: - os: windows-latest python-version: "3.7" @@ -41,87 +51,87 @@ jobs: python-version: "3.10" name: Check Python ${{ matrix.python-version }} ${{ matrix.os }} steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Install package - run: python -m pip install -e .[test] - - - name: Test package - run: python -m pytest --doctest-modules --cov=hepstats --cov-report=xml - - - name: Upload coverage to Codecov - if: matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest' - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - name: codecov-umbrella - fail_ci_if_error: true + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install package + run: python -m pip install -e .[test] + + - name: Test package + run: python -m pytest --doctest-modules --cov=hepstats --cov-report=xml + + - name: Upload coverage to Codecov + if: matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest' + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: true dist: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 + - uses: actions/checkout@v3 + with: + fetch-depth: 0 - - name: Build - run: pipx run build + - name: Build + run: pipx run build - - uses: actions/upload-artifact@v3 - with: - path: dist/* + - uses: actions/upload-artifact@v3 + with: + path: dist/* - - name: Check metadata - run: pipx run twine check dist/* + - name: Check metadata + run: pipx run twine check dist/* docs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - name: Install dependencies - run: | - pip install -U -q -e .[docs] - pip list - - name: build docs - run: | - sphinx-build -b html docs docs/_build/html - touch 
docs/_build/html/.nojekyll - - - name: Deploy docs to GitHub Pages - if: success() && github.event_name == 'push' && github.ref == 'refs/heads/master' - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: docs/_build/html - force_orphan: true - user_name: 'github-actions[bot]' - user_email: 'github-actions[bot]@users.noreply.github.com' - commit_message: Deploy to GitHub pages + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: | + pip install -U -q -e .[docs] + pip list + - name: build docs + run: | + sphinx-build -b html docs docs/_build/html + touch docs/_build/html/.nojekyll + + - name: Deploy docs to GitHub Pages + if: success() && github.event_name == 'push' && github.ref == 'refs/heads/master' + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/_build/html + force_orphan: true + user_name: 'github-actions[bot]' + user_email: 'github-actions[bot]@users.noreply.github.com' + commit_message: Deploy to GitHub pages publish: - needs: [dist] + needs: [ dist ] runs-on: ubuntu-latest if: github.event_name == 'release' && github.event.action == 'published' steps: - - uses: actions/download-artifact@v3 - with: - name: artifact - path: dist - - - uses: pypa/gh-action-pypi-publish@v1.5.1 - with: - password: ${{ secrets.pypi_password }} + - uses: actions/download-artifact@v3 + with: + name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@v1.5.1 + with: + password: ${{ secrets.pypi_password }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 29419ede..f1151918 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,94 +1,99 @@ repos: - - repo: https://github.com/psf/black - rev: 22.8.0 - hooks: - - id: black - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 - hooks: - - id: check-added-large-files - args: [ '--maxkb=1000' ] - - id: mixed-line-ending - exclude: ^notebooks/ - - id: trailing-whitespace - exclude: ^notebooks/ - - id: check-merge-conflict - - id: check-case-conflict - - id: check-symlinks - - id: check-yaml - exclude: ^notebooks/ - - id: requirements-txt-fixer - - id: debug-statements - - id: end-of-file-fixer - - repo: https://github.com/mgedmin/check-manifest - rev: "0.48" - hooks: - - id: check-manifest - args: [ --update, --no-build-isolation ] - additional_dependencies: [ setuptools-scm ] - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.971 - hooks: - - id: mypy - files: src +- repo: https://github.com/psf/black + rev: 22.6.0 + hooks: + - id: black +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-added-large-files + args: ['--maxkb=1000'] + - id: mixed-line-ending + exclude: ^notebooks/ + - id: trailing-whitespace + exclude: ^notebooks/ + - id: check-merge-conflict + - id: check-case-conflict + - id: check-symlinks + - id: check-yaml + exclude: ^notebooks/ + - id: requirements-txt-fixer + - id: debug-statements + - id: end-of-file-fixer +- repo: https://github.com/mgedmin/check-manifest + rev: "0.48" + hooks: + - id: check-manifest + args: [ --update, --no-build-isolation ] + additional_dependencies: [ setuptools-scm ] +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.971 + hooks: + - id: mypy + files: src - - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.9.0 - hooks: - - id: python-use-type-annotations - - id: 
python-check-mock-methods - - id: python-no-eval - - id: rst-backticks - - id: rst-directive-colons +- repo: https://github.com/roy-ht/pre-commit-jupyter + rev: v1.2.1 + hooks: + - id: jupyter-notebook-cleanup - - repo: https://github.com/asottile/pyupgrade - rev: v2.37.3 - hooks: - - id: pyupgrade - args: [ --py37-plus ] +- repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.9.0 + hooks: + - id: python-use-type-annotations + - id: python-check-mock-methods + - id: python-no-eval + - id: rst-backticks + - id: rst-directive-colons - - repo: https://github.com/asottile/setup-cfg-fmt - rev: v2.0.0 - hooks: - - id: setup-cfg-fmt +- repo: https://github.com/asottile/pyupgrade + rev: v2.37.3 + hooks: + - id: pyupgrade + args: [ --py37-plus ] - # Notebook formatting - - repo: https://github.com/nbQA-dev/nbQA - rev: 1.4.0 - hooks: - - id: nbqa-isort - additional_dependencies: [ isort==5.6.4 ] +- repo: https://github.com/asottile/setup-cfg-fmt + rev: v2.0.0 + hooks: + - id: setup-cfg-fmt - - id: nbqa-pyupgrade - additional_dependencies: [ pyupgrade==2.7.4 ] - args: [ --py37-plus ] +# Notebook formatting +- repo: https://github.com/nbQA-dev/nbQA + rev: 1.4.0 + hooks: + - id: nbqa-isort + additional_dependencies: [ isort==5.6.4 ] + - id: nbqa-pyupgrade + additional_dependencies: [ pyupgrade==2.7.4 ] + args: [ --py37-plus ] - - repo: https://github.com/roy-ht/pre-commit-jupyter - rev: v1.2.1 - hooks: - - id: jupyter-notebook-cleanup - - repo: https://github.com/sondrelg/pep585-upgrade - rev: 'v1.0' - hooks: - - id: upgrade-type-hints - args: [ '--futures=true' ] +- repo: https://github.com/roy-ht/pre-commit-jupyter + rev: v1.2.1 + hooks: + - id: jupyter-notebook-cleanup - - repo: https://github.com/shssoichiro/oxipng - rev: acdd66b4c5a5fda8b08281ad9b3216327b6847b4 - hooks: - - id: oxipng +- repo: https://github.com/sondrelg/pep585-upgrade + rev: 'v1.0' + hooks: + - id: upgrade-type-hints + args: [ '--futures=true' ] - - repo: https://github.com/dannysepler/rm_unneeded_f_str - rev: v0.1.0 - hooks: - - id: rm-unneeded-f-str +- repo: https://github.com/shssoichiro/oxipng + rev: acdd66b4c5a5fda8b08281ad9b3216327b6847b4 + hooks: + - id: oxipng - - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.18.2 - hooks: - - id: check-github-workflows - - id: check-github-actions - - id: check-dependabot - - id: check-readthedocs +- repo: https://github.com/dannysepler/rm_unneeded_f_str + rev: v0.1.0 + hooks: + - id: rm-unneeded-f-str + +- repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.18.2 + hooks: + - id: check-github-workflows + - id: check-github-actions + - id: check-dependabot + - id: check-readthedocs diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6c983076..0bd52acc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,7 +13,7 @@ Version 0.5.0 Version 0.4.0 ************* -* loss: upgrade API to use `create_new` to make sure that the losses are comparable. Compatible with zfit 0.6.4+ +* loss: upgrade API to use ``create_new`` to make sure that the losses are comparable. 
Compatible with zfit 0.6.4+ Version 0.3.1 ************* @@ -24,7 +24,7 @@ Version 0.3.1 Version 0.3.0 ************* * New documentation style -* `hepstats` can now do hypothesis tests, and compute upper limits and confidence intervals for counting analysis +* **hepstats** can now do hypothesis tests, and compute upper limits and confidence intervals for counting analysis * Progess bars are used to see the progression of the generation of the toys Version 0.2.5 @@ -34,8 +34,8 @@ Version 0.2.5 function if the loss is not extended * **hepstats.hypotests** can now be used even if there is no nuisances. The **pll** function in **utils/fit/diverse.py** had to be modified such that if there are no nuisances, the **pll** function returns the value of the loss function. -* add notebooks demos for FC intervals with the `FrequentistCalculator` and `AsymptoticCalculator`. -* add warnings when multiple roots are found in `ConfidenceInterval` +* add notebooks demos for FC intervals with the ``FrequentistCalculator`` and ``AsymptoticCalculator``. +* add warnings when multiple roots are found in ``ConfidenceInterval`` * move toys .yml files from notebook to notebook/toys Version 0.2.4 @@ -59,8 +59,8 @@ Version 0.2.1 * Addition of the **FrequentistCalculator** to performs hypothesis test, upper limit and interval calculations with toys. Toys can be saved and loaded in / from yaml files using the methods: - * `to_yaml` - * `from_yaml` + * ``to_yaml`` + * ``from_yaml`` Version 0.2.0 ************** diff --git a/docs/api/utils.rst b/docs/api/utils.rst index d019d137..1eab1a48 100644 --- a/docs/api/utils.rst +++ b/docs/api/utils.rst @@ -18,10 +18,10 @@ A fitting library should provide six basic objects: * minimizer * fitresult (optional) -A function for each object is defined in this module, all should return `True` to work +A function for each object is defined in this module, all should return ``True`` to work with hepstats. -The `zfit` API is currently the standard fitting API in hepstats. +The **zfit** API is currently the standard fitting API in hepstats. .. currentmodule:: hepstats.utils.fit.api_check diff --git a/docs/getting_started/hypotests.rst b/docs/getting_started/hypotests.rst index 6172b223..8a72191f 100644 --- a/docs/getting_started/hypotests.rst +++ b/docs/getting_started/hypotests.rst @@ -110,13 +110,13 @@ signal hypothesis, in hepstats this done using :py:class:`~hepstats.hypotests.pa A :py:class:`~hepstats.hypotests.parameters.POI` takes as input the parameter **Nsig** and a single value for a given hypothesis, for **poialt** it's 0 because this is the background only hypothesis. Similarly :py:class:`~hepstats.hypotests.parameters.POIarray` takes as input the parameter **Nsig** and an array of values to scan for **Nsig**, from 0 to 25. A range is needed -because the **calculator** instance will compute a `p-value` for each value in **poinull**, the upper limit for -a given confidence level :math:`\alpha` is defined as the value of **Nsig** for which the `p-value` is equal +because the **calculator** instance will compute a *p-value* for each value in **poinull**, the upper limit for +a given confidence level :math:`\alpha` is defined as the value of **Nsig** for which the *p-value* is equal to :math:`1 - \alpha`. We can now create an :py:class:`~hepstats.hypotests.core.upperlimit.UpperLimit` instance which takes as input the **calculator**, **poinull** and **poialt**. 
The :py:class:`~hepstats.hypotests.core.upperlimit.UpperLimit` -instance will ask the **calculator** to compute the `p-values` for each value in **poinull** and eventually find +instance will ask the **calculator** to compute the *p-values* for each value in **poinull** and eventually find the value of the upper limit on **Nsig** (if the upper limit is in the range of the **poinull** values). Below is an example on how to compute a CLs upper limit at 95 % confidence level. diff --git a/docs/getting_started/splot.rst b/docs/getting_started/splot.rst index 19d72b7c..4267922c 100644 --- a/docs/getting_started/splot.rst +++ b/docs/getting_started/splot.rst @@ -1,7 +1,7 @@ splot ##### -A full example using the **sPlot** algorithm can be found `here `_ . **sWeights** for different components in a data sample, modeled with a sum of extended probability density functions, are derived using the `compute_sweights` function: +A full example using the **sPlot** algorithm can be found `here `_ . **sWeights** for different components in a data sample, modeled with a sum of extended probability density functions, are derived using the ``compute_sweights`` function: .. code-block:: python diff --git a/notebooks/hypotests/FC_interval_asy.ipynb b/notebooks/hypotests/FC_interval_asy.ipynb index 22bf8897..0561495f 100644 --- a/notebooks/hypotests/FC_interval_asy.ipynb +++ b/notebooks/hypotests/FC_interval_asy.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Feldman and Cousins intervals with asymptotics.\n", "\n", @@ -12,7 +16,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import os\n", @@ -36,7 +44,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "In this example we consider an experiment where the observable $x$ is simply the measured value of $\\mu$ in an experiment with a Gaussian resolution with known width $\\sigma = 1$. We will compute the confidence belt for a 90 % condifdence level for the mean of the Gaussian $\\mu$.\n", "\n", @@ -46,7 +58,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "bounds = (-10, 10)\n", @@ -62,7 +78,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Below is defined the negative-likelihood function which is needed to compute Feldman and Cousins intervals as described in [arXiv:1109.0714](https://arxiv.org/abs/1109.0714). The negative-likelihood function is mimised to compute the measured mean $x$ and its uncertainty $\\sigma_x$. 
" ] @@ -70,7 +90,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -89,7 +113,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "\n", "\n", @@ -105,7 +133,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "results = {}" @@ -114,7 +146,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "for n in np.arange(-6, 7, 1.0):\n", @@ -178,7 +214,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The plot of the confidence belt of $\\mu$ at 90 % CL as function of the measured mean values $x$ (in unit of $\\sigma_x$), for the bounded and unbounded case are shown below." ] @@ -186,7 +226,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", @@ -207,7 +251,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "For the unbounded and the $\\mu > 0$ cases the plot reproduces the figure 3 and 10, respectively, of [A Unified Approach to the Classical Statistical Analysis of Small Signals, Gary J. Feldman, Robert D. Cousins](https://arxiv.org/pdf/physics/9711021.pdf)." ] diff --git a/notebooks/hypotests/FC_interval_freq.ipynb b/notebooks/hypotests/FC_interval_freq.ipynb index cd385fee..fc9ec395 100644 --- a/notebooks/hypotests/FC_interval_freq.ipynb +++ b/notebooks/hypotests/FC_interval_freq.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Feldman and Cousins intervals with toys." ] @@ -10,7 +14,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import os\n", @@ -34,7 +42,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "In this example we consider an experiment where the observable $x$ is simply the measured value of $\\mu$ in an experiment with a Gaussian resolution with known width $\\sigma = 1$. We will compute the confidence belt for a 90 % condifdence level for the mean of the Gaussian $\\mu$.\n", "\n", @@ -44,7 +56,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "bounds = (-10, 10)\n", @@ -60,7 +76,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Below is defined the negative-likelihood function which is needed to compute Feldman and Cousins intervals as described in [arXiv:1109.0714](https://arxiv.org/abs/1109.0714). The negative-likelihood function is mimised to compute the measured mean $x$ and its uncertainty $\\sigma_x$. 
" ] @@ -68,7 +88,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -87,7 +111,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "\n", "\n", @@ -103,7 +131,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "results = {}" @@ -112,7 +144,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "ntoys = 600\n", @@ -198,7 +234,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The plot of the confidence belt of $\\mu$ at 90 % CL as function of the measured mean values $x$ (in unit of $\\sigma_x$), for the bounded and unbounded case are shown below." ] @@ -206,7 +246,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", @@ -227,7 +271,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "For the unbounded and the $\\mu > 0$ cases the plot reproduces the figures 3 and 10, respectively, of [A Unified Approach to the Classical Statistical Analysis of Small Signals, Gary J. Feldman, Robert D. Cousins](https://arxiv.org/pdf/physics/9711021.pdf)." ] @@ -235,7 +283,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] } diff --git a/notebooks/hypotests/Simultaneous_fit_discovery_splot.ipynb b/notebooks/hypotests/Simultaneous_fit_discovery_splot.ipynb index cd629a48..babdca45 100644 --- a/notebooks/hypotests/Simultaneous_fit_discovery_splot.ipynb +++ b/notebooks/hypotests/Simultaneous_fit_discovery_splot.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Combining measurements\n", "\n", @@ -11,7 +15,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Adding a constraint\n", "\n", @@ -28,9 +36,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -50,9 +60,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -69,9 +81,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -86,9 +100,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -101,9 +117,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -116,9 +134,11 @@ "cell_type": "code", "execution_count": null, 
"metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -146,9 +166,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -160,9 +182,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -177,9 +201,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -189,7 +215,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Simultaneous fits\n", "\n", @@ -212,9 +242,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -227,9 +259,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -246,7 +280,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Sharing and composing parameters\n", "\n", @@ -278,9 +316,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -303,9 +343,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -333,6 +375,9 @@ "metadata": { "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%% md\n" } }, "source": [ @@ -350,9 +395,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -366,9 +413,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -380,9 +429,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -394,9 +445,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -406,7 +459,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Plotting a simultaneous loss\n", "\n", @@ -417,9 +474,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -463,7 +522,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Discovery test\n", "\n" @@ -471,7 +534,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "We observed an excess of our signal:" ] @@ -480,9 
+547,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -492,7 +561,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Now we would like to compute the significance of this observation or in other words the probabilty that this observation is the result of the statistical fluctuation. To do so we have to perform an hypothesis test where the null and alternative hypotheses are defined as:\n", "\n", @@ -506,9 +579,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -521,7 +596,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "What the `POI` class does is to take as input a `zfit.Parameter` instance and a value corresponding to a given hypothesis. You can notice that we didn't define here the alternative hypothesis as in the discovery test the value of POI for alternate is set to the best fit value.\n", "\n", @@ -559,9 +638,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -578,7 +659,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "There is another calculator in `hepstats` called `FrequentistCalculator` which constructs the test statistic distribution $f(q_{0} |H_{0})$ with pseudo-experiments (toys), but it takes more time.\n", "\n", @@ -593,9 +678,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -608,7 +695,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "So we get a significance of about $7\\sigma$ which is well above the $5 \\sigma$ threshold for discoveries 😃.\n", "\n", @@ -621,9 +712,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -643,9 +736,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -659,9 +754,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -673,9 +770,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -691,9 +790,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -706,9 +807,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -727,9 +830,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { 
"outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -741,7 +846,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "\n", "\n", @@ -758,9 +867,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -798,7 +909,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "In this particular example we want to unfold the signal lifetime distribution. To do so **sPlot** needs a discriminant variable to determine the yields of the various sources using an extended maximum likelihood fit." ] @@ -807,9 +922,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -830,9 +947,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -844,7 +963,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "**sPlot** will use the fitted yield for each sources to derive the so-called **sWeights** for each data point:\n", "\n", @@ -868,11 +991,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, - "scrolled": true + "pycharm": { + "name": "#%%\n" + } }, "outputs": [], "source": [ @@ -887,9 +1011,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -900,7 +1026,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Now we can apply the signal **sWeights** on the lifetime distribution and retrieve its signal components." ] @@ -909,9 +1039,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -937,7 +1069,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Be careful the **sPlot** technique works only on variables that are uncorrelated with the discriminant variable." ] @@ -946,9 +1082,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -958,7 +1096,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Let's apply to signal **sWeights** on the mass distribution to see how bad the results of **sPlot** is when applied on a variable that is correlated with the discrimant variable." 
] @@ -967,9 +1109,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -981,9 +1125,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" } }, "outputs": [], diff --git a/notebooks/hypotests/confidenceinterval_asy_zfit.ipynb b/notebooks/hypotests/confidenceinterval_asy_zfit.ipynb index 04997856..bab5d0db 100644 --- a/notebooks/hypotests/confidenceinterval_asy_zfit.ipynb +++ b/notebooks/hypotests/confidenceinterval_asy_zfit.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Example of confidence interval computation" ] @@ -10,7 +14,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -28,7 +36,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "plt.rcParams['figure.figsize'] = (9,8)\n", @@ -37,7 +49,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Fit of a Gaussian signal over an exponential background:" ] @@ -45,7 +61,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "bounds = (0.1, 3.0)\n", @@ -66,7 +86,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "obs = zfit.Space('x', limits=bounds)" @@ -75,7 +99,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "mean = zfit.Parameter(\"mean\", 1.2, 0.5, 2.0)\n", @@ -88,7 +116,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)\n", @@ -99,7 +131,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -110,7 +146,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Instantiate a minuit minimizer\n", @@ -120,7 +160,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# minimisation of the loss function\n", @@ -132,7 +176,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "nbins = 80\n", @@ -144,7 +192,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Confidence interval\n", "\n", @@ -154,7 +206,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# 
instantation of the calculator\n", @@ -165,7 +221,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# parameter of interest of the null hypothesis\n", @@ -175,7 +235,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the discovery test\n", @@ -185,7 +249,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "ci.interval();" @@ -194,7 +262,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", diff --git a/notebooks/hypotests/confidenceinterval_freq_zfit.ipynb b/notebooks/hypotests/confidenceinterval_freq_zfit.ipynb index a8b3b5a3..18b3fb45 100644 --- a/notebooks/hypotests/confidenceinterval_freq_zfit.ipynb +++ b/notebooks/hypotests/confidenceinterval_freq_zfit.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Example of confidence interval computation" ] @@ -10,7 +14,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -27,7 +35,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Fit of a Gaussian signal over an exponential background:" ] @@ -35,7 +47,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "bounds = (0.1, 3.0)\n", @@ -56,7 +72,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "obs = zfit.Space('x', limits=bounds)" @@ -65,7 +85,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "mean = zfit.Parameter(\"mean\", 1.2, 0.5, 2.0)\n", @@ -78,7 +102,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)\n", @@ -89,7 +117,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -100,7 +132,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Instantiate a minuit minimizer\n", @@ -110,7 +146,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# minimisation of the loss function\n", @@ -121,7 +161,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "nbins = 80\n", @@ -133,7 +177,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + 
"pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Confidence interval\n", "\n", @@ -143,7 +191,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the calculator\n", @@ -155,7 +207,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# parameter of interest of the null hypothesis\n", @@ -165,7 +221,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the discovery test\n", @@ -175,7 +235,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "ci.interval();" @@ -184,7 +248,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", @@ -195,7 +263,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "calculator.to_yaml(\"toys/ci_freq_zfit_toys.yml\")" @@ -204,14 +276,22 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] } diff --git a/notebooks/hypotests/discovery_asy_zfit.ipynb b/notebooks/hypotests/discovery_asy_zfit.ipynb index 73cc274a..c4ed0517 100644 --- a/notebooks/hypotests/discovery_asy_zfit.ipynb +++ b/notebooks/hypotests/discovery_asy_zfit.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Discovery test example" ] @@ -10,7 +14,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -28,7 +36,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "plt.rcParams['figure.figsize'] = (8,6)\n", @@ -37,7 +49,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Fit of a Gaussian signal over an exponential background:" ] @@ -45,7 +61,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "bounds = (0.1, 3.0)\n", @@ -64,7 +84,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pltdist(data, bins=80, bounds=bounds)" @@ -73,7 +97,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "obs = zfit.Space('x', limits=bounds)" @@ -82,7 +110,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "lambda_ = 
zfit.Parameter(\"lambda\",-2.0, -4.0, -1.0)\n", @@ -93,7 +125,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)\n", @@ -104,7 +140,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -115,7 +155,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Instantiate a minuit minimizer\n", @@ -125,7 +169,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# minimisation of the loss function\n", @@ -137,7 +185,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "nbins = 80\n", @@ -149,7 +201,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Discovery test\n", "\n", @@ -159,7 +215,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the calculator\n", @@ -170,7 +230,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# parameter of interest of the null hypothesis\n", @@ -180,7 +244,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the discovery test\n", @@ -190,7 +258,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pnull, significance = discovery_test.result()" @@ -199,7 +271,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] } diff --git a/notebooks/hypotests/discovery_freq_zfit.ipynb b/notebooks/hypotests/discovery_freq_zfit.ipynb index 59b87f0f..f680a024 100644 --- a/notebooks/hypotests/discovery_freq_zfit.ipynb +++ b/notebooks/hypotests/discovery_freq_zfit.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Discovery test example" ] @@ -10,7 +14,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -28,7 +36,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "plt.rcParams['figure.figsize'] = (8,6)\n", @@ -37,7 +49,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Fit of a Gaussian signal over an exponential background:" ] @@ -45,7 +61,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ 
"bounds = (0.1, 3.0)\n", @@ -64,7 +84,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pltdist(data, bins=80, bounds=bounds)" @@ -73,7 +97,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "obs = zfit.Space('x', limits=bounds)" @@ -82,7 +110,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "lambda_ = zfit.Parameter(\"lambda\",-2.0, -4.0, -1.0)\n", @@ -93,7 +125,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)\n", @@ -104,7 +140,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -115,7 +155,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Instantiate a minuit minimizer\n", @@ -125,7 +169,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# minimisation of the loss function\n", @@ -137,7 +185,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "nbins = 80\n", @@ -149,7 +201,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Discovery test\n", "\n", @@ -159,7 +215,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the calculator\n", @@ -171,7 +231,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# parameter of interest of the null hypothesis\n", @@ -181,7 +245,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the discovery test\n", @@ -191,7 +259,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pnull, significance = discovery_test.result()" @@ -200,7 +272,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "plt.hist(calculator.qnull(poinull, None, onesided=True, onesideddiscovery=True)[poinull], bins=20, label=\"qnull distribution\", log=True)\n", @@ -212,7 +288,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "calculator.to_yaml(\"toys/discovery_freq_zfit_toys.yml\")" @@ -221,7 +301,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] } diff --git 
a/notebooks/hypotests/toys/discovery_freq_zfit_toys.yml b/notebooks/hypotests/toys/discovery_freq_zfit_toys.yml index 19774c5b..49d8f02d 100644 Binary files a/notebooks/hypotests/toys/discovery_freq_zfit_toys.yml and b/notebooks/hypotests/toys/discovery_freq_zfit_toys.yml differ diff --git a/notebooks/hypotests/upperlimit_asy_zfit.ipynb b/notebooks/hypotests/upperlimit_asy_zfit.ipynb index 291932e0..8537a1c9 100644 --- a/notebooks/hypotests/upperlimit_asy_zfit.ipynb +++ b/notebooks/hypotests/upperlimit_asy_zfit.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Example of upper limit computation." ] @@ -10,7 +14,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -28,7 +36,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "plt.rcParams['figure.figsize'] = (9,8)\n", @@ -37,7 +49,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Fit of a Gaussian signal over an exponential background:" ] @@ -45,7 +61,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "bounds = (0.1, 3.0)\n", @@ -64,7 +84,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pltdist(data, bins=80, bounds=bounds)" @@ -73,7 +97,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "data.size" @@ -82,7 +110,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "obs = zfit.Space('x', limits=bounds)" @@ -91,7 +123,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "lambda_ = zfit.Parameter(\"lambda\",-2.0, -4.0, -1.0)\n", @@ -102,7 +138,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)\n", @@ -113,7 +153,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -124,7 +168,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Instantiate a minuit minimizer\n", @@ -134,7 +182,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# minimisation of the loss function\n", @@ -146,7 +198,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "nbins = 80\n", @@ -158,7 +214,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% 
md\n" + } + }, "source": [ "### Upper limit:\n", "\n", @@ -168,7 +228,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the calculator\n", @@ -179,7 +243,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# parameter of interest of the null hypothesis\n", @@ -191,7 +259,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the discovery test\n", @@ -201,7 +273,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "ul.upperlimit(alpha=0.05, CLs=True);" @@ -210,7 +286,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", @@ -222,7 +302,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", @@ -232,7 +316,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] } diff --git a/notebooks/hypotests/upperlimit_freq_zfit.ipynb b/notebooks/hypotests/upperlimit_freq_zfit.ipynb index 35df1910..9a8c80bd 100644 --- a/notebooks/hypotests/upperlimit_freq_zfit.ipynb +++ b/notebooks/hypotests/upperlimit_freq_zfit.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Example of upper limit computation." 
] @@ -10,7 +14,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -28,7 +36,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "plt.rcParams['figure.figsize'] = (9,6)\n", @@ -37,7 +49,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Fit of a Gaussian signal over an exponential background:" ] @@ -45,7 +61,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "bounds = (0.1, 3.0)\n", @@ -64,7 +84,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pltdist(data, bins=80, bounds=bounds)" @@ -73,7 +97,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "obs = zfit.Space('x', limits=bounds)" @@ -82,7 +110,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "lambda_ = zfit.Parameter(\"lambda\",-2.0, -4.0, -1.0)\n", @@ -93,7 +125,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)\n", @@ -104,7 +140,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the negative log likelihood\n", @@ -115,7 +155,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Instantiate a minuit minimizer\n", @@ -125,7 +169,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# minimisation of the loss function\n", @@ -137,7 +185,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "nbins = 80\n", @@ -149,7 +201,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Upper limit:\n", "\n", @@ -159,7 +215,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the calculator\n", @@ -171,7 +231,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# parameter of interest of the null hypothesis\n", @@ -183,7 +247,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# instantation of the discovery test\n", @@ -193,7 +261,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ 
"ul.upperlimit(alpha=0.05, CLs=True);" @@ -202,7 +274,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", @@ -213,7 +289,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "f = plt.figure(figsize=(9, 8))\n", @@ -224,7 +304,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "calculator.to_yaml(\"toys/upperlimit_freq_zfit_toys.yml\")" @@ -232,7 +316,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Illustration of the computation of the pvalues:\n", "\n", @@ -242,7 +330,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "qnull = calculator.qnull(poinull, poialt, onesided=True)" @@ -251,7 +343,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "qobs = calculator.qobs(poinull, onesided=True)" @@ -260,7 +356,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "qalt = calculator.qalt(poinull, poialt, onesided=True)" @@ -269,7 +369,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "nbins = 30\n", diff --git a/setup.cfg b/setup.cfg index c24b3991..a70c80d0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,6 +38,7 @@ install_requires = pandas scipy tqdm + uhi python_requires = >=3.7 package_dir = = src diff --git a/src/hepstats/hypotests/calculators/asymptotic_calculator.py b/src/hepstats/hypotests/calculators/asymptotic_calculator.py index 0a4a304f..77c3938d 100644 --- a/src/hepstats/hypotests/calculators/asymptotic_calculator.py +++ b/src/hepstats/hypotests/calculators/asymptotic_calculator.py @@ -1,17 +1,20 @@ from __future__ import annotations -from typing import Union, Any +import math +from typing import Union, Any, Optional, List import numpy as np from scipy.stats import norm import warnings from .basecalculator import BaseCalculator -from ...utils import eval_pdf, array2dataset, pll, get_value +from ...utils import eval_pdf, array2dataset, pll, get_value, set_values from ..parameters import POI, POIarray +from ...utils.fit.api_check import is_valid_fitresult, is_valid_loss +from ...utils.fit.diverse import get_ndims def generate_asimov_hist( - model, params: dict[Any, dict[str, Any]], nbins: int = 100 + model, params: dict[Any, dict[str, Any]], nbins: int | None = None ) -> tuple[np.ndarray, np.ndarray]: """Generate the Asimov histogram using a model and dictionary of parameters. 
@@ -30,11 +33,12 @@ def generate_asimov_hist( >>> model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma) >>> hist, bin_edges = generate_asimov_hist(model, {"mean": 1.2, "sigma": 0.1}) """ - + if nbins is None: + nbins = 100 space = model.space bounds = space.limit1d bin_edges = np.linspace(*bounds, nbins + 1) - bin_centers = bin_edges[0:-1] + np.diff(bin_edges) / 2 + bin_centers = bin_edges[:-1] + np.diff(bin_edges) / 2 hist = eval_pdf(model, bin_centers, params, allow_extended=True) hist *= space.area() / nbins @@ -42,14 +46,75 @@ return hist, bin_edges +def generate_asimov_dataset(data, model, is_binned, nbins, values): + """Generate the Asimov dataset using a model and dictionary of parameters. + + Args: + data: Data, the same class should be used for the generated dataset. + model: Model to use for the generation. Can be binned or unbinned. + is_binned: Whether the model is binned. + nbins: Number of bins for the Asimov dataset. + values: Dictionary of parameter values. + + Returns: + The generated Asimov dataset. + """ + nsample = None + if not model.is_extended: + nsample = get_value(data.n_events) + if is_binned: + with set_values(list(values), [v["value"] for v in values.values()]): + dataset = model.to_binneddata() + if nsample is not None: + dataset = type(dataset).from_hist(dataset.to_hist() * nsample) + else: + if len(nbins) > 1: # meaning we have multiple dimensions + raise ValueError( + "Currently, only one dimension is supported for models that do not follow" + " the new binned loss convention. New losses can be registered with the" + " asymptotic calculator." + ) + weights, bin_edges = generate_asimov_hist(model, values, nbins[0]) + bin_centers = bin_edges[:-1] + np.diff(bin_edges) / 2 + + if nsample is not None: # It's not extended + weights *= nsample + + dataset = array2dataset(type(data), data.space, bin_centers, weights) + return dataset + + class AsymptoticCalculator(BaseCalculator): """ Class for asymptotic calculators, using asymptotic formulae of the likelihood ratio described in :cite:`Cowan:2010js`. Can be used only with one parameter of interest. """ - def __init__(self, input, minimizer, asimov_bins: int = 100): - """Asymptotic calculator class. + UNBINNED_TO_BINNED_LOSS = {} + try: + from zfit.loss import ( + UnbinnedNLL, + BinnedNLL, + ExtendedUnbinnedNLL, + ExtendedBinnedNLL, + ) + except ImportError: + pass + else: + UNBINNED_TO_BINNED_LOSS[UnbinnedNLL] = BinnedNLL + UNBINNED_TO_BINNED_LOSS[ExtendedUnbinnedNLL] = ExtendedBinnedNLL + + def __init__( + self, + input, + minimizer, + asimov_bins: int | list[int] | None = None, + ): + """Asymptotic calculator class using Wilks' and Wald's asymptotic formulae. + + The asymptotic formula is significantly faster than the Frequentist calculator, as it does not + require the calculation of the frequentist p-value, which involves generating and fitting toys (sample-and-fit). + Args: input: loss or fit result.
@@ -58,7 +123,7 @@ def __init__(self, input, minimizer, asimov_bins: int = 100): Example with **zfit**: >>> import zfit - >>> from zfit.core.loss import UnbinnedNLL + >>> from zfit.loss import UnbinnedNLL >>> from zfit.minimize import Minuit >>> >>> obs = zfit.Space('x', limits=(0.1, 2.0)) @@ -70,14 +135,114 @@ def __init__(self, input, minimizer, asimov_bins: int = 100): >>> >>> calc = AsymptoticCalculator(input=loss, minimizer=Minuit(), asimov_bins=100) """ + if is_valid_fitresult(input): + loss = input.loss + result = input + elif is_valid_loss(input): + loss = input + result = None + else: + raise ValueError("input must be a fitresult or a loss") + + asimov_bins_converted = self._check_convert_asimov_bins(asimov_bins, loss.data) super().__init__(input, minimizer) - self._asimov_bins = asimov_bins + self._asimov_bins = asimov_bins_converted self._asimov_dataset: dict = {} self._asimov_loss: dict = {} + self._binned_loss = None # cache of nll values computed with the asimov dataset self._asimov_nll: dict[POI, np.ndarray] = {} + def _convert_to_binned(self, loss, asimov_bins): + """Converts the loss to binned if necessary.""" + + for unbinned_loss, binned_loss in self.UNBINNED_TO_BINNED_LOSS.items(): + if type(loss) == unbinned_loss: + datasets = [] + models = [] + for d, m, nbins in zip(loss.data, loss.model, asimov_bins): + binnings = m.space.with_binning(nbins) + model_binned = m.to_binned(binnings) + data_binned = d.to_binned(binnings) + datasets.append(data_binned) + models.append(model_binned) + loss = binned_loss( + model=models, data=datasets, constraints=loss.constraints + ) + break + elif type(loss) == binned_loss: + break + else: + loss = False + + return loss + + def _get_binned_loss(self): + """Returns the binned loss.""" + binned_loss = self._binned_loss + if binned_loss is None: + binned_loss = self._convert_to_binned(self.loss, self._asimov_bins) + self._binned_loss = binned_loss + return binned_loss + + @staticmethod + def _check_convert_asimov_bins( + asimov_bins, datasets + ) -> list[list[int]]: # TODO: we want to allow axes from UHI + nsimultaneous = len(datasets) + ndims = [get_ndims(dataset) for dataset in datasets] + if asimov_bins is None: + asimov_bins = [[math.ceil(100 / ndim**0.5)] * ndim for ndim in ndims] + if isinstance(asimov_bins, int): + if nsimultaneous == 1: + asimov_bins = [[asimov_bins] * ndim for ndim in ndims] + else: + raise ValueError( + "asimov_bins is an int but there are multiple datasets. " + "Please provide a list of int for each dataset." + ) + elif isinstance(asimov_bins, list): + if len(asimov_bins) != nsimultaneous: + raise ValueError( + "asimov_bins is a list but the number of elements is different from the number of datasets." + ) + else: + raise TypeError( + f"asimov_bins must be an int or a list of int (or list of list of int), not {type(asimov_bins)}" + ) + + for i, (asimov_bin, ndim) in enumerate(zip(asimov_bins, ndims)): + if isinstance(asimov_bin, int): + if ndim == 1: + asimov_bins[i] = [asimov_bin] + else: + raise ValueError( + f"asimov_bins[{i}] is not a list but the dataset has {ndim} dimensions." + ) + elif isinstance(asimov_bin, list): + if len(asimov_bin) != ndim: + raise ValueError( + f"asimov_bins[{i}] is a list with {len(asimov_bin)} elements but the" + f" dataset has {ndim} dimensions." + ) + if not all(isinstance(x, int) for x in asimov_bin): + raise ValueError( + f"asimov_bins[{i}] is a list with non-int elements." + ) + else: + raise TypeError( + f"asimov_bins[{i}] is not an int or a list but a {type(asimov_bin)}." 
+ ) + assert isinstance( + asimov_bins, list + ), "INTERNAL ERROR: Could not correctly convert asimov_bins" + assert all( + isinstance(asimov_bin, list) and len(asimov_bin) == ndim + for ndim, asimov_bin in zip(ndims, asimov_bins) + ), "INTERNAL ERROR: Could not correctly convert asimov_bins, dimensions wrong" + return asimov_bins + @staticmethod def check_pois(pois: POI | POIarray): """ @@ -97,12 +262,12 @@ def check_pois(pois: POI | POIarray): msg = "Tests using the asymptotic calculator can only be used with one parameter of interest." raise NotImplementedError(msg) - def asimov_dataset(self, poi: POI, ntrials_fit: int = 5): + def asimov_dataset(self, poi: POI, ntrials_fit: int | None = None): """Gets the Asimov dataset for a given alternative hypothesis. Args: poi: parameter of interest of the alternative hypothesis. - ntrials_fit: maximum number of fits to perform + ntrials_fit: (default: 5) maximum number of fits to perform Returns: The asymov dataset. @@ -112,9 +277,18 @@ def asimov_dataset(self, poi: POI, ntrials_fit: int = 5): >>> dataset = calc.asimov_dataset(poialt) """ - if poi not in self._asimov_dataset.keys(): - model = self.model - data = self.data + if ntrials_fit is None: + ntrials_fit = 5 + if poi not in self._asimov_dataset: + binned_loss = self._get_binned_loss() + if binned_loss is False: # LEGACY + model = self.model + data = self.data + loss = self.loss + else: + model = binned_loss.model + data = binned_loss.data + loss = binned_loss minimizer = self.minimizer oldverbose = minimizer.verbosity minimizer.verbosity = 0 @@ -135,7 +309,7 @@ def asimov_dataset(self, poi: POI, ntrials_fit: int = 5): else: with poiparam.set_value(poivalue): for trial in range(ntrials_fit): - minimum = minimizer.minimize(loss=self.loss) + minimum = minimizer.minimize(loss=loss) if minimum.valid: break else: @@ -159,24 +333,14 @@ def asimov_dataset(self, poi: POI, ntrials_fit: int = 5): minimizer.verbosity = oldverbose asimov_data = [] - - if not isinstance(self._asimov_bins, list): - asimov_bins = [self._asimov_bins] * len(data) - else: - asimov_bins = self._asimov_bins - assert len(asimov_bins) == len(data) - - for i, (m, nbins) in enumerate(zip(model, asimov_bins)): - - weights, bin_edges = generate_asimov_hist(m, values, nbins) - bin_centers = bin_edges[0:-1] + np.diff(bin_edges) / 2 - - if not model[i].is_extended: - weights *= get_value(data[i].n_events) - - asimov_data.append( - array2dataset(type(data[i]), data[i].space, bin_centers, weights) - ) + asimov_bins = self._asimov_bins + assert len(asimov_bins) == len(data) + is_binned_loss = isinstance( + loss, tuple(self.UNBINNED_TO_BINNED_LOSS.values()) + ) + for i, (m, d, nbins) in enumerate(zip(model, data, asimov_bins)): + dataset = generate_asimov_dataset(d, m, is_binned_loss, nbins, values) + asimov_data.append(dataset) self._asimov_dataset[poi] = asimov_data @@ -195,8 +359,15 @@ def asimov_loss(self, poi: POI): >>> poialt = POI(mean, 1.2) >>> loss = calc.asimov_loss(poialt) """ + oldloss = self._get_binned_loss() + if oldloss is False: # LEGACY + oldloss = self.loss + if oldloss is None: + raise ValueError("No loss function was provided.") if poi not in self._asimov_loss: - loss = self.lossbuilder(self.model, self.asimov_dataset(poi)) + loss = self.lossbuilder( + oldloss.model, self.asimov_dataset(poi), oldloss=oldloss + ) self._asimov_loss[poi] = loss return self._asimov_loss[poi] @@ -225,7 +396,7 @@ def asimov_nll(self, pois: POIarray, poialt: POI) -> np.ndarray: minimizer = self.minimizer ret = np.empty(pois.shape) for i, p in 
enumerate(pois): - if p not in self._asimov_nll.keys(): + if p not in self._asimov_nll: loss = self.asimov_loss(poialt) nll = pll(minimizer, loss, p) self._asimov_nll[p] = nll diff --git a/src/hepstats/hypotests/calculators/basecalculator.py b/src/hepstats/hypotests/calculators/basecalculator.py index 5c6ce0c6..a30f99ca 100644 --- a/src/hepstats/hypotests/calculators/basecalculator.py +++ b/src/hepstats/hypotests/calculators/basecalculator.py @@ -71,7 +71,7 @@ def qobs( self, poinull: POI, onesided: bool = True, - onesideddiscovery: bool = False, + onesideddiscovery: bool = True, qtilde: bool = False, ): """Computes observed values of the :math:`\\Delta` log-likelihood test statistic. diff --git a/src/hepstats/hypotests/core/discovery.py b/src/hepstats/hypotests/core/discovery.py index 71d9db1e..93be1e9f 100644 --- a/src/hepstats/hypotests/core/discovery.py +++ b/src/hepstats/hypotests/core/discovery.py @@ -53,8 +53,10 @@ def __init__(self, calculator: BaseCalculator, poinull: POI): super().__init__(calculator, poinull) def result(self, printlevel: int = 1) -> tuple[float, float]: - """ - Returns the result of the discovery hypothesis test. + """Return the result of the discovery hypothesis test. + + The result can be (0.0, inf), which means that the numerical precision is not high enough or that the + number of toys is not large enough. For example if all toys are rejected, the result is (0.0, inf). Args: printlevel: if > 0 print the result. diff --git a/src/hepstats/hypotests/hypotests_object.py b/src/hepstats/hypotests/hypotests_object.py index 5cc75ead..d733ee60 100644 --- a/src/hepstats/hypotests/hypotests_object.py +++ b/src/hepstats/hypotests/hypotests_object.py @@ -1,5 +1,7 @@ import warnings +import numpy as np + from ..utils.fit.api_check import is_valid_loss, is_valid_fitresult, is_valid_minimizer from ..utils.fit.api_check import is_valid_data, is_valid_pdf from ..utils.fit import get_nevents @@ -23,7 +25,7 @@ def __init__(self, input, minimizer): self._loss = input self._bestfit = None else: - raise ValueError(f"{input} is not a valid loss funtion or fit result!") + raise ValueError(f"{input} is not a valid loss function or fit result!") if not is_valid_minimizer(minimizer): raise ValueError(f"{minimizer} is not a valid minimizer !") @@ -31,9 +33,7 @@ def __init__(self, input, minimizer): self._minimizer = minimizer self.minimizer.verbosity = 0 - self._parameters = {} - for param in self.loss.get_params(): - self._parameters[param.name] = param + self._parameters = {param.name: param for param in self.loss.get_params()} @property def loss(self): @@ -58,10 +58,11 @@ def bestfit(self): return self._bestfit else: print("Get fit best values!") + old_verbosity = self.minimizer.verbosity self.minimizer.verbosity = 5 - mininum = self.minimizer.minimize(loss=self.loss) - self.minimizer.verbosity = 0 - self._bestfit = mininum + minimum = self.minimizer.minimize(loss=self.loss) + self.minimizer.verbosity = old_verbosity + self._bestfit = minimum return self._bestfit @bestfit.setter @@ -120,13 +121,14 @@ def set_params_to_bestfit(self): for param in self.parameters: param.set_value(self.bestfit.params[param]["value"]) - def lossbuilder(self, model, data, weights=None): + def lossbuilder(self, model, data, weights=None, oldloss=None): """Method to build a new loss function. 
Args: * **model** (List): The model or models to evaluate the data on * **data** (List): Data to use * **weights** (optional, List): the data weights + * **oldloss**: Previous loss that has data, models, type Example with `zfit`: >>> data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000)) @@ -140,6 +142,8 @@ def lossbuilder(self, model, data, weights=None): """ + if oldloss is None: + oldloss = self.loss assert all(is_valid_pdf(m) for m in model) assert all(is_valid_data(d) for d in data) @@ -155,8 +159,8 @@ def lossbuilder(self, model, data, weights=None): for d, w in zip(data, weights): d.set_weights(w) - if hasattr(self.loss, "create_new"): - loss = self.loss.create_new( + if hasattr(oldloss, "create_new"): + loss = oldloss.create_new( model=model, data=data, constraints=self.constraints ) else: @@ -165,7 +169,7 @@ def lossbuilder(self, model, data, weights=None): "upgrade to >= 0.6.4", FutureWarning, ) - loss = type(self.loss)(model=model, data=data) + loss = type(oldloss)(model=model, data=data) loss.add_constraints(self.constraints) return loss @@ -204,10 +208,13 @@ def sampler(self, floating_params=None): self.set_params_to_bestfit() nevents = [] for m, d in zip(self.loss.model, self.loss.data): + nevents_data = get_nevents(d) if m.is_extended: - nevents.append("extended") + nevents.append( + np.random.poisson(lam=nevents_data) + ) # TODO: handle constraint yields correctly? else: - nevents.append(get_nevents(d)) + nevents.append(nevents_data) return self._sampler(self.loss.model, nevents, floating_params) diff --git a/src/hepstats/hypotests/toyutils.py b/src/hepstats/hypotests/toyutils.py index 9b141bc8..51b38987 100644 --- a/src/hepstats/hypotests/toyutils.py +++ b/src/hepstats/hypotests/toyutils.py @@ -14,7 +14,6 @@ from ..utils import pll, base_sampler, base_sample from .hypotests_object import ToysObject - """ Module defining the classes to perform and store the results of toy experiments. @@ -361,6 +360,7 @@ def to_yaml(self, filename: str): tree["toys"] = self.toyresults_to_dict() af = asdf.AsdfFile(tree) af.write_to(filename) + af.close() def toysresults_from_yaml(self, filename: str) -> list[ToyResult]: """ @@ -370,30 +370,35 @@ def toysresults_from_yaml(self, filename: str) -> list[ToyResult]: filename: the yaml file name. """ ret = [] - try: - toys = asdf.open(filename).tree["toys"] - except KeyError: - raise FormatError(f"The key `toys` is not found in {filename}.") - - for t in toys: - poiparam = None - for p in self.loss.get_params(): - if t["poi"] == p.name: - poiparam = p - - if poiparam is None: - raise ParameterNotFound(f"Parameter with name {t['poi']} is not found.") + with asdf.open(filename) as asdf_file: + try: + toys = asdf_file.tree["toys"] + except KeyError as error: + raise FormatError( + f"The key `toys` is not found in {filename}, not a valid toy file." + ) from error + + for t in toys: + poiparam = None + for p in self.loss.get_params(): + if t["poi"] == p.name: + poiparam = p + + if poiparam is None: + raise ParameterNotFound( + f"Parameter with name {t['poi']} is not found." 
+ ) - poigen = POI(poiparam, t["genvalue"]) - poieval = POIarray(poiparam, np.asarray(t["evalvalues"])) + poigen = POI(poiparam, t["genvalue"]) + poieval = POIarray(poiparam, np.asarray(t["evalvalues"])) - bestfit = t["bestfit"] - nll_bestfit = t["nlls"]["bestfit"] - nlls = {p: t["nlls"][p.value] for p in poieval} + bestfit = t["bestfit"] + nll_bestfit = t["nlls"]["bestfit"] + nlls = {p: t["nlls"][p.value] for p in poieval} - t = ToyResult(poigen, poieval) - t.add_entries(bestfit=bestfit, nll_bestfit=nll_bestfit, nlls=nlls) - ret.append(t) + t = ToyResult(poigen, poieval) + t.add_entries(bestfit=bestfit, nll_bestfit=nll_bestfit, nlls=nlls) + ret.append(t) return ret diff --git a/src/hepstats/splot/sweights.py b/src/hepstats/splot/sweights.py index 4a781bde..ac927f6e 100644 --- a/src/hepstats/splot/sweights.py +++ b/src/hepstats/splot/sweights.py @@ -22,7 +22,7 @@ def is_sum_of_extended_pdfs(model) -> bool: if not hasattr(model, "get_models"): return False - return all(m.is_extended for m in model.get_models()) + return all(m.is_extended for m in model.get_models()) and model.is_extended def compute_sweights(model, x: np.ndarray) -> dict[Any, np.ndarray]: diff --git a/src/hepstats/utils/__init__.py b/src/hepstats/utils/__init__.py index 2627ff03..9ae943fe 100644 --- a/src/hepstats/utils/__init__.py +++ b/src/hepstats/utils/__init__.py @@ -1 +1,9 @@ -from .fit import eval_pdf, array2dataset, pll, base_sampler, base_sample, get_value +from .fit import ( + eval_pdf, + array2dataset, + pll, + base_sampler, + base_sample, + get_value, + set_values, +) diff --git a/src/hepstats/utils/fit/__init__.py b/src/hepstats/utils/fit/__init__.py index 39912ca8..d67fc4aa 100644 --- a/src/hepstats/utils/fit/__init__.py +++ b/src/hepstats/utils/fit/__init__.py @@ -1,2 +1,2 @@ from .sampling import base_sampler, base_sample -from .diverse import get_value, eval_pdf, pll, array2dataset, get_nevents +from .diverse import get_value, eval_pdf, pll, array2dataset, get_nevents, set_values diff --git a/src/hepstats/utils/fit/api_check.py b/src/hepstats/utils/fit/api_check.py index 96c800a8..53509816 100644 --- a/src/hepstats/utils/fit/api_check.py +++ b/src/hepstats/utils/fit/api_check.py @@ -16,6 +16,9 @@ The `zfit` API is currently the standard fitting API in hepstats. 
""" +import warnings + +import uhi.typing.plottable def is_valid_parameter(object): @@ -54,7 +57,10 @@ def is_valid_data(object): has_weights = hasattr(object, "weights") has_set_weights = hasattr(object, "set_weights") has_space = hasattr(object, "space") - return has_nevents and has_weights and has_set_weights and has_space + is_histlike = isinstance(object, uhi.typing.plottable.PlottableHistogram) + return ( + has_nevents and has_weights and has_set_weights and has_space + ) or is_histlike def is_valid_pdf(object): diff --git a/src/hepstats/utils/fit/diverse.py b/src/hepstats/utils/fit/diverse.py index 22a4c926..7dc5ffc2 100644 --- a/src/hepstats/utils/fit/diverse.py +++ b/src/hepstats/utils/fit/diverse.py @@ -1,14 +1,24 @@ -from contextlib import ExitStack +from contextlib import ExitStack, contextmanager import numpy as np +from .api_check import is_valid_pdf + + +def get_ndims(dataset): + """Return the number of dimensions in the dataset""" + return len(dataset.obs) + def get_value(value): return np.array(value) -def eval_pdf(model, x, params={}, allow_extended=False): +def eval_pdf(model, x, params=None, allow_extended=False): """Compute pdf of model at a given point x and for given parameters values""" + if params is None: + params = {} + def pdf(model, x): if model.is_extended and allow_extended: ret = model.ext_pdf(x) @@ -46,6 +56,16 @@ def pll(minimizer, loss, pois) -> float: return value +@contextmanager +def set_values(params, values): + old_values = [p.value() for p in params] + for p, v in zip(params, values): + p.set_value(v) + yield + for p, v in zip(params, old_values): + p.set_value(v) + + def array2dataset(dataset_cls, obs, array, weights=None): """ dataset_cls: only used to get the class in which array/weights will be @@ -53,9 +73,9 @@ def array2dataset(dataset_cls, obs, array, weights=None): """ if hasattr(dataset_cls, "from_numpy"): - return dataset_cls.from_numpy(obs=obs, array=array, weights=weights) + return dataset_cls.from_numpy(obs, array=array, weights=weights) else: - return dataset_cls(obs=obs, array=array, weights=weights) + return dataset_cls(obs, array=array, weights=weights) def get_nevents(dataset): diff --git a/tests/conftest.py b/tests/conftest.py index eb0f1809..06815328 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -49,3 +49,47 @@ def setup_teardown(): zfit.run.chunking.max_n_points = old_chunksize zfit.run.set_graph_mode() zfit.run.set_autograd_mode() + + +def create_loss_func(npeak, nbins=None): + import zfit + + bounds = (0.1, 3.0) + obs = zfit.Space("x", limits=bounds) + + # Data and signal + np.random.seed(0) + tau = -2.0 + beta = -1 / tau + bkg = np.random.exponential(beta, 300) + peak = np.random.normal(1.2, 0.1, npeak) + data = np.concatenate((bkg, peak)) + data = data[(data > bounds[0]) & (data < bounds[1])] + N = len(data) + data = zfit.data.Data.from_numpy(obs=obs, array=data) + + mean = zfit.Parameter("mean", 1.2, 0.5, 2.0) + sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.2) + lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) + Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N) + Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 1.1) + + signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig) + background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) + + tot_model = zfit.pdf.SumPDF([signal, background]) + + if nbins is not None: + binned_space = obs.with_binning(nbins) + data = data.to_binned(binned_space) + tot_model = tot_model.to_binned(binned_space) + loss = 
zfit.loss.ExtendedBinnedNLL(tot_model, data) + else: + loss = zfit.loss.ExtendedUnbinnedNLL(model=tot_model, data=data) + + return loss, (Nsig, Nbkg, mean, sigma) + + +@pytest.fixture() +def create_loss(): + return create_loss_func diff --git a/tests/hypotests/test_calculators.py b/tests/hypotests/test_calculators.py index 858be2a1..1560bc94 100644 --- a/tests/hypotests/test_calculators.py +++ b/tests/hypotests/test_calculators.py @@ -1,4 +1,6 @@ #!/usr/bin/python +import copy + import pytest import numpy as np @@ -11,19 +13,35 @@ from hepstats.hypotests.parameters import POI, POIarray from hepstats.utils.fit.api_check import is_valid_loss, is_valid_data - true_mu = 1.2 true_sigma = 0.1 -def create_loss(constraint=False): +def create_loss(constraint=False, nbins=None, make2d=False): + if not isinstance(nbins, list): + nbins = [nbins] * 2 if make2d else [nbins] + obs1 = zfit.Space("x", limits=(0.1, 2.0), binning=nbins[0]) + obs = obs1 + if make2d: + obs2 = zfit.Space("y", limits=(-0.1, 3.0), binning=nbins[1]) + obs = obs1 * obs2 - obs = zfit.Space("x", limits=(0.1, 2.0)) - data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000)) + array1 = np.random.normal(1.2, 0.1, (10000, 2 if make2d else 1)) + data = zfit.data.Data.from_numpy(obs=obs.with_binning(None), array=array1) + if nbins[0] is not None: + data = data.to_binned(obs) mean = zfit.Parameter("mu", true_mu) sigma = zfit.Parameter("sigma", true_sigma) - model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma) - loss = UnbinnedNLL(model=model, data=data) + model = zfit.pdf.Gauss(obs=obs1.with_binning(None), mu=mean, sigma=sigma) + if make2d: + model2 = zfit.pdf.Gauss(obs=obs2.with_binning(None), mu=mean, sigma=sigma) + model = model * model2 + if nbins[0] is not None: + model = zfit.pdf.BinnedFromUnbinnedPDF(model, space=obs) + if nbins[0] is None: + loss = UnbinnedNLL(model=model, data=data) + else: + loss = zfit.loss.BinnedNLL(model=model, data=data) if constraint: loss.add_constraints( @@ -36,13 +54,31 @@ def create_loss(constraint=False): @pytest.mark.parametrize( - "calculator", [BaseCalculator, AsymptoticCalculator, FrequentistCalculator] + "calculator", + [BaseCalculator, AsymptoticCalculator, FrequentistCalculator, "AsymptoticOld"], ) -def test_base_calculator(calculator): +@pytest.mark.parametrize("make2d", [False, True], ids=["1d", "2d"]) +@pytest.mark.parametrize( + "nbins", + [None, [10, 12], [5, 50]], + ids=lambda x: f"Binning {x}" if x is not None else "Unbinned", +) +def test_base_calculator(calculator, make2d, nbins): + if calculator == "AsymptoticOld": + if make2d: + pytest.skip("AsymptoticOld does not support 2D") + if nbins is not None: + pytest.skip("AsymptoticOld does not support binned") + + class calculator(AsymptoticCalculator): + UNBINNED_TO_BINNED_LOSS = {} + + assert calculator is not AsymptoticCalculator, "Must not be the same" + assert AsymptoticCalculator.UNBINNED_TO_BINNED_LOSS, "Has to be filled" with pytest.raises(TypeError): calculator() - loss, (mean, sigma) = create_loss() + loss, (mean, sigma) = create_loss(make2d=make2d, nbins=nbins) with pytest.raises(ValueError): calculator("loss", Minuit()) diff --git a/tests/hypotests/test_confidence_intervals.py b/tests/hypotests/test_confidence_intervals.py index a6b06e70..5299db58 100644 --- a/tests/hypotests/test_confidence_intervals.py +++ b/tests/hypotests/test_confidence_intervals.py @@ -2,7 +2,7 @@ import numpy as np import zfit import os -from zfit.loss import ExtendedUnbinnedNLL, UnbinnedNLL +from zfit.loss import 
ExtendedUnbinnedNLL, UnbinnedNLL, ExtendedBinnedNLL, BinnedNLL from zfit.minimize import Minuit import hepstats @@ -15,42 +15,11 @@ notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests" -def create_loss(): - - bounds = (0.1, 3.0) - obs = zfit.Space("x", limits=bounds) - - # Data and signal - np.random.seed(0) - tau = -2.0 - beta = -1 / tau - bkg = np.random.exponential(beta, 300) - peak = np.random.normal(1.2, 0.1, 80) - data = np.concatenate((bkg, peak)) - data = data[(data > bounds[0]) & (data < bounds[1])] - N = len(data) - data = zfit.data.Data.from_numpy(obs=obs, array=data) - - mean = zfit.Parameter("mean", 1.2, 0.5, 2.0) - sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.2) - lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) - Nsig = zfit.Parameter("Ns", 20.0, -20.0, N) - Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 1.1) - - signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig) - background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) - tot_model = zfit.pdf.SumPDF([signal, background]) - - loss = ExtendedUnbinnedNLL(model=tot_model, data=data) - - return loss, mean - - -def test_constructor(): +def test_constructor(create_loss): with pytest.raises(TypeError): ConfidenceInterval() - loss, mean = create_loss() + loss, (_, __, mean, _) = create_loss(npeak=80) calculator = BaseCalculator(loss, Minuit()) poi_1 = POI(mean, 1.5) @@ -66,45 +35,55 @@ def test_constructor(): ConfidenceInterval(calculator, [poi_1], [poi_2], qtilde=False) -def asy_calc(): - loss, mean = create_loss() +def asy_calc(create_loss, nbins=None): + loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins) return mean, AsymptoticCalculator(loss, Minuit()) -def freq_calc(): - loss, mean = create_loss() +def asy_calc_old(create_loss, nbins=None): + loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins) + + class calculator(AsymptoticCalculator): + UNBINNED_TO_BINNED_LOSS = {} + + assert calculator is not AsymptoticCalculator, "Must not be the same" + assert AsymptoticCalculator.UNBINNED_TO_BINNED_LOSS, "Has to be filled" + return mean, calculator(loss, Minuit()) + + +def freq_calc(create_loss, nbins=None): + loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins) calculator = FrequentistCalculator.from_yaml( f"{notebooks_dir}/toys/ci_freq_zfit_toys.yml", loss, Minuit() ) return mean, calculator -@pytest.mark.parametrize("calculator", [asy_calc, freq_calc]) -def test_with_gauss_exp_example(calculator): - - mean, calculator = calculator() +@pytest.mark.parametrize("calculator", [asy_calc, freq_calc, asy_calc_old]) +@pytest.mark.parametrize("nbins", [None, 47, 300], ids=lambda x: f"nbins={x}") +def test_with_gauss_exp_example(create_loss, calculator, nbins): + if calculator is asy_calc_old and nbins is not None: + pytest.skip("Not implemented for old calculator") + mean, calculator = calculator(create_loss, nbins=nbins) scan_values = np.linspace(1.15, 1.26, 50) poinull = POIarray(mean, scan_values) ci = ConfidenceInterval(calculator, poinull) interval = ci.interval() - assert interval["lower"] == pytest.approx(1.1810371356602791, rel=0.1) assert interval["upper"] == pytest.approx(1.2156701172321935, rel=0.1) - with pytest.raises(POIRangeError): poinull = POIarray( mean, scan_values[(scan_values >= 1.2) & (scan_values <= 1.205)] ) + ci = ConfidenceInterval(calculator, poinull) ci.interval() - with pytest.raises(POIRangeError): - poinull = POIarray(mean, scan_values[(scan_values >= 1.2)]) + poinull = POIarray(mean, 
scan_values[scan_values >= 1.2]) ci = ConfidenceInterval(calculator, poinull) ci.interval() - with pytest.raises(POIRangeError): - poinull = POIarray(mean, scan_values[(scan_values <= 1.205)]) + poinull = POIarray(mean, scan_values[scan_values <= 1.205]) ci = ConfidenceInterval(calculator, poinull) ci.interval() diff --git a/tests/hypotests/test_discovery.py b/tests/hypotests/test_discovery.py index 3ab53929..26d11319 100644 --- a/tests/hypotests/test_discovery.py +++ b/tests/hypotests/test_discovery.py @@ -1,6 +1,7 @@ import os import pytest import numpy as np +import tqdm import zfit from zfit.loss import ExtendedUnbinnedNLL, UnbinnedNLL from zfit.minimize import Minuit @@ -16,43 +17,15 @@ from hepstats.hypotests import Discovery from hepstats.hypotests.parameters import POI -notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests" +notebooks_dir = f"{os.path.dirname(hepstats.__file__)}/../../notebooks/hypotests" -def create_loss(): - - bounds = (0.1, 3.0) - obs = zfit.Space("x", limits=bounds) - - # Data and signal - np.random.seed(0) - tau = -2.0 - beta = -1 / tau - bkg = np.random.exponential(beta, 300) - peak = np.random.normal(1.2, 0.1, 25) - data = np.concatenate((bkg, peak)) - data = data[(data > bounds[0]) & (data < bounds[1])] - N = len(data) - data = zfit.data.Data.from_numpy(obs=obs, array=data) - - lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) - Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N) - Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 1.1) - - signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) - background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) - tot_model = zfit.pdf.SumPDF([signal, background]) - - loss = ExtendedUnbinnedNLL(model=tot_model, data=data) - - return loss, (Nsig, Nbkg) - - -def test_constructor(): +@pytest.mark.parametrize("nbins", [None, 30], ids=["unbinned", "binned"]) +def test_constructor(create_loss, nbins): with pytest.raises(TypeError): Discovery() - loss, (Nsig, Nbkg) = create_loss() + loss, (Nsig, Nbkg, _, _) = create_loss(nbins=nbins, npeak=25) calculator = BaseCalculator(loss, Minuit()) poi_1 = POI(Nsig, 0.0) @@ -68,35 +41,63 @@ def test_constructor(): Discovery(calculator, [poi_1], [poi_2]) -def test_with_asymptotic_calculator(): +class AsymptoticCalculatorOld(AsymptoticCalculator): + UNBINNED_TO_BINNED_LOSS = {} - loss, (Nsig, Nbkg) = create_loss() - calculator = AsymptoticCalculator(loss, Minuit()) + +@pytest.mark.parametrize( + "nbins", [None, 76, 253], ids=lambda x: "unbinned" if x is None else f"nbin={x}" +) +@pytest.mark.parametrize("Calculator", [AsymptoticCalculator, AsymptoticCalculatorOld]) +def test_with_asymptotic_calculator(create_loss, nbins, Calculator): + if Calculator is AsymptoticCalculatorOld and nbins is not None: + pytest.skip("Old AsymptoticCalculator does not support binned loss") + + loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=25, nbins=nbins) + mean.floating = False + sigma.floating = False + calculator = Calculator(loss, Minuit()) poinull = POI(Nsig, 0) discovery_test = Discovery(calculator, poinull) pnull, significance = discovery_test.result() - assert pnull == pytest.approx(0.0007571045089567185, abs=0.05) - assert significance == pytest.approx(3.1719464953752565, abs=0.05) - assert significance >= 3 + uncertainty = 0.05 + if nbins is not None and nbins < 80: + uncertainty *= 4 + # check absolute significance + assert pnull == pytest.approx(0.000757, abs=uncertainty) + assert significance == pytest.approx(3.17, abs=uncertainty) + 
assert significance >= 3 -def test_with_frequentist_calculator(): - loss, (Nsig, Nbkg) = create_loss() +@pytest.mark.parametrize( + "nbins", [None, 95, 153], ids=lambda x: "unbinned" if x is None else f"nbin={x}" +) +def test_with_frequentist_calculator(create_loss, nbins): + loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=25, nbins=nbins) + mean.floating = False + sigma.floating = False calculator = FrequentistCalculator.from_yaml( f"{notebooks_dir}/toys/discovery_freq_zfit_toys.yml", loss, Minuit() ) + # calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=500, ntoysalt=500) poinull = POI(Nsig, 0) discovery_test = Discovery(calculator, poinull) pnull, significance = discovery_test.result() - assert pnull == pytest.approx(0.0004, rel=0.05, abs=0.0005) - assert significance == pytest.approx(3.3527947805048592, rel=0.05, abs=0.1) + abserr = 0.1 + if nbins is not None and nbins < 120: + abserr *= 4 + abserr_pnull = 0.0005 + if nbins is not None and nbins < 120: + abserr_pnull *= 4 + assert pnull == pytest.approx(0.0004, rel=0.05, abs=abserr_pnull) + assert significance == pytest.approx(3.3427947805048592, rel=0.05, abs=abserr) assert significance >= 3 @@ -126,7 +127,6 @@ def __init__( def create_loss_counting(): - n = 370 nbkg = 340 @@ -145,7 +145,6 @@ def create_loss_counting(): def test_counting_with_asymptotic_calculator(): - ( loss, Nsig, @@ -161,7 +160,6 @@ def test_counting_with_asymptotic_calculator(): def test_counting_with_frequentist_calculator(): - ( loss, Nsig, diff --git a/tests/hypotests/test_upperlimit.py b/tests/hypotests/test_upperlimit.py index a2f6f367..13d33853 100644 --- a/tests/hypotests/test_upperlimit.py +++ b/tests/hypotests/test_upperlimit.py @@ -15,40 +15,40 @@ notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests" -def create_loss(): - - bounds = (0.1, 3.0) - obs = zfit.Space("x", limits=bounds) - - # Data and signal - np.random.seed(0) - tau = -2.0 - beta = -1 / tau - bkg = np.random.exponential(beta, 300) - peak = np.random.normal(1.2, 0.1, 10) - data = np.concatenate((bkg, peak)) - data = data[(data > bounds[0]) & (data < bounds[1])] - N = len(data) - data = zfit.data.Data.from_numpy(obs=obs, array=data) - - lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) - Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N) - Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 1.1) - - signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) - background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) - tot_model = zfit.pdf.SumPDF([signal, background]) - - loss = ExtendedUnbinnedNLL(model=tot_model, data=data) - - return loss, (Nsig, Nbkg) - - -def test_constructor(): +# def create_loss(): +# +# bounds = (0.1, 3.0) +# obs = zfit.Space("x", limits=bounds) +# +# # Data and signal +# np.random.seed(0) +# tau = -2.0 +# beta = -1 / tau +# bkg = np.random.exponential(beta, 300) +# peak = np.random.normal(1.2, 0.1, 10) +# data = np.concatenate((bkg, peak)) +# data = data[(data > bounds[0]) & (data < bounds[1])] +# N = len(data) +# data = zfit.data.Data.from_numpy(obs=obs, array=data) +# +# lambda_ = zfit.Parameter("lambda", -2.0, -10.0, -0.1) +# Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N) +# Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 2) +# +# signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) +# background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) +# tot_model = zfit.pdf.SumPDF([signal, background]) +# +# loss = ExtendedUnbinnedNLL(model=tot_model, data=data) +# +# 
return loss, (Nsig, Nbkg) + + +def test_constructor(create_loss): with pytest.raises(TypeError): UpperLimit() - loss, (Nsig, Nbkg) = create_loss() + loss, (Nsig, Nbkg, _, _) = create_loss(npeak=10) calculator = BaseCalculator(loss, Minuit()) poi_1 = POI(Nsig, 0.0) @@ -64,23 +64,43 @@ def test_constructor(): UpperLimit(calculator, [poi_1], poi_2) -def asy_calc(): - loss, (Nsig, Nbkg) = create_loss() +class AsymptoticCalculatorOld(AsymptoticCalculator): + UNBINNED_TO_BINNED_LOSS = {} + + +def asy_calc(create_loss, nbins): + loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins) + mean.floating = False + sigma.floating = False return Nsig, AsymptoticCalculator(loss, Minuit()) -def freq_calc(): - loss, (Nsig, Nbkg) = create_loss() +def asy_calc_old(create_loss, nbins): + loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins) + mean.floating = False + sigma.floating = False + return Nsig, AsymptoticCalculatorOld(loss, Minuit()) + + +def freq_calc(create_loss, nbins): + loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins) + mean.floating = False + sigma.floating = False calculator = FrequentistCalculator.from_yaml( f"{notebooks_dir}/toys/upperlimit_freq_zfit_toys.yml", loss, Minuit() ) + # calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=10000, ntoysalt=10000) return Nsig, calculator -@pytest.mark.parametrize("calculator", [asy_calc, freq_calc]) -def test_with_gauss_exp_example(calculator): - - Nsig, calculator = calculator() +@pytest.mark.parametrize( + "nbins", [None, 73, 211], ids=lambda x: "unbinned" if x is None else f"nbins={x}" +) +@pytest.mark.parametrize("calculator", [asy_calc, freq_calc, asy_calc_old]) +def test_with_gauss_exp_example(create_loss, calculator, nbins): + if calculator is asy_calc_old and nbins is not None: + pytest.skip("Old asymptotic calculator does not support binned loss") + Nsig, calculator = calculator(create_loss, nbins) poinull = POIarray(Nsig, np.linspace(0.0, 25, 15)) poialt = POI(Nsig, 0) @@ -89,8 +109,8 @@ def test_with_gauss_exp_example(calculator): ul_qtilde = UpperLimit(calculator, poinull, poialt, qtilde=True) limits = ul.upperlimit(alpha=0.05, CLs=True) - assert limits["observed"] == pytest.approx(15.725784747406346, rel=0.05) - assert limits["expected"] == pytest.approx(11.464238503550177, rel=0.05) + assert limits["observed"] == pytest.approx(16.7, rel=0.15) + assert limits["expected"] == pytest.approx(11.5, rel=0.15) assert limits["expected_p1"] == pytest.approx(16.729552184042365, rel=0.1) assert limits["expected_p2"] == pytest.approx(23.718823517614066, rel=0.15) assert limits["expected_m1"] == pytest.approx(7.977175378979202, rel=0.1)