diff --git a/.copier-answers.yml b/.copier-answers.yml
index 11c3dd20..a8e3bfd4 100644
--- a/.copier-answers.yml
+++ b/.copier-answers.yml
@@ -1,12 +1,13 @@
 # Changes here will be overwritten by Copier
-_commit: v1.4.1
+_commit: v1.4.2
 _src_path: gh:lincc-frameworks/python-project-template
 author_email: lincc-frameworks-team@lists.lsst.org
 author_name: LINCC Frameworks
 create_example_module: false
 custom_install: true
+include_benchmarks: true
 include_docs: true
-include_notebooks: false
+include_notebooks: true
 mypy_type_checking: basic
 package_name: lsdb
 preferred_linter: pylint
diff --git a/.github/workflows/asv-main.yml b/.github/workflows/asv-main.yml
new file mode 100644
index 00000000..0cc09df0
--- /dev/null
+++ b/.github/workflows/asv-main.yml
@@ -0,0 +1,114 @@
+# This workflow will run benchmarks with airspeed velocity (asv),
+# store the new results in the "benchmarks" branch and publish them
+# to a dashboard on GH Pages.
+
+name: Run ASV benchmarks for main
+
+on:
+  push:
+    branches: [ main ]
+
+env:
+  PYTHON_VERSION: "3.10"
+  WORKING_DIR: ${{ github.workspace }}/benchmarks
+
+jobs:
+
+  consecutiveness:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Set workflows on main to run consecutively
+        uses: mktcode/consecutive-workflow-action@eb43c6b5852dd0e33efa797a1817196d06daa4b2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+  setup-python:
+    runs-on: ubuntu-latest
+    needs: consecutiveness
+
+    steps:
+      - name: Cache Python ${{ env.PYTHON_VERSION }}
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: python-${{ env.PYTHON_VERSION }}
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+  asv-main:
+    runs-on: ubuntu-latest
+    needs: setup-python
+
+    permissions:
+      contents: write
+
+    defaults:
+      run:
+        working-directory: ${{ env.WORKING_DIR }}
+
+    steps:
+      - name: Checkout main branch of the repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Cache Python ${{ env.PYTHON_VERSION }}
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: python-${{ env.PYTHON_VERSION }}
+
+      # Jobs run on separate runners, so the interpreter installed by the
+      # "setup-python" job above is not available here.
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          python -m pip install --upgrade pip
+          pip install asv==0.5.1 virtualenv tabulate
+
+      - name: Configure git
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+      - name: Create ASV machine config file
+        run: asv machine --machine gh-runner --yes
+
+      - name: Fetch previous results from the "benchmarks" branch
+        run: |
+          if git ls-remote --exit-code origin benchmarks > /dev/null 2>&1; then
+            git merge origin/benchmarks \
+              --allow-unrelated-histories \
+              --no-commit
+            mv ../_results .
+          fi
+
+      - name: Run ASV for the main branch
+        run: asv run ALL --skip-existing
+
+      - name: Submit new results to the "benchmarks" branch
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          branch: benchmarks
+          folder: ${{ env.WORKING_DIR }}/_results
+          target-folder: _results
+
+      - name: Generate dashboard HTML
+        run: |
+          asv show
+          asv publish
+
+      - name: Deploy to Github pages
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          branch: gh-pages
+          folder: ${{ env.WORKING_DIR }}/_html
diff --git a/.github/workflows/asv-nightly.yml b/.github/workflows/asv-nightly.yml
new file mode 100644
index 00000000..ea42823f
--- /dev/null
+++ b/.github/workflows/asv-nightly.yml
@@ -0,0 +1,80 @@
+# This workflow will run daily at 06:45 UTC.
+# It will run benchmarks with airspeed velocity (asv)
+# and compare performance with the previous nightly build.
+
+name: Run benchmarks nightly job
+
+on:
+  schedule:
+    - cron: "45 6 * * *"
+
+env:
+  PYTHON_VERSION: "3.10"
+  WORKING_DIR: ${{ github.workspace }}/benchmarks
+  NIGHTLY_HASH_FILE: nightly-hash
+
+jobs:
+
+  asv-nightly:
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        working-directory: ${{ env.WORKING_DIR }}
+
+    steps:
+      - name: Checkout main branch of the repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Cache Python ${{ env.PYTHON_VERSION }}
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: python-${{ env.PYTHON_VERSION }}
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          python -m pip install --upgrade pip
+          pip install asv==0.5.1 virtualenv
+
+      - name: Create ASV machine config file
+        run: asv machine --machine gh-runner --yes
+
+      - name: Get nightly dates under comparison
+        id: nightly-dates
+        run: |
+          echo "yesterday=$(date -d yesterday +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+          echo "today=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
+      - name: Use last nightly commit hash from cache
+        uses: actions/cache@v3
+        with:
+          path: ${{ env.WORKING_DIR }}
+          key: nightly-results-${{ steps.nightly-dates.outputs.yesterday }}
+
+      - name: Run comparison of main against last nightly build
+        run: |
+          HASH_FILE=${{ env.NIGHTLY_HASH_FILE }}
+          CURRENT_HASH=${{ github.sha }}
+
+          if [ -f $HASH_FILE ]; then
+            PREV_HASH=$(cat $HASH_FILE)
+            asv continuous $PREV_HASH $CURRENT_HASH || true
+            asv compare $PREV_HASH $CURRENT_HASH --sort ratio
+          fi
+
+          echo $CURRENT_HASH > $HASH_FILE
+
+      - name: Update last nightly hash in cache
+        uses: actions/cache@v3
+        with:
+          path: ${{ env.WORKING_DIR }}
+          key: nightly-results-${{ steps.nightly-dates.outputs.today }}
diff --git a/.github/workflows/asv-pr.yml b/.github/workflows/asv-pr.yml
new file mode 100644
index 00000000..22c34b26
--- /dev/null
+++ b/.github/workflows/asv-pr.yml
@@ -0,0 +1,110 @@
+# This workflow will run benchmarks with airspeed velocity (asv) for pull requests.
+# It will compare the performance of the main branch with the performance of the merge
+# with the new changes and publish a comment with this assessment.
+
+name: Run ASV benchmarks for PR
+
+on:
+  pull_request:
+    branches: [ main ]
+
+env:
+  PYTHON_VERSION: "3.10"
+  WORKING_DIR: ${{ github.workspace }}/benchmarks
+
+jobs:
+
+  setup-python:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Cache Python ${{ env.PYTHON_VERSION }}
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: python-${{ env.PYTHON_VERSION }}
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+  asv-pr:
+    runs-on: ubuntu-latest
+    needs: setup-python
+
+    permissions:
+      actions: read
+      pull-requests: write
+
+    defaults:
+      run:
+        working-directory: ${{ env.WORKING_DIR }}
+
+    steps:
+      - name: Checkout PR branch of the repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Cache Python ${{ env.PYTHON_VERSION }}
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: python-${{ env.PYTHON_VERSION }}
+
+      # Jobs run on separate runners, so the interpreter installed by the
+      # "setup-python" job above is not available here.
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          python -m pip install --upgrade pip
+          pip install asv==0.5.1 virtualenv tabulate lf-asv-formatter
+
+      - name: Get current job logs URL
+        uses: Tiryoh/gha-jobid-action@v0
+        id: jobs
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          job_name: ${{ github.job }}
+
+      - name: Create ASV machine config file
+        run: asv machine --machine gh-runner --yes
+
+      - name: Run comparison of PR against main branch
+        run: |
+          git remote add upstream https://github.com/${{ github.repository }}.git
+          git fetch upstream
+          asv continuous upstream/main HEAD || true
+          asv compare upstream/main HEAD --sort ratio | tee output
+          python -m lf_asv_formatter
+          printf "\n\nClick [here]($STEP_URL) to view all benchmarks." >> output
+        env:
+          STEP_URL: "${{ steps.jobs.outputs.html_url }}#step:8:1"
+
+      - name: Publish comment to PR
+        uses: actions/github-script@v6
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+
+            const workingDir = process.env.WORKING_DIR;
+            try {
+              process.chdir(workingDir);
+              const comment = fs.readFileSync('output', 'utf-8');
+              const { data } = await github.rest.issues.createComment({
+                ...context.repo,
+                issue_number: context.issue.number,
+                body: comment,
+              });
+              console.log('Comment published:', data.html_url);
+            } catch (err) {
+              console.error(err);
+            }
diff --git a/.github/workflows/build-documentation.yml b/.github/workflows/build-documentation.yml
index 36f0b6cd..e329f578 100644
--- a/.github/workflows/build-documentation.yml
+++ b/.github/workflows/build-documentation.yml
@@ -26,6 +26,9 @@ jobs:
         python -m pip install --upgrade pip
         if [ -f docs/requirements.txt ]; then pip install -r docs/requirements.txt; fi
         pip install .
+    - name: Install notebook requirements
+      run: |
+        sudo apt-get install pandoc
     - name: Build docs
       run: |
         sphinx-build -T -E -b html -d docs/build/doctrees ./docs docs/build/html
diff --git a/.gitignore b/.gitignore
index 523b316c..4bb48883 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,3 +138,13 @@ dask-worker-space/
 
 # tmp directory
 tmp/
+
+# benchmarking
+_results/
+_html/
+
+# Mac OS
+.DS_Store
+
+# IntelliJ
+.idea
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 59020468..71d544db 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -116,7 +116,10 @@ repos:
           ]
 
 
-  # Make sure Sphinx can build the documentation without issues.
+  # Make sure Sphinx can build the documentation while explicitly omitting
+  # notebooks from the docs, so users don't have to wait through the execution
+  # of each notebook on each commit. By default, these will be checked in the
+  # GitHub workflows.
   - repo: local
     hooks:
       - id: sphinx-build
@@ -127,12 +130,15 @@ repos:
         exclude_types: [file, symlink]
         args:
           [
+            "-M", # Run sphinx in make mode, so we can use -D flag later
+            # Note: -M requires next 3 args to be builder, source, output
+            "html", # Specify builder
+            "./docs", # Source directory of documents
+            "./_readthedocs", # Output directory for rendered documents
             "-T", # Show full trace back on exception
-            "-E", # Don't use saved env. always read all files.
-            "-b", # Flag to select which builder to use
-            "html", # Use the HTML builder
+            "-E", # Don't use saved env; always read all files
             "-d", # Flag for cached environment and doctrees
-            "./docs/_build/doctrees", # directory
-            "./docs", # Source directory of documents
-            "./_readthedocs", # Output directory for rendered documents.
+            "./docs/_build/doctrees", # Directory
+            "-D", # Flag to override settings in conf.py
+            "exclude_patterns=notebooks/*", # Exclude our notebooks from pre-commit
           ]
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 00000000..8ae286b6
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,77 @@
+{
+    // The version of the config file format. Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+    // The name of the project being benchmarked.
+    "project": "lsdb",
+    // The project's homepage.
+    "project_url": "https://github.com/astronomy-commons/lsdb",
+    // The URL or local path of the source code repository for the
+    // project being benchmarked.
+    "repo": "..",
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "tip" (for mercurial).
+    "branches": [
+        "HEAD"
+    ],
+    "build_command": [
+        "python -m build --wheel -o {build_cache_dir} {build_dir}"
+    ],
+    // The DVCS being used. If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+    // The tool to use to create environments. May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+    // The base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/astronomy-commons/lsdb/commit/",
+    // The Pythons you'd like to test against. If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": [
+        "3.10"
+    ],
+    // The matrix of dependencies to test. Each key is the name of a
+    // package (in PyPI) and the values are version numbers. An empty
+    // list indicates to just test against the default (latest)
+    // version.
+    "matrix": {
+        "Cython": [],
+        "build": [],
+        "packaging": []
+    },
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in. If not provided, defaults to "benchmarks".
+    "benchmark_dir": ".",
+    // The directory (relative to the current directory) to cache the Python
+    // environments in. If not provided, defaults to "env".
+    "env_dir": "env",
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in. If not provided, defaults to "results".
+    "results_dir": "_results",
+    // The directory (relative to the current directory) that the html tree
+    // should be written to. If not provided, defaults to "html".
+    "html_dir": "_html",
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time. This is the
+    // number of builds to keep, per environment.
+    "build_cache_size": 8
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions. The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    // "some_benchmark": "352cdf", // Consider regressions only after this commit
+    // "another_benchmark": null, // Skip regression detection altogether
+    // }
+}
diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
new file mode 100644
index 00000000..b83093c8
--- /dev/null
+++ b/benchmarks/benchmarks.py
@@ -0,0 +1,31 @@
+"""Sample benchmark measuring the runtime of a small catalog crossmatch.
+
+For more information on writing benchmarks:
+https://asv.readthedocs.io/en/stable/writing_benchmarks.html."""
+import os
+
+import lsdb
+
+TEST_DIR = os.path.join(os.path.dirname(__file__), '..', 'tests')
+DATA_DIR_NAME = "data"
+SMALL_SKY_DIR_NAME = "small_sky"
+SMALL_SKY_XMATCH_NAME = "small_sky_xmatch"
+
+
+def load_small_sky():
+    """Read the "small_sky" catalog from the test data directory."""
+    path = os.path.join(TEST_DIR, DATA_DIR_NAME, SMALL_SKY_DIR_NAME)
+    return lsdb.read_hipscat(path, catalog_type=lsdb.Catalog)
+
+
+def load_small_sky_xmatch():
+    """Read the "small_sky_xmatch" catalog from the test data directory."""
+    path = os.path.join(TEST_DIR, DATA_DIR_NAME, SMALL_SKY_XMATCH_NAME)
+    return lsdb.read_hipscat(path, catalog_type=lsdb.Catalog)
+
+
+def time_crossmatch():
+    """Time computations are prefixed with 'time'."""
+    small_sky = load_small_sky()
+    small_sky_xmatch = load_small_sky_xmatch()
+    small_sky.crossmatch(small_sky_xmatch).compute()
diff --git a/docs/Makefile b/docs/Makefile
index aa8ae081..a5622f10 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -4,16 +4,21 @@
 # You can set these variables from the command line, and also
 # from the environment for the first two.
 SPHINXOPTS    ?= -T -E -d _build/doctrees -D language=en
+EXCLUDENB     ?= -D exclude_patterns="notebooks/*","_build","**.ipynb_checkpoints"
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
 BUILDDIR      = ../_readthedocs/
 
-.PHONY: help clean Makefile
+.PHONY: help clean Makefile no-nb no-notebooks
 
 # Put it first so that "make" without argument is like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
+# Build all Sphinx docs locally, except the notebooks
+no-nb no-notebooks:
+	@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(EXCLUDENB) $(O)
+
 # Cleans up files generated by the build process
 clean:
 	rm -r "_build/doctrees"
diff --git a/docs/conf.py b/docs/conf.py
index 6376c72f..1d3c65a4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -29,6 +29,7 @@ extensions = ["sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.viewcode"]
 
 
 extensions.append("autoapi.extension")
+extensions.append("nbsphinx")
 
 templates_path = []
 exclude_patterns = ["_build", "**.ipynb_checkpoints"]
diff --git a/docs/requirements.txt b/docs/requirements.txt
index dfb53d03..5c0f7d8a 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,3 +1,9 @@
 sphinx==6.1.3
 sphinx-rtd-theme==1.2.0
 sphinx-autoapi==2.0.1
+nbsphinx
+ipython
+jupytext
+jupyter
+matplotlib
+numpy
diff --git a/pyproject.toml b/pyproject.toml
index 1a5af230..8f5da1e2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,15 @@ dev = [
     "sphinx==6.1.3", # Used to automatically generate documentation
     "sphinx-rtd-theme==1.2.0", # Used to render documentation
     "sphinx-autoapi==2.0.1", # Used to automatically generate api documentation
-]
+    # if you add dependencies here while experimenting in a notebook and you
+    # want that notebook to render in your documentation, please add the
+    # dependencies to ./docs/requirements.txt as well.
+    "nbconvert", # Needed for pre-commit check to clear output from Python notebooks
+    "nbsphinx", # Used to integrate Python notebooks into Sphinx documentation
+    "ipython", # Also used in building notebooks into Sphinx
+    "asv==0.5.1", # Used to compute performance benchmarks
+]
+
 [build-system]
 requires = [
     "setuptools>=62", # Used to build and package the Python project