dillondaudert
diff --git a/‎.github/workflows/CI.yml‎
Lines changed: 24 additions & 22 deletions b/‎.github/workflows/CI.yml‎
Lines changed: 24 additions & 22 deletions
diff --git a/‎.github/workflows/TagBot.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/TagBot.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/documentation.yml‎
Lines changed: 33 additions & 0 deletions b/‎.github/workflows/documentation.yml‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎PlotMNIST.ipynb‎
Lines changed: 0 additions & 99 deletions b/‎PlotMNIST.ipynb‎
Lines changed: 0 additions & 99 deletions
diff --git a/‎Project.toml‎
Lines changed: 7 additions & 8 deletions b/‎Project.toml‎
Lines changed: 7 additions & 8 deletions
diff --git a/‎README.md‎
Lines changed: 34 additions & 48 deletions b/‎README.md‎
Lines changed: 34 additions & 48 deletions
diff --git a/‎docs/.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎docs/.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/Project.toml‎
Lines changed: 10 additions & 0 deletions b/‎docs/Project.toml‎
Lines changed: 10 additions & 0 deletions
@@ -2,9 +2,8 @@ name: CI
 on:
   pull_request:
   push:
-    branches:
-      - master
-    tags: '*'
+    branches: [main, master]
+    tags: ["*"]
 jobs:
   test:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
@@ -13,34 +12,37 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6'
           - '1'
-#          - 'nightly'
         os:
           - ubuntu-latest
-          - macOS-latest
           - windows-latest
         arch:
           - x64
+        include:
+          - os: macOS-latest
+            arch: x64
+            version: '1'
     steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v1
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
+      - uses: julia-actions/cache@v2
       - uses: julia-actions/julia-buildpkg@v1
-      - uses: julia-actions/julia-runtest@v1
-      - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v1
+      - name: Run tests with coverage
+        uses: julia-actions/julia-runtest@v1
         with:
-          file: lcov.info
+          coverage: true
+      # Process coverage with Coverage.jl and upload it to Codecov and Coveralls
+      - name: Process and upload coverage with Coverage.jl
+        if: matrix.version == '1' && matrix.os == 'ubuntu-latest'
+        run: |
+          julia -e '
+            using Pkg; Pkg.add("Coverage")
+            using Coverage
+            process_and_upload(service=:both, folder="src")
+          '
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+          COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
@@ -12,3 +12,4 @@ jobs:
       - uses: JuliaRegistries/TagBot@v1
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
+          ssh: ${{ secrets.DOCUMENTER_KEY }}
@@ -0,0 +1,33 @@
+name: Documentation  # runs docs/make.jl on pushes to the branches below and on tags
+on:
+    push:
+        branches:
+            - master
+            - v0.2-dev
+        tags: ["*"]  # list form, matching the tags filter in CI.yml
+
+jobs:
+    build:
+        permissions:  # scopes granted to this job's GITHUB_TOKEN
+            actions: write
+            contents: write
+            pull-requests: read
+            statuses: write
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/checkout@v4
+            - uses: julia-actions/setup-julia@v2
+              with:
+                version: '1'  # quoted so the version stays a string
+            - uses: julia-actions/cache@v2
+            - name: Install dependencies
+              shell: julia --color=yes --project=docs {0}  # run body below is Julia, not bash
+              run: |
+                using Pkg
+                Pkg.develop(PackageSpec(path=pwd()))
+                Pkg.instantiate()
+            - name: Build and deploy
+              run: julia --color=yes --project=docs docs/make.jl
+              env:  # credentials exposed to docs/make.jl
+                GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+                DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
@@ -6,3 +6,6 @@ deps/deps.jl
 *.ipynb_checkpoints
 
 Manifest.toml
+Manifest-*.toml
+.vscode/settings.json
+.DS_Store
@@ -1,9 +1,13 @@
 name = "UMAP"
 uuid = "c4f8c510-2410-5be4-91d7-4fbaeb39457e"
+version = "0.2.0"
 authors = ["Dillon Daudert <[email protected]>"]
-version = "0.1.11"
+
+[workspace]
+projects = ["UMAP", "test", "docs"]
 
 [deps]
+Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
 Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97"
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -13,14 +17,9 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [compat]
+Accessors = "0.1.43"
 Arpack = "0.4, 0.5"
 Distances = "0.8, 0.9, 0.10"
 LsqFit = "0.6, 0.7, 0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15"
 NearestNeighborDescent = "0.3"
-julia = "1.6"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test"]
+julia = "1.10, 1.11, 1.12"
@@ -1,5 +1,7 @@
 # UMAP.jl
-[![Coverage Status](https://coveralls.io/repos/github/dillondaudert/UMAP.jl/badge.svg?branch=master)](https://coveralls.io/github/dillondaudert/UMAP.jl?branch=master) [![codecov](https://codecov.io/gh/dillondaudert/UMAP.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/dillondaudert/UMAP.jl)
+| **Documentation** | **Build Status** | **Test Coverage** |
+|:-----------------:|:----------------:|:----------------:|
+| [![][docs-stable-img]][docs-stable-url] [![][docs-dev-img]][docs-dev-url] | [![CI](https://github.com/dillondaudert/UMAP.jl/actions/workflows/CI.yml/badge.svg?branch=v0.2-dev)](https://github.com/dillondaudert/UMAP.jl/actions/workflows/CI.yml) | [![][codecov-img]][codecov-url] [![][coveralls-img]][coveralls-url] |
 
 A pure Julia implementation of the [Uniform Manifold Approximation and Projection](https://arxiv.org/abs/1802.03426) dimension reduction
 algorithm
@@ -9,74 +11,58 @@ algorithm
 
 ## Usage
 ```jl
-embedding = umap(X, n_components; n_neighbors, metric, min_dist, ...)
+result = UMAP.fit(data, n_components; n_neighbors, metric, ...) -> UMAP.UMAPResult
+result.embedding
 ```
-The `umap` function takes two arguments, `X` (a column-major matrix of shape (n_features, n_samples)), `n_components` (the number of dimensions in the output embedding), and various keyword arguments. Several important ones are:
-- `n_neighbors::Int=15`: This controls how many neighbors around each point are considered to be part of its local neighborhood. Larger values will result in embeddings that capture more global structure, while smaller values will preserve more local structures.
-- `metric::SemiMetric=Euclidean()`: The (semi)metric to use when calculating distances between points. This can be any subtype of the `SemiMetric` type from the `Distances.jl` package, including user-defined types.
-- `min_dist::Float=0.1`: This controls the minimum spacing of points in the embedding. Larger values will cause points to be more evenly distributed, while smaller values will preserve more local structure.
+The `fit` function takes two positional arguments — `data` (either a column-major matrix or a vector of "points", e.g. vectors) and `n_components` (the number of dimensions in the output embedding) — plus various keyword arguments. Several important ones are:
+- `n_neighbors`: This controls how many neighbors around each point are considered to be part of its local neighborhood. Larger values will result in embeddings that capture more global structure, while smaller values will preserve more local structures.
+- `metric`: The distance (semi-)metric to use when calculating distances between points. This can be any subtype of the `SemiMetric` type from the `Distances.jl` package, including user-defined types.
+- `min_dist`: This controls the minimum spacing of points in the embedding. Larger values will cause points to be more evenly distributed, while smaller values will preserve more local structure.
 
-The returned `embedding` will be a matrix of shape (n_components, n_samples).
+`UMAP.fit` returns a `UMAPResult` struct, with the output embedding at
+`result.embedding`.
 
 ### Using precomputed distances
-UMAP can use a precomputed distance matrix instead of finding the nearest neighbors itself. In this case, the distance matrix is passed as `X` and the `metric` keyword argument should be `:precomputed`. Example:
+UMAP can use a precomputed distance matrix instead of finding the nearest neighbors itself. In this case, the distance matrix is passed as `data` and the `metric` keyword argument should be `:precomputed`. Example:
 
 ```jl
-embedding = umap(distances, n_components; metric=:precomputed)
+result = UMAP.fit(distances, n_components; metric=:precomputed)
 ```
 
-## Fitting a UMAP model to a dataset and transforming new data
+### Transforming new data
 
-### Constructing a model
-To construct a model to use for embedding new data, use the constructor:
+After embedding a dataset, we can transform new points into the same
+embedding space via `UMAP.transform`:
 ```jl
-model = UMAP_(X, n_components; <kwargs>)
-```
-where the constructor takes the same keyword arguments (kwargs) as `umap`. The returned object has the following fields:
-```jl
-model.graph     # The graph of fuzzy simplicial set membership strengths of each point in the dataset
-model.embedding # The embedding of the dataset
-model.data      # A reference to the original dataset
-model.knns      # A matrix of indices of nearest neighbors of points in the dataset,
-                # as determined on the original manifold (may be approximate)
-model.dists     # The distances of the neighbors indicated by model.knns
-```
+result = UMAP.fit(data, n_components; <kwargs>)
 
-### Embedding new data
-To transform new data into the existing embedding of a UMAP model, use the `transform` function:
-```jl
-Q_embedding = transform(model, Q; <kwargs>)
+transform_result = UMAP.transform(result, new_data) -> UMAP.UMAPTransformResult
+transform_result.embedding
 ```
-where `Q` is a matrix of new query data to embed into the existing embedding, and `model` is the object obtained from the `UMAP_` call above. `Q` must come from a space of the same dimensionality as `model.data` (ie `X` in the `UMAP_` call above).
 
-The remaining keyword arguments (kwargs) are the same as for above functions.
+Note that the type of `new_data` must match the original `data`
+exactly. The parameterization used for `fit` is re-used where
+appropriate in `transform`, via the `UMAPResult` struct.
 
-## Implementation Details
-There are two main steps involved in UMAP: building a weighted graph with edges connecting points to their nearest neighbors, and optimizing the low-dimensional embedding of that graph. The first step is accomplished either by an exact kNN search (for datasets with `< 4096` points) or by the approximate kNN search algorithm, [NNDescent](https://github.com/dillondaudert/NearestNeighborDescent.jl). This step is also usually the most costly.
-
-The low-dimensional embedding is initialized (by default) with the eigenvectors of the normalized Laplacian of the kNN graph. These are found using ARPACK (via [Arpack.jl](https://github.com/JuliaLinearAlgebra/Arpack.jl)).
+## Examples
+The docs have more examples, e.g.:
+- [MNIST](https://dillondaudert.github.io/UMAP.jl/dev/examples/mnist/)
+- [Advanced Usage](https://dillondaudert.github.io/UMAP.jl/dev/examples/advanced_usage/)
 
-## Current Limitations
-- **Input data types**: Only data points that are represented by vectors of numbers (passed in as a matrix) are valid inputs. This is mostly due to a lack of support for other formats in [NNDescent](https://github.com/dillondaudert/NearestNeighborDescent.jl). Support for e.g. string datasets is possible in the future
-- **Sequential**: This implementation does not take advantage of any parallelism
 
 ## External Resources
 - [Understanding UMAP](https://pair-code.github.io/understanding-umap/)
 - For a great description of how UMAP works, see [this page](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html) from the Python UMAP documentation
 - If you're familiar with [t-SNE](https://lvdmaaten.github.io/tsne/), then [this page](https://jlmelville.github.io/uwot/umap-for-tsne.html) describes UMAP with similar vocabulary to that dimension reduction algorithm
 
-## Examples
-The full MNIST and FMNIST datasets are plotted below using both this implementation and the [Python implementation](github.com/lmcinnes/umap) for comparison. These were generated by [this notebook](PlotMNIST.ipynb).
+[docs-stable-img]: https://img.shields.io/badge/docs-stable-blue.svg
+[docs-stable-url]: https://dillondaudert.github.io/UMAP.jl/stable
 
-Note that the memory allocation for the Python UMAP is unreliable, as Julia's benchmarking doesn't count memory allocated within Python itself.
-### MNIST
-![Julia MNIST](img/mnist_julia.png)
-![Python MNIST](img/mnist_python.png)
+[docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg
+[docs-dev-url]: https://dillondaudert.github.io/UMAP.jl/dev
 
-### FMNIST
-![Julia FMNIST](img/fmnist_julia.png)
-![Python FMNIST](img/fmnist_python.png)
+[codecov-img]: https://codecov.io/gh/dillondaudert/UMAP.jl/branch/v0.2-dev/graph/badge.svg
+[codecov-url]: https://codecov.io/gh/dillondaudert/UMAP.jl
 
-## Disclaimer
-This implementation is a work-in-progress. If you encounter any issues, please create
-an issue or make a pull request.
+[coveralls-img]: https://coveralls.io/repos/github/dillondaudert/UMAP.jl/badge.svg?branch=v0.2-dev
+[coveralls-url]: https://coveralls.io/github/dillondaudert/UMAP.jl?branch=v0.2-dev
@@ -0,0 +1,2 @@
+build/
+.DS_Store
@@ -0,0 +1,10 @@
+[deps]
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
+NearestNeighborDescent = "dd2c4c9e-a32f-5b2f-b342-08c2f244fce8"
+Pluto = "c3e4b0f8-55cb-11ea-2926-15256bba5781"
+PlutoStaticHTML = "359b1769-a58e-495b-9770-312e911026ad"
+StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404"
+UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e"