
Commit 10c9760

IALM implementation (#7)
* Implemented IALM
* Updated README to reflect changes
1 parent 6faa1ed commit 10c9760


8 files changed: +175 additions, -46 deletions


.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
@@ -29,7 +29,7 @@ jobs:
     runs-on: ubuntu-24.04
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -46,7 +46,7 @@ jobs:
     runs-on: ubuntu-24.04
     strategy:
      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+.vscode/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

README.md

Lines changed: 30 additions & 10 deletions
@@ -1,20 +1,40 @@
-# Robust principle component analysis for Python
+# Robust principal component analysis for Python
 
-Work in progress. Coming soon here and on PyPI.
+This package provides algorithms to solve the Robust Principal Component Analysis (RPCA) problem, as presented by Candès et al.[[1]](#candes2011).
+Currently, a single algorithm is implemented: it solves the Principal Component Pursuit (PCP) convex relaxation of RPCA from the same paper, using the Inexact Augmented Lagrange Multiplier (IALM) method from Lin et al.[[2]](#lin2011)[[3]](#lin2013).
+
+## Example
+```python
+import numpy
+from pyrpca import rpca_pcp_ialm
+
+# given an m x n data matrix
+data = ...
+
+# decide on the sparsity factor.
+# this parameter is also commonly known as `lambda`.
+sparsity_factor = 1.0 / numpy.sqrt(max(data.shape))
+
+# run the IALM algorithm.
+low_rank, sparse = rpca_pcp_ialm(data, sparsity_factor)
+```
 
 ## Installing
 ```shell
 pip install pyrpca
 ```
 
-## Example
-TODO
+## Feature requests and contributing
+Pull requests and feature requests are welcome. The current version is minimal and suits my personal needs, but feel free to make suggestions. Even if the repository looks inactive, I will still respond :)
+
+## References
+1. <a name="candes2011"></a> [Emmanuel J. Candès, Xiaodong Li, Yi Ma, John Wright. Robust principal component analysis? Association for Computing Machinery 2011.](https://doi.org/10.1145/1970392.1970395) (preprint on [arXiv](https://doi.org/10.48550/arXiv.0912.3599))
+2. <a name="lin2011"></a> [Zhouchen Lin, Risheng Liu, Zhixun Su. Linearized Alternating Direction Method with Adaptive Penalty for Low-Rank Representation. arXiv 2011.](https://doi.org/10.48550/arXiv.1109.0367)
+3. <a name="lin2013"></a> [Zhouchen Lin, Minming Chen, Yi Ma. The Augmented Lagrange Multiplier Method for Exact Recovery of Corrupted Low-Rank Matrices, V3. arXiv 2013.](https://doi.org/10.48550/arXiv.1009.5055)
 
-## Acknowledgements
-Appreciation is due to various other Python implementations of RPCA; below is a non-exhaustive list.
-The code in this project has been loosely inspired by these works.
+## Acknowledgements
+Appreciation is due to various other Python implementations of RPCA that served as inspiration for this project. Below is a non-exhaustive list:
 
-- https://github.com/2020leon/rpca
-- https://github.com/dganguli/robust-pca
-- https://github.com/weilinear/PyRPCA
+- https://github.com/2020leon/rpca
+- https://github.com/dganguli/robust-pca
+- https://github.com/weilinear/PyRPCA
 - https://github.com/loiccoyle/RPCA
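
For reference, the Principal Component Pursuit program that the new README describes (as formulated in Candès et al. [1]) splits the observed matrix $D$ into a low-rank component $A$ and a sparse component $E$ by solving

$$\min_{A,\,E} \; \lVert A \rVert_* + \lambda \lVert E \rVert_1 \quad \text{subject to} \quad A + E = D,$$

where $\lVert A \rVert_*$ is the nuclear norm, $\lVert E \rVert_1$ is the entrywise $\ell_1$ norm, and $\lambda$ corresponds to the `sparsity_factor` argument in the example above; the default $\lambda = 1/\sqrt{\max(m, n)}$ matches the choice recommended in that paper.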

pyproject.toml

Lines changed: 10 additions & 22 deletions
@@ -1,50 +1,38 @@
 [build-system]
-requires = [
-    "setuptools >= 78",
-    "setuptools-git-versioning >=2.1, <3",
-]
+requires = ["setuptools >= 78", "setuptools-git-versioning >=2.1, <3"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools-git-versioning]
 enabled = true
 
 [project]
 name = "pyrpca"
-description = "Robust principle component analysis for Python."
+description = "Robust principal component analysis for Python."
 dynamic = ["version"]
 readme = "README.md"
-requires-python = ">=3.9,<4"
+requires-python = ">=3.10,<4"
 classifiers = [
     "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
 ]
-dependencies = [
-    "numpy >=2.0.2, <3",
-]
-keywords = [
-    "rpca",
-    "robust pca",
-    "robust principal component analysis",
-]
-authors = [
-    {name = "Aart Stuurman", email = "[email protected]"},
-]
-maintainers = [
-    {name = "Aart Stuurman", email = "[email protected]"},
-]
-license = {file = "LICENSE"}
+dependencies = ["numpy >= 2.0.2, < 3", "scipy >= 1.15.2"]
+keywords = ["rpca", "robust pca", "robust principal component analysis"]
+authors = [{ name = "Aart Stuurman", email = "[email protected]" }]
+maintainers = [{ name = "Aart Stuurman", email = "[email protected]" }]
+license = { file = "LICENSE" }
 
 [project.optional-dependencies]
 dev = [
     "ruff == 0.11.2",
     "mypy == 1.15.0",
     "pytest == 8.3.5",
+    "scipy-stubs >= 1.15.2",
 ]
 
 [project.urls]
 homepage = "https://github.com/surgura/PyRPCA"
 
 [tool.setuptools]
-package-dir = {"pyrpca" = "pyrpca"}
+package-dir = { "pyrpca" = "pyrpca" }
 
 [tool.mypy]
 strict = true

pyrpca/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from .pcp_ialm import rpca_pcp_ialm
+
+__all__ = ["rpca_pcp_ialm"]

pyrpca/pcp_ialm.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+import numpy.typing as npt
+from typing import Tuple
+import numpy as np
+from numpy.linalg import norm
+from scipy.linalg import svd
+
+
+def rpca_pcp_ialm(
+    observations: npt.ArrayLike,
+    sparsity_factor: float,
+    max_iter: int = 1000,
+    mu: float | None = None,
+    mu_upper_bound: float | None = None,
+    rho: float = 1.5,
+    tol: float = 1e-7,
+    verbose: bool = True,
+) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
+    """
+    Solve the Principal Component Pursuit (PCP) convex relaxation of Robust PCA using the Inexact Augmented Lagrange Multiplier (IALM) method.
+
+    See README for algorithmic details and references.
+
+    Mu is updated every loop by multiplying it by `rho` until reaching `mu_upper_bound`.
+
+    Parameters:
+        observations: The m x n input matrix to decompose ('D' in the IALM paper).
+        sparsity_factor: Weight on the sparse term in the objective ('lambda' in the IALM paper).
+        max_iter: Maximum number of iterations to perform.
+        mu: Initial value for the penalty parameter. If None, defaults to 1.25 / spectral norm of observations.
+        mu_upper_bound: Maximum allowed value for `mu`. If None, defaults to `mu * 1e7`.
+        rho: Multiplicative factor to increase `mu` in each iteration.
+        tol: Tolerance for stopping criterion (relative Frobenius norm of the residual).
+        verbose: If True, print status and debug information during optimization.
+
+    Returns:
+        low_rank_component: The recovered low-rank matrix ('A' in the IALM paper).
+        sparse_component: The recovered sparse matrix ('E' in the IALM paper).
+    """
+    if mu is None:
+        mu = float(1.25 / norm(observations, ord=2))
+    if mu_upper_bound is None:
+        mu_upper_bound = mu * 1e7
+
+    norm_fro_obs = norm(observations, ord="fro")
+
+    dual = observations / np.maximum(
+        norm(observations, ord=2), norm(observations, ord=np.inf) / sparsity_factor
+    )
+    sparse = np.zeros_like(observations)
+
+    i = 0
+    while True:
+        # compute next iteration of A (low-rank component) via singular value thresholding
+        u, s, v = svd(observations - sparse + 1.0 / mu * dual, full_matrices=False)
+        s_thresholded = np.maximum(s - 1.0 / mu, 0)
+        low_rank = (u * s_thresholded) @ v
+
+        # compute next iteration of E (sparse component) via entrywise soft thresholding
+        residual_for_sparse = observations - low_rank + 1.0 / mu * dual
+        sparse = np.sign(residual_for_sparse) * np.maximum(
+            np.abs(residual_for_sparse) - sparsity_factor / mu, 0
+        )
+
+        # calculate relative reconstruction error
+        residual = observations - low_rank - sparse
+        err = norm(residual, ord="fro") / norm_fro_obs
+
+        i += 1
+
+        if verbose:
+            print(f"iter {i:<4} | err {err:<25} | mu {mu:<25}")
+
+        if err < tol:
+            if verbose:
+                print("Finished optimization. Error smaller than tolerance.")
+            break
+        if i == max_iter:
+            if verbose:
+                print("Finished optimization. Max iterations reached.")
+            break
+
+        # update dual and mu
+        dual = dual + mu * residual
+        mu = min(mu * rho, mu_upper_bound)
+    return low_rank, sparse
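
The two updates inside the loop are the standard proximal operators used by IALM: singular value thresholding for the nuclear-norm term and entrywise soft thresholding for the $\ell_1$ term. A minimal standalone sketch of these operators (illustrative only; the helper names below are not part of the package):

```python
import numpy as np
from numpy.linalg import svd


def singular_value_threshold(matrix: np.ndarray, tau: float) -> np.ndarray:
    """Proximal operator of the nuclear norm: shrink singular values by tau."""
    u, s, vh = svd(matrix, full_matrices=False)
    return (u * np.maximum(s - tau, 0.0)) @ vh


def soft_threshold(matrix: np.ndarray, tau: float) -> np.ndarray:
    """Proximal operator of the entrywise l1 norm: shrink entries toward zero by tau."""
    return np.sign(matrix) * np.maximum(np.abs(matrix) - tau, 0.0)


# In each IALM iteration, the low-rank update corresponds to
# singular_value_threshold(D - E + Y / mu, 1 / mu), and the sparse update to
# soft_threshold(D - A + Y / mu, lambda / mu), mirroring the loop in pcp_ialm.py.
```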

tests/test_dummy.py

Lines changed: 0 additions & 12 deletions
This file was deleted.

tests/test_pcp_ialm.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+import numpy as np
+from pyrpca import rpca_pcp_ialm
+from numpy.linalg import norm
+from scipy.sparse import random as sparse_random
+
+
+def test_rpca_separates_low_rank_and_sparse():
+    np.random.seed(0)
+    m, n, rank = 500, 400, 5
+
+    # create low-rank matrix A
+    u = np.random.randn(m, rank)
+    v = np.random.randn(rank, n)
+    low_rank = u @ v
+
+    # create sparse matrix E
+    sparse = sparse_random(
+        m, n, density=0.1, format="csr", data_rvs=np.random.randn
+    ).toarray()
+
+    # create observation matrix
+    observations = low_rank + sparse
+
+    # Run RPCA
+    low_rank_recovered, sparse_recovered = rpca_pcp_ialm(
+        observations,
+        sparsity_factor=1.0 / np.sqrt(max(observations.shape)),
+    )
+
+    # check that the reconstruction is close
+    reconstruction_error = norm(
+        observations - (low_rank_recovered + sparse_recovered), ord="fro"
+    ) / norm(observations, ord="fro")
+    assert reconstruction_error < 1e-6, (
+        f"Reconstruction error too high: {reconstruction_error}"
+    )
+
+    # check that recovered matrices are low rank and sparse
+    approx_rank = np.linalg.matrix_rank(low_rank_recovered, tol=1e-3)
+    sparsity = np.count_nonzero(sparse_recovered) / sparse_recovered.size
+
+    assert approx_rank <= rank + 2, f"Recovered A not low rank: {approx_rank}"
+    assert sparsity < 0.2, f"Recovered E not sparse: sparsity={sparsity}"
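
The assertions in this test double as quick diagnostics one might run after applying the algorithm to real data; a minimal sketch (the random array below is a placeholder, substitute your own observation matrix):

```python
import numpy as np
from numpy.linalg import norm

from pyrpca import rpca_pcp_ialm

# placeholder data; replace with your own m x n observation matrix
observations = np.random.default_rng(0).standard_normal((200, 100))

low_rank, sparse = rpca_pcp_ialm(
    observations,
    sparsity_factor=1.0 / np.sqrt(max(observations.shape)),
    verbose=False,
)

# relative reconstruction error of the decomposition
rel_err = norm(observations - (low_rank + sparse), ord="fro") / norm(observations, ord="fro")
# effective rank of the low-rank part and fill ratio of the sparse part
effective_rank = np.linalg.matrix_rank(low_rank, tol=1e-3)
fill_ratio = np.count_nonzero(sparse) / sparse.size
print(f"error={rel_err:.2e}, rank={effective_rank}, sparsity={fill_ratio:.2%}")
```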
