Docs for LSQ CPU
una-dinosauria committed Oct 14, 2018
1 parent eb97eab commit bd90a7d
Showing 9 changed files with 94 additions and 36 deletions.
3 changes: 2 additions & 1 deletion docs/make.jl
@@ -14,7 +14,8 @@ makedocs(
"OPQ.md",
"RVQ.md",
"ERVQ.md",
"ChainQ.md"
"ChainQ.md",
"LSQ.md"
]
]
# doctest = test
1 change: 1 addition & 0 deletions docs/mkdocs.yml
@@ -33,3 +33,4 @@ nav:
- RVQ.html
- ERVQ.html
- ChainQ.html
- LSQ.html
15 changes: 15 additions & 0 deletions docs/src/LSQ.md
@@ -0,0 +1,15 @@
# Local search quantization (LSQ)

Local search quantization (LSQ) is a non-orthogonal multi-codebook quantization (MCQ) method.

LSQ uses full-dimensional codebooks. Codebooks are updated via least squares, and encoding is done with
iterated local search (ILS), using randomized iterated conditional modes (ICM) as the local-search subroutine.

```@docs
encoding_icm
train_lsq
```
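
For reference, here is a minimal usage sketch. The sizes are toy values, and the initial `R`, `B`, and `C` are random placeholders; in practice they would come from a ChainQ initialization:

```julia
using Rayuela
using LinearAlgebra

# Toy problem: d-dimensional data, n points, m codebooks with h entries each
d, n, m, h = 32, 10_000, 4, 256
X = rand(Float32, d, n)

# Placeholder initialization (ChainQ would normally provide these)
R = Matrix{Float32}(I, d, d)            # d-by-d rotation: identity here
B = rand(Int16(1):Int16(h), m, n)       # m-by-n initial codes
C = [rand(Float32, d, h) for _ in 1:m]  # m initial codebooks, each d-by-h

niter, ilsiter, icmiter = 25, 8, 4      # iteration counts (assumed values)
randord, npert = true, 4                # ICM randomization and perturbations
cpp, V = false, true

# Train the quantizer, then re-encode the data with the learned codebooks
C, B, obj = Rayuela.train_lsq(X, m, h, R, B, C, niter,
                              ilsiter, icmiter, randord, npert, cpp, V)
B = Rayuela.encoding_icm(X, B, C, ilsiter, icmiter, randord, npert, cpp, V)
```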

## Reference

Martinez, J., Clement, J., Hoos, H. H., & Little, J. J. (2016). Revisiting additive quantization. In _European Conference on Computer Vision_ (pp. 137-153). Springer, Cham. [[PDF](https://www.cs.ubc.ca/~julm/papers/eccv16.pdf)]
6 changes: 1 addition & 5 deletions src/ChainQ.jl
@@ -254,7 +254,6 @@ function quantize_chainq_cuda!(

# Forward pass
@inbounds for i = 1:(m-1) # Loop over states
@time begin
if i > 1; CuArrays.BLAS.axpy!(n * h, 1.0f0, d_mincost, 1, d_unaries[i], 1); end

for j = 1:h # Loop over the cost of going to j
@@ -263,7 +262,6 @@
Mem.download!(mini, d_mini.buf)
minidx[j,i,:] .= mini .+ one(eltype(mini))
end
end
end

CuArrays.BLAS.axpy!(n * h, 1.0f0, d_mincost, 1, d_unaries[m], 1)
@@ -272,7 +270,6 @@
mini .+= one(eltype(mini))

# Backward trace
@time begin
@inbounds for idx = IDX # Loop over the datapoints

backpath = [ mini[idx] ]
@@ -283,7 +280,6 @@
# Save the inferred code
CODES[:, idx] .= reverse!( backpath )
end
end

CudaUtilsModule.finit()
destroy!(ctx)
@@ -303,7 +299,7 @@ Given data and chain codebooks, find codes using the Viterbi algorithm.
- `use_cuda::Bool`: whether to use a CUDA implementation
- `use_cpp::Bool`: whether to use a c++ implementation
If both `use_cuda` and `use_cpp` are `true`, the CUDA implementation is used.
# Returns
- `B::Matrix{Int16}`: `m`-by-`n` matrix with the codes
46 changes: 45 additions & 1 deletion src/LSQ.jl
@@ -251,7 +251,24 @@ function encode_icm_fully!(
return B
end

# Encode a full dataset
"""
encoding_icm(X, oldB, C, ilsiter, icmiter, randord, npert, cpp=true, V=false) -> B
Given data and codebooks, find codes using iterated local search (ILS) with ICM as the local-search subroutine.
# Arguments
- `X::Matrix{T}`: `d`-by-`n` data to quantize
- `oldB::Matrix{Int16}`: `m`-by-`n` initial set of codes
- `C::Vector{Matrix{T}}`: `m`-long vector with `d`-by-`h` codebooks
- `ilsiter::Integer`: Number of iterated local search (ILS) iterations
- `icmiter::Integer`: Number of iterated conditional modes (ICM) iterations
- `randord::Bool`: Whether to visit the nodes in a random order in ICM
- `npert::Integer`: Number of codes to perturb
- `cpp::Bool=true`: Whether to use the c++ implementation
- `V::Bool=false`: Whether to print progress
# Returns
- `B::Matrix{Int16}`: `m`-by-`n` matrix with the new codes
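# Example
A minimal sketch with toy sizes; the codebooks `C` are random here, but would normally come from a trained model:

    d, n, m, h = 32, 1_000, 4, 256
    X = rand(Float32, d, n)
    C = [rand(Float32, d, h) for _ in 1:m]
    oldB = rand(Int16(1):Int16(h), m, n)
    B = encoding_icm(X, oldB, C, 8, 4, true, 4, false, false)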
"""
function encoding_icm(
X::Matrix{T}, # d-by-n matrix. Data to encode
oldB::Matrix{Int16}, # m-by-n matrix. Previous encoding
@@ -276,6 +293,33 @@ function encoding_icm(
return B
end


"""
train_lsq(X, m, h, R, B, C, niter, ilsiter, icmiter, randord, npert, cpp=true, V=false) -> C, B, obj
Train a local-search quantizer.
This method is typically initialized by [Chain quantization (ChainQ)](@ref).
# Arguments
- `X::Matrix{T}`: `d`-by-`n` data to quantize
- `m::Integer`: Number of codebooks
- `h::Integer`: Number of entries in each codebook (typically 256)
- `R::Matrix{T}`: `d`-by-`d` rotation matrix for initialization
- `B::Matrix{Int16}`: `m`-by-`n` matrix with pre-trained codes for initialization
- `C::Vector{Matrix{T}}`: `m`-long vector of `d`-by-`h` pretrained codebooks
- `niter::Integer`: Number of iterations to use
- `ilsiter::Integer`: Number of iterated local search (ILS) iterations
- `icmiter::Integer`: Number of iterated conditional modes (ICM) iterations
- `randord::Bool`: Whether to visit the nodes in a random order in ICM
- `npert::Integer`: Number of codes to perturb
- `cpp::Bool`: Whether to use a c++ implementation for encoding
- `V::Bool`: Whether to print progress
# Returns
- `C::Vector{Matrix{T}}`: `m`-long vector of `d`-by-`h` learned codebooks
- `B::Matrix{Int16}`: `m`-by-`n` matrix with the codes
- `obj::Vector{T}`: `niter`-long vector with the quantization error after each iteration
"""
function train_lsq(
X::Matrix{T}, # d-by-n matrix of data points to train on.
m::Integer, # number of codebooks
2 changes: 1 addition & 1 deletion src/RVQ.jl
@@ -66,7 +66,7 @@ function quantize_rvq(
end

"""
train_rvq(X, m, h, niter, V=false) -> C, B, error
train_rvq(X, m, h, niter=25, V=false) -> C, B, error
Train a residual quantizer.
28 changes: 28 additions & 0 deletions test/chainq.jl
@@ -0,0 +1,28 @@


# Make sure the fast version of codebook update is still okay
@testset "Chain codebook update" begin
d, n, m, h, V, rho = 32, 10_000, 4, 256, false, 1e-4
X, _, B = generate_random_dataset(Float64, Int16, d, n, m, h)

# These two methods are equivalent, but the second should be faster
C1, _ = Rayuela.update_codebooks_chain(X, B, h, V)
C2, _ = Rayuela.update_codebooks_chain_bin(X, B, h, V, rho)
@test isapprox(C1, C2)
end


# Chain quantization
@testset "Chain encoding" begin
d, n, m, h = 32, Int(1e3), 4, 256
X, C, B = generate_random_dataset(Float32, Int16, d, n, m, h)

B1, _ = Rayuela.quantize_chainq(X, C) # Julia

use_cuda, use_cpp = true, false
B2, _ = Rayuela.quantize_chainq(X, C, use_cuda, use_cpp) # Cuda

use_cuda, use_cpp = false, true
B3, _ = Rayuela.quantize_chainq(X, C, use_cuda, use_cpp) # C++
@test all(B1 .== B2 .== B3) # Julia, CUDA, and C++ implementations agree
end
12 changes: 0 additions & 12 deletions test/codebook_update.jl

This file was deleted.

17 changes: 1 addition & 16 deletions test/runtests.jl
@@ -7,20 +7,5 @@ include("common.jl")
# IO - fvecs and ivecs read/write
include("xvecs.jl")

# Codebook update
include("codebook_update.jl")

# Chain quantization
# Test cpp viterbi encoding implementation
@testset "Viterbi encoding" begin
d, n, m, h = 32, Int(1e3), 4, 256
X, C, B = generate_random_dataset(Float32, Int16, d, n, m, h)

Bj, _ = Rayuela.quantize_chainq(X, C) # Julia

use_cuda = true
use_cpp = false

Bc, _ = Rayuela.quantize_chainq(X, C, use_cuda, use_cpp) # C
@test all(Bj .== Bc)
end
include("chainq.jl")
