Added ability to parametrize window size

ArtiomTr · sauliusgrigaitis · commit f8288e3dffad · 2025-10-27T14:34:15.000+02:00
diff --git a/kzg/src/msm/arkmsm/arkmsm_msm.rs b/kzg/src/msm/arkmsm/arkmsm_msm.rs
@@ -15,15 +15,19 @@ impl VariableBaseMSM {
     /// on a Ubuntu 20.04.2 LTS server with AMD EPYC 7282 16-Core CPU
     /// and 128G memory, the optimal performance may vary on a different
     /// configuration.
-    const fn get_opt_window_size(k: u32) -> u32 {
-        match k {
-            0..=9 => 8,
-            10..=12 => 10,
-            13..=14 => 12,
-            15..=19 => 13,
-            20..=22 => 15,
-            23.. => 16,
-        }
+    fn get_opt_window_size(k: u32) -> u32 {
+        // Built-in default for this `k`.
+        let tuned = match k {
+            0..=9 => 8,
+            10..=12 => 10,
+            13..=14 => 12,
+            15..=19 => 13,
+            20..=22 => 15,
+            23.. => 16,
+        };
+        // A build-time `WINDOW_SIZE` that parses as `u32` overrides it.
+        let forced = option_env!("WINDOW_SIZE").and_then(|raw| raw.parse().ok());
+        forced.unwrap_or(tuned)
     }
 
     pub fn msm_slice(mut scalar: Scalar256, slices: &mut [u32], window_bits: u32) {
diff --git a/kzg/src/msm/bgmw.rs b/kzg/src/msm/bgmw.rs
@@ -99,73 +99,100 @@ const fn get_sequential_window_size(window: BgmwWindow) -> usize {
 ///   2^w - 2     - computing total bucket sum (bucket aggregation). Total number of buckets (scratch size) is 2^(w-1).
 ///                 Adding each point to total bucket sum requires 2 point addition operations, so 2 * 2^(w-1) = 2^w.
 #[allow(unused)]
-const fn bgmw_window_size(npoints: usize) -> usize {
-    let wbits = num_bits(npoints);
-
-    match (wbits) {
-        1 => 4,
-        2..=3 => 5,
-        4 => 6,
-        5 => 7,
-        6..=7 => 8,
-        8 => 9,
-        9..=10 => 10,
-        11 => 11,
-        12 => 12,
-        13..=14 => 13,
-        15..=16 => 15,
-        17 => 16,
-        18..=19 => 17,
-        20 => 19,
-        21..=22 => 20,
-        23..=24 => 22,
-        25..=26 => 24,
-        27..=29 => 26,
-        30..=32 => 29,
-        33..=37 => 32,
-        _ => 37,
-    }
+fn bgmw_window_size(npoints: usize) -> usize {
+    // A build-time `WINDOW_SIZE` replaces the table lookup entirely; a value
+    // that is set but is not a valid `usize` panics with the message below.
+    if let Some(raw) = option_env!("WINDOW_SIZE") {
+        return raw
+            .parse()
+            .expect("WINDOW_SIZE environment variable must be valid number");
+    }
+
+    // Otherwise the width is looked up from `num_bits(npoints)`.
+    match num_bits(npoints) {
+        1 => 4,
+        2..=3 => 5,
+        4 => 6,
+        5 => 7,
+        6..=7 => 8,
+        8 => 9,
+        9..=10 => 10,
+        11 => 11,
+        12 => 12,
+        13..=14 => 13,
+        15..=16 => 15,
+        17 => 16,
+        18..=19 => 17,
+        20 => 19,
+        21..=22 => 20,
+        23..=24 => 22,
+        25..=26 => 24,
+        27..=29 => 26,
+        30..=32 => 29,
+        33..=37 => 32,
+        _ => 37,
+    }
 }
 
 #[cfg(feature = "parallel")]
-const fn bgmw_parallel_window_size(npoints: usize, ncpus: usize) -> (usize, usize, usize) {
-    let mut min_ops = usize::MAX;
-    let mut opt = 0;
-
-    let mut win = 2;
-    while win <= 40 {
-        let ops = (1 << win) + (255usize.div_ceil(win).div_ceil(ncpus) * npoints) - 2;
-        if min_ops >= ops {
-            min_ops = ops;
-            opt = win;
-        }
-        win += 1;
-    }
+// `WINDOW_SIZE` is only required when `WINDOW_NX` is set, so `env!` cannot replace this.
+#[allow(clippy::option_env_unwrap)]
+fn bgmw_parallel_window_size(npoints: usize, ncpus: usize) -> (usize, usize, usize) {
+    // Build-time override: a `WINDOW_NX` that parses as a number fixes the first tuple
+    // element and makes `WINDOW_SIZE` mandatory; an unparsable `WINDOW_NX` is ignored.
+    option_env!("WINDOW_NX")
+        .and_then(|v| v.parse().ok())
+        .map(|nx| {
+            let wnd = option_env!("WINDOW_SIZE")
+                .expect(
+                    "Unable to use BGMW: when specifying WINDOW_NX environment \
+            variable, please also specify WINDOW_SIZE",
+                )
+                .parse()
+                .expect("WINDOW_SIZE environment variable must be valid number");
+            (nx, 255usize.div_ceil(wnd) + is_zero((NBITS % wnd) as u64) as usize, wnd)
+        })
+        // Lazy fallback: the search below only runs when no override was supplied.
+        .unwrap_or_else(|| {
+            let mut min_ops = usize::MAX;
+            let mut opt = 0;
+
+            // Windows 2..=40: keep the one minimising `ops` (ties go to the larger window).
+            let mut win = 2;
+            while win <= 40 {
+                let ops = (1 << win) + (255usize.div_ceil(win).div_ceil(ncpus) * npoints) - 2;
+                if min_ops >= ops {
+                    min_ops = ops;
+                    opt = win;
+                }
+                win += 1;
+            }
 
-    let mut mult = 1;
+            let mut mult = 1;
 
-    let mut opt_x = 1;
+            let mut opt_x = 1;
 
-    while mult <= 8 {
-        let nx = ncpus * mult;
-        let wnd = bgmw_window_size(npoints / nx);
+            while mult <= 8 {
+                let nx = ncpus * mult;
+                let wnd = bgmw_window_size(npoints / nx);
 
-        let ops = mult * 255usize.div_ceil(wnd) * npoints.div_ceil(nx) + (1 << wnd) - 2;
+                let ops = mult * 255usize.div_ceil(wnd) * npoints.div_ceil(nx) + (1 << wnd) - 2;
 
-        if min_ops > ops {
-            min_ops = ops;
-            opt = wnd;
-            opt_x = nx;
-        }
+                if min_ops > ops {
+                    min_ops = ops;
+                    opt = wnd;
+                    opt_x = nx;
+                }
 
-        mult += 1;
-    }
+                mult += 1;
+            }
 
-    (
-        opt_x,
-        255usize.div_ceil(opt) + is_zero((NBITS % opt) as u64) as usize,
-        opt,
-    )
+            (
+                opt_x,
+                255usize.div_ceil(opt) + is_zero((NBITS % opt) as u64) as usize,
+                opt,
+            )
+        })
 }
 
 impl<
diff --git a/kzg/src/msm/parallel_pippenger_utils.rs b/kzg/src/msm/parallel_pippenger_utils.rs
@@ -1,35 +1,47 @@
 use crate::msm::pippenger_utils::num_bits;
 
-pub const fn breakdown(window: usize, ncpus: usize) -> (usize, usize, usize) {
+pub fn breakdown(window: usize, ncpus: usize) -> (usize, usize, usize) {
     const NBITS: usize = 255;
-    let mut nx: usize;
-    let mut wnd: usize;
 
-    if NBITS > window * ncpus {
-        nx = 1;
-        wnd = num_bits(ncpus / 4);
-        if (window + wnd) > 18 {
-            wnd = window - wnd;
-        } else {
-            wnd = (NBITS / window).div_ceil(ncpus);
-            if (NBITS / (window + 1)).div_ceil(ncpus) < wnd {
-                wnd = window + 1;
+    // Build-time `WINDOW_NX` pins `nx`; the other two values then follow from `window`.
+    option_env!("WINDOW_NX")
+        .map(|v| {
+            let nx = v
+                .parse()
+                .expect("WINDOW_NX environment variable must be valid number");
+            let ny = NBITS / window + 1;
+            (nx, ny, NBITS / ny + 1)
+        })
+        // Lazy fallback: the heuristic below is evaluated only when no override is set.
+        .unwrap_or_else(|| {
+            let mut nx: usize;
+            let mut wnd: usize;
+            if NBITS > window * ncpus {
+                nx = 1;
+                wnd = num_bits(ncpus / 4);
+                if (window + wnd) > 18 {
+                    wnd = window - wnd;
+                } else {
+                    wnd = (NBITS / window).div_ceil(ncpus);
+                    if (NBITS / (window + 1)).div_ceil(ncpus) < wnd {
+                        wnd = window + 1;
+                    } else {
+                        wnd = window;
+                    }
+                }
             } else {
-                wnd = window;
+                nx = 2;
+                wnd = window - 2;
+                while (NBITS / wnd + 1) * nx < ncpus {
+                    nx += 1;
+                    wnd = window - num_bits(3 * nx / 2);
+                }
+                nx -= 1;
+                wnd = window - num_bits(3 * nx / 2);
             }
-        }
-    } else {
-        nx = 2;
-        wnd = window - 2;
-        while (NBITS / wnd + 1) * nx < ncpus {
-            nx += 1;
-            wnd = window - num_bits(3 * nx / 2);
-        }
-        nx -= 1;
-        wnd = window - num_bits(3 * nx / 2);
-    }
-    let ny = NBITS / wnd + 1;
-    wnd = NBITS / ny + 1;
+            let ny = NBITS / wnd + 1;
+            wnd = NBITS / ny + 1;
 
-    (nx, ny, wnd)
+            (nx, ny, wnd)
+        })
 }
diff --git a/kzg/src/msm/pippenger_utils.rs b/kzg/src/msm/pippenger_utils.rs
@@ -297,16 +297,23 @@ pub const fn num_bits(l: usize) -> usize {
 ///                 Adding each point to total bucket sum requires 2 point addition operations, so 2 * 2^(w-1) = 2^w.
 ///   w + 1       - each bucket sum must be multiplied by 2^w. To do this, we need w doublings. Adding this sum to the
 ///                 total requires one more point addition, hence +1.
-pub const fn pippenger_window_size(npoints: usize) -> usize {
-    let wbits = num_bits(npoints);
-
-    if wbits > 13 {
-        return wbits - 4;
-    }
-    if wbits > 5 {
-        return wbits - 3;
-    }
-    2
+pub fn pippenger_window_size(npoints: usize) -> usize {
+    // A build-time `WINDOW_SIZE` always wins; a value that is set but is not a
+    // valid `usize` panics with the message below.
+    //
+    // The heuristic lives in a closure so that it only runs when no override is
+    // present. Passing it to `unwrap_or` as a block would evaluate it first, and
+    // its early `return`s would leave the function before the override is used.
+    option_env!("WINDOW_SIZE")
+        .map(|v| {
+            v.parse()
+                .expect("WINDOW_SIZE environment variable must be valid number")
+        })
+        .unwrap_or_else(|| match num_bits(npoints) {
+            wbits if wbits > 13 => wbits - 4,
+            wbits if wbits > 5 => wbits - 3,
+            _ => 2,
+        })
 }
 
 #[cfg(test)]
diff --git a/kzg/src/msm/wbits.rs b/kzg/src/msm/wbits.rs
@@ -3,8 +3,6 @@ use core::{marker::PhantomData, ops::Neg};
 
 use crate::{Fr, G1Affine, G1Fp, G1GetFp, G1Mul, G1ProjAddAffine, G1};
 
-const WBITS: usize = 8;
-
 #[derive(Debug, Clone)]
 pub struct WbitsTable<TFr, TG1, TG1Fp, TG1Affine, TG1ProjAddAffine>
 where
@@ -26,6 +24,15 @@ where
     g1_affine_add_marker: PhantomData<TG1ProjAddAffine>,
 }
 
+/// Window width in bits: build-time `WINDOW_SIZE` if set (panics if unparsable), else 8.
+fn get_window_size() -> usize {
+    let Some(raw) = option_env!("WINDOW_SIZE") else {
+        return 8;
+    };
+    raw.parse()
+        .expect("WINDOW_SIZE environment variable must be valid number")
+}
+
 // Code was taken from: https://github.com/privacy-scaling-explorations/halo2curves/blob/b753a832e92d5c86c5c997327a9cf9de86a18851/src/msm.rs#L13
 pub fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i32 {
     // Booth encoding:
@@ -294,13 +301,13 @@ impl<
         let mut table = Vec::new();
 
         table
-            .try_reserve_exact(points.len() * (1 << (WBITS - 1)))
+            .try_reserve_exact(points.len() * (1 << (get_window_size() - 1)))
             .map_err(|_| "WBITS precomputation table is too large".to_string())?;
 
         for point in points {
             let mut current = point.clone();
 
-            for _ in 0..(1 << (WBITS - 1)) {
+            for _ in 0..(1 << (get_window_size() - 1)) {
                 table.push(TG1Affine::into_affine(&current));
                 current = current.add_or_dbl(point);
             }
@@ -329,13 +336,13 @@ impl<
             for row in matrix {
                 let mut temp_table = Vec::new();
                 temp_table
-                    .try_reserve_exact(row.len() * (1 << (WBITS - 1)))
+                    .try_reserve_exact(row.len() * (1 << (get_window_size() - 1)))
                     .map_err(|_| "WBITS precomputation table is too large".to_owned())?;
 
                 for point in row {
                     let mut current = point.clone();
 
-                    for _ in 0..(1 << (WBITS - 1)) {
+                    for _ in 0..(1 << (get_window_size() - 1)) {
                         temp_table.push(TG1Affine::into_affine(&current));
                         current = current.add_or_dbl(point);
                     }
@@ -362,15 +369,16 @@ impl<
     fn multiply_sequential_raw(bases: &[TG1Affine], scalars: &[TFr]) -> TG1 {
         let scalars = scalars.iter().map(TFr::to_scalar).collect::<Vec<_>>();
 
-        let number_of_windows = 255 / WBITS + 1;
+        let number_of_windows = 255 / get_window_size() + 1;
         let mut windows_of_points = vec![Vec::with_capacity(scalars.len()); number_of_windows];
 
         for window_idx in 0..windows_of_points.len() {
             for (scalar_idx, scalar_bytes) in scalars.iter().enumerate() {
-                let sub_table =
-                    &bases[scalar_idx * (1 << (WBITS - 1))..(scalar_idx + 1) * (1 << (WBITS - 1))];
+                let sub_table = &bases[scalar_idx * (1 << (get_window_size() - 1))
+                    ..(scalar_idx + 1) * (1 << (get_window_size() - 1))];
 
-                let point_idx = get_booth_index(window_idx, WBITS, scalar_bytes.as_u8());
+                let point_idx =
+                    get_booth_index(window_idx, get_window_size(), scalar_bytes.as_u8());
 
                 if point_idx == 0 {
                     continue;
@@ -396,7 +404,7 @@ impl<
         let mut result: TG1 = accumulated_points.last().unwrap().clone();
         for point in accumulated_points.into_iter().rev().skip(1) {
             // Double the result 'wbits' times
-            for _ in 0..WBITS {
+            for _ in 0..get_window_size() {
                 result = result.dbl();
             }
             // Add the accumulated point for this window
diff --git a/msm-benches/README.md b/msm-benches/README.md