Lagrange-Labs · Zyouell · Oct 17, 2024 · Oct 18, 2024 · Oct 21, 2024 · Nov 7, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -31,6 +31,7 @@ alloy = { version = "0.6", default-features = false, features = [
   "transports",
   "postgres",
 ] }
+
 anyhow = "1.0"
 base64 = "0.22"
 bb8 = "0.8.5"

diff --git a/mp2-common/Cargo.toml b/mp2-common/Cargo.toml
@@ -31,7 +31,6 @@ hex.workspace = true
 rand.workspace = true
 rstest.workspace = true
 tokio.workspace = true
-
 mp2_test = { path = "../mp2-test" }
 
 [features]

diff --git a/mp2-common/src/array.rs b/mp2-common/src/array.rs
@@ -1,6 +1,9 @@
 use crate::{
     serialization::{deserialize_long_array, serialize_long_array},
-    utils::{less_than_or_equal_to_unsafe, range_check_optimized, Endianness, PackerTarget},
+    utils::{
+        less_than_or_equal_to_unsafe, less_than_unsafe, range_check_optimized, Endianness,
+        PackerTarget,
+    },
 };
 use anyhow::{anyhow, Result};
 use plonky2::{
@@ -600,6 +603,110 @@ where
     pub fn last(&self) -> T {
         self.arr[SIZE - 1]
     }
+
+    /// This function allows you to search a larger [`Array`] by representing it as a number of
+    /// smaller [`Array`]s with size [`RANDOM_ACCESS_SIZE`], padding the final smaller array where required.
+    /// For example if we have an array of length `512` and we wish to find the value at index `324` the following
+    /// occurs:
+    ///     1) Split the original [`Array`] into `512 / 64 = 8` chunks `[A_0, ... , A_7]`
+    ///     2) Express `324` in base 64 (Little Endian) as `[4, 5]`, since `324 = 4 + 5 * 64`
+    ///     3) For each `i \in [0, 7]` use a [`RandomAccessGate`] to look up the element at index `4`, `v_i,4`, of `A_i`
+    ///        and create a new list of length `8` that consists of `[v_0,4, v_1,4, ... v_7,4]`
+    ///     4) Now use another [`RandomAccessGate`] to select the element at index `5` of this new list (`v_5,4`, both lookups being zero-indexed)
+    ///
+    /// For comparison, using [`Self::value_at`] on an [`Array`] with length `512` results in 129 rows, while using this method
+    /// on the same [`Array`] results in 15 rows.
+    ///
+    /// As an aside, if the [`Array`] length is not divisible by `64` then we pad with zero values; since the size of the
+    /// [`Array`] is a compile time constant this will not affect circuit preprocessing.
+    pub fn random_access_large_array<F: RichField + Extendable<D>, const D: usize>(
+        &self,
+        b: &mut CircuitBuilder<F, D>,
+        at: Target,
+    ) -> T {
+        // Split the array into chunks of `RANDOM_ACCESS_SIZE`; `padded_size` is the number of chunks, ceil(SIZE / RANDOM_ACCESS_SIZE), assuming `SIZE >= 1`
+        let padded_size = (SIZE - 1) / RANDOM_ACCESS_SIZE + 1;
+
+        // Create an array of `Array`s, filling the tail of the last chunk with zero targets where required
+        let arrays: Vec<Array<T, RANDOM_ACCESS_SIZE>> = (0..padded_size)
+            .map(|i| Array {
+                arr: create_array(|j| {
+                    let index = RANDOM_ACCESS_SIZE * i + j;
+                    if index < self.arr.len() {
+                        self.arr[index]
+                    } else {
+                        T::from_target(b.zero())
+                    }
+                }),
+            })
+            .collect();
+
+        // We need to express `at` in base 64 using two digits, so `at` is treated as a 12-bit value (64^2 = 4096); we also check that `at` is smaller than the size
+        // of the array. NOTE(review): no compile-time check that `SIZE <= 4096` is visible here — confirm callers guarantee it.
+        let array_size = b.constant(F::from_noncanonical_u64(SIZE as u64));
+        let less_than_check = less_than_unsafe(b, at, array_size, 12);
+        let true_target = b._true();
+        b.connect(less_than_check.target, true_target.target);
+
+        let (low_bits, high_bits) = b.split_low_high(at, 6, 12);
+
+        // Look up the element at `low_bits` in each of the smaller arrays
+        let mut first_search = arrays
+            .into_iter()
+            .map(|array| {
+                b.random_access(
+                    low_bits,
+                    array
+                        .arr
+                        .iter()
+                        .map(Targetable::to_target)
+                        .collect::<Vec<Target>>(),
+                )
+            })
+            .collect::<Vec<Target>>();
+
+        // Now we pad the per-chunk results with zero targets so that the list length is a power of 2
+        let next_power_of_two = first_search.len().next_power_of_two();
+        let zero_target = b.zero();
+        first_search.resize(next_power_of_two, zero_target);
+        // Search the result for the Target at `high_bits`
+        T::from_target(b.random_access(high_bits, first_search))
+    }
+
+    /// Returns [`Self[at..at+SUB_SIZE]`].
+    /// This is more expensive than [`Self::extract_array`] for [`Array`]s that are shorter than 64 elements, because it uses [`Self::random_access_large_array`]
+    /// instead of [`Self::value_at`]. This function enforces that the values extracted are within the array (`at + SUB_SIZE <= SIZE`).
+    ///
+    /// For comparison, using [`Self::extract_array`] on an [`Array`] of size `512` results in 5179 rows, while using this method instead
+    /// results in 508 rows.
+    pub fn extract_array_large<
+        F: RichField + Extendable<D>,
+        const D: usize,
+        const SUB_SIZE: usize,
+    >(
+        &self,
+        b: &mut CircuitBuilder<F, D>,
+        at: Target,
+    ) -> Array<T, SUB_SIZE> {
+        let m = b.constant(F::from_canonical_usize(SUB_SIZE));
+        let array_len = b.constant(F::from_canonical_usize(SIZE));
+        let upper_bound = b.add(at, m);
+        let num_bits_size = SIZE.ilog2() + 1;
+
+        let lt = less_than_or_equal_to_unsafe(b, upper_bound, array_len, num_bits_size as usize);
+
+        let t = b._true();
+        b.connect(t.target, lt.target);
+
+        Array::<T, SUB_SIZE> {
+            arr: core::array::from_fn(|i| {
+                let i_target = b.constant(F::from_canonical_usize(i));
+                let i_plus_n_target = b.add(at, i_target);
+
+                self.random_access_large_array(b, i_plus_n_target)
+            }),
+        }
+    }
 }
 /// Returns the size of the array in 32-bit units, rounded up.
 #[allow(non_snake_case)]
@@ -815,6 +922,53 @@ mod test {
         run_circuit::<F, D, C, _>(ValueAtCircuit { arr, idx, exp });
     }
 
+    #[test]
+    fn test_random_access_large_array() {
+        const SIZE: usize = 512;
+        #[derive(Clone, Debug)]
+        struct ValueAtCircuit {
+            arr: [u8; SIZE],
+            idx: usize,
+            exp: u8,
+        }
+        impl<F, const D: usize> UserCircuit<F, D> for ValueAtCircuit
+        where
+            F: RichField + Extendable<D>,
+        {
+            type Wires = (Array<Target, SIZE>, Target, Target);
+            fn build(c: &mut CircuitBuilder<F, D>) -> Self::Wires {
+                let array = Array::<Target, SIZE>::new(c);
+                let exp_value = c.add_virtual_target();
+                let index = c.add_virtual_target();
+                let extracted = array.random_access_large_array(c, index);
+                c.connect(exp_value, extracted);
+
+                (array, index, exp_value)
+            }
+            fn prove(&self, pw: &mut PartialWitness<F>, wires: &Self::Wires) {
+                wires
+                    .0
+                    .assign(pw, &create_array(|i| F::from_canonical_u8(self.arr[i])));
+                pw.set_target(wires.1, F::from_canonical_usize(self.idx));
+                pw.set_target(wires.2, F::from_canonical_u8(self.exp));
+            }
+        }
+
+        let mut rng = thread_rng();
+        let mut arr = [0u8; SIZE];
+        rng.fill(&mut arr[..]);
+        let idx: usize = rng.gen_range(0..SIZE);
+        let exp = arr[idx];
+        run_circuit::<F, D, C, _>(ValueAtCircuit { arr, idx, exp });
+
+        // Now we check that it fails when the index is too large: `SIZE` is the first index past the end of the array
+        let idx = SIZE;
+        let result = std::panic::catch_unwind(|| {
+            run_circuit::<F, D, C, _>(ValueAtCircuit { arr, idx, exp })
+        });
+        assert!(result.is_err());
+    }
+
     #[test]
     fn test_extract_array() {
         const SIZE: usize = 80;
@@ -858,6 +1012,57 @@ mod test {
         run_circuit::<F, D, C, _>(ExtractArrayCircuit { arr, idx, exp });
     }
 
+    #[test]
+    fn test_extract_array_large() {
+        const SIZE: usize = 512;
+        const SUBSIZE: usize = 40;
+        #[derive(Clone, Debug)]
+        struct ExtractArrayCircuit {
+            arr: [u8; SIZE],
+            idx: usize,
+            exp: [u8; SUBSIZE],
+        }
+        impl<F, const D: usize> UserCircuit<F, D> for ExtractArrayCircuit
+        where
+            F: RichField + Extendable<D>,
+        {
+            type Wires = (Array<Target, SIZE>, Target, Array<Target, SUBSIZE>);
+            fn build(c: &mut CircuitBuilder<F, D>) -> Self::Wires {
+                let array = Array::<Target, SIZE>::new(c);
+                let index = c.add_virtual_target();
+                let expected = Array::<Target, SUBSIZE>::new(c);
+                let extracted = array.extract_array_large::<_, _, SUBSIZE>(c, index);
+                let are_equal = expected.equals(c, &extracted);
+                let tru = c._true();
+                c.connect(are_equal.target, tru.target);
+                (array, index, expected)
+            }
+            fn prove(&self, pw: &mut PartialWitness<F>, wires: &Self::Wires) {
+                wires
+                    .0
+                    .assign(pw, &create_array(|i| F::from_canonical_u8(self.arr[i])));
+                pw.set_target(wires.1, F::from_canonical_usize(self.idx));
+                wires
+                    .2
+                    .assign(pw, &create_array(|i| F::from_canonical_u8(self.exp[i])));
+            }
+        }
+
+        let mut rng = thread_rng();
+        let mut arr = [0u8; SIZE];
+        rng.fill(&mut arr[..]);
+        let idx: usize = rng.gen_range(0..(SIZE - SUBSIZE));
+        let exp = create_array(|i| arr[idx + i]);
+        run_circuit::<F, D, C, _>(ExtractArrayCircuit { arr, idx, exp });
+
+        // It should panic if we try to extract indices that fall outside of `0..SIZE`; with `idx = SIZE` every extracted index does
+        let idx = SIZE;
+        let result = std::panic::catch_unwind(|| {
+            run_circuit::<F, D, C, _>(ExtractArrayCircuit { arr, idx, exp })
+        });
+        assert!(result.is_err());
+    }
+
     #[test]
     fn test_contains_subarray() {
         #[derive(Clone, Debug)]
@@ -1088,7 +1293,10 @@ mod test {
         };
         run_circuit::<F, D, C, _>(circuit);
 
-        arr2[0] += 1; // ensure arr2 is different from arr
+        arr2[0] = match arr2[0].checked_add(1) {
+            Some(num) => num,
+            None => arr2[0] - 1,
+        };
         let res = panic::catch_unwind(|| {
             let circuit = TestSliceEqual {
                 arr,