Merge pull request #25 from jlapeyre/more-more-iterators

jlapeyre · web-flow · commit 6ca933d3cb4d · 2025-11-07T12:40:05.000-05:00
Replace more allocation with iterators in `solve_tdgp`

This PR removes a great deal of allocation from `solve_tdgp`.

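At the level of `solve_tdgp`, everything is now done with iterators and no `Vec`s are allocated: it returns `Option<impl Iterator<Item = DOmega>>` instead of `Vec<DOmega>`, and `process_solutions` now accepts any `Iterator<Item = DOmega>`.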

* Add comments documenting data structures

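Some of the names introduced here (e.g. `newproc`, `sol_xx`) could be better chosen.
Some of the `clone`s (e.g. re-cloning the already-owned `alpha0` inside `newproc`) could very likely be removed.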
diff --git a/src/gridsynth.rs b/src/gridsynth.rs
@@ -15,7 +15,10 @@ use crate::unitary::DOmegaUnitary;
 use dashu_float::round::mode::{self, HalfEven};
 use dashu_float::{Context, FBig};
 use dashu_int::IBig;
-use log::{debug, info};
+
+//use log::{debug, info};
+use log::debug;
+
 use nalgebra::{Matrix2, Vector2};
 use std::cmp::Ordering;
 use std::time::{Duration, Instant};
@@ -117,6 +120,7 @@ impl Region for EpsilonRegion {
         let term2 = fb_with_prec(&self.z_y * u0.imag());
         let temp_sub = fb_with_prec(&self.d - &term1);
         let rhs = fb_with_prec(&temp_sub - &term2);
+        // t0 <= t1
         let (t0, t1) = solve_quadratic(a.real(), b.real(), c.real())?;
         let zero = fb_with_prec(ib_to_bf_prec(IBig::ZERO));
 
@@ -200,11 +204,14 @@ fn process_solution_candidate(mut z: DOmega, mut w_val: DOmega) -> DOmegaUnitary
     }
 }
 
-fn process_solutions(
+fn process_solutions<I>(
     config: &mut GridSynthConfig,
-    solutions: Vec<DOmega>,
+    solutions: I,
     time_of_diophantine_dyadic: &mut Duration,
-) -> Option<DOmegaUnitary> {
+) -> Option<DOmegaUnitary>
+where
+    I: Iterator<Item = DOmega>,
+{
     let start_diophantine = if config.measure_time {
         Some(Instant::now())
     } else {
@@ -303,7 +310,7 @@ fn search_for_solution(
         } else {
             None
         };
-        let solution = solve_tdgp(
+        let solutions = solve_tdgp(
             epsilon_region,
             unit_disk,
             &transformed.0,
@@ -312,23 +319,31 @@ fn search_for_solution(
             k,
             config.verbose,
         );
-        if config.verbose {
-            info!("k = {}, found {} candidates", k, solution.len());
-        }
+        // TODO: Reenable
+        // if config.verbose {
+        //     // Warning! Printing the length will materialize a potentially large iterator.
+        //     let lensol = match &solutions {
+        //         None => 0,
+        //         Some(sols) => sols.len(),
+        //     };
+        //     info!("k = {}, found {} candidates", k, lensol);
+        // }
         if let Some(start) = start_tdgp {
             time_of_solve_tdgp += start.elapsed();
         }
-
-        if let Some(result) = process_solutions(config, solution, &mut time_of_diophantine_dyadic) {
-            if config.measure_time {
-                debug!(
-                    "time of solve_TDGP: {:.3} ms",
-                    time_of_solve_tdgp.as_secs_f64() * 1000.0
-                );
+        if let Some(solutions) = solutions {
+            if let Some(result) =
+                process_solutions(config, solutions, &mut time_of_diophantine_dyadic)
+            {
+                if config.measure_time {
+                    debug!(
+                        "time of solve_TDGP: {:.3} ms",
+                        time_of_solve_tdgp.as_secs_f64() * 1000.0
+                    );
+                }
+                return result;
             }
-            return result;
         }
-
         k += 1;
     }
 }
diff --git a/src/math.rs b/src/math.rs
@@ -130,6 +130,17 @@ fn compute_logarithm(
     (n, r)
 }
 
+/// Solves the quadratic equation ax^2 + bx + c = 0 for real roots.
+///
+/// # Arguments
+///
+/// - `a`: Coefficient of x^2, assumed to be non-zero for a valid quadratic equation.
+/// - `b`: Coefficient of x.
+/// - `c`: Constant term.
+///
+/// # Returns
+///
+/// An `Option` containing a tuple of two roots if they exist; otherwise, `None` if the roots are not real (i.e., the discriminant is negative).
 pub fn solve_quadratic(
     a: &FBig<HalfEven>,
     b: &FBig<HalfEven>,
diff --git a/src/odgp.rs b/src/odgp.rs
@@ -133,6 +133,10 @@ pub fn solve_odgp_with_parity(
         .map(move |alpha| (alpha * ZRootTwo::new(IBig::ZERO, IBig::ONE)) + &p)
 }
 
+pub fn first_solve_scaled_odgp(i: &Interval, j: &Interval, k: i64) -> Option<DRootTwo> {
+    solve_scaled_odgp(i, j, k).next()
+}
+
 pub fn solve_scaled_odgp(i: &Interval, j: &Interval, k: i64) -> impl Iterator<Item = DRootTwo> {
     let scale = pow_sqrt2(k);
     let neg_scale = -scale.clone();
diff --git a/src/tdgp.rs b/src/tdgp.rs
@@ -7,44 +7,43 @@ use dashu_int::IBig;
 
 use crate::common::{fb_with_prec, ib_to_bf_prec};
 use crate::grid_op::GridOp;
-use crate::odgp::{solve_scaled_odgp, solve_scaled_odgp_with_parity_k_ne_0};
+use crate::odgp::{
+    first_solve_scaled_odgp, solve_scaled_odgp, solve_scaled_odgp_with_parity_k_ne_0,
+};
 use crate::region::{Ellipse, Interval, Rectangle};
 use crate::ring::{DOmega, DRootTwo};
 
+/// See Remark 5.4, page 5, Ross and Selinger arXiv:1403.2975v3
 pub trait Region {
+    /// An ellipse bounding the region A
     fn ellipse(&self) -> Ellipse;
+
+    /// Returns `true` if `u` is inside the region A
     fn inside(&self, u: &DOmega) -> bool;
+
+    /// Intersection of the line with the region A
+    /// Given L(t) = u + tv, return endpoints of the interval {t | L(t) ∈ A}
     fn intersect(&self, u: &DOmega, v: &DOmega) -> Option<(FBig<HalfEven>, FBig<HalfEven>)>;
 }
 
-pub fn solve_tdgp(
-    set_a: &impl Region,
-    set_b: &impl Region,
-    op_g: &GridOp,
-    bbox_a: &Rectangle,
-    bbox_b: &Rectangle,
+pub fn solve_tdgp<'a>(
+    set_a: &'a impl Region,
+    set_b: &'a impl Region,
+    op_g: &'a GridOp,
+    bbox_a: &'a Rectangle,
+    bbox_b: &'a Rectangle,
     k: i64,
     _verbose: bool,
-) -> Vec<DOmega> {
-    let mut sol_sufficient = Vec::with_capacity(100); // Pre-allocate reasonable capacity
-
-    let mut sol_x = solve_scaled_odgp(&bbox_a.x, &bbox_b.x, k + 1);
-
-    let alpha0 = match sol_x.next() {
-        Some(val) => val,
-        None => return vec![],
-    };
-
-    let droot_zero = DRootTwo::from_int(IBig::ZERO);
+) -> Option<impl Iterator<Item = DOmega> + 'a> {
+    let alpha0 = first_solve_scaled_odgp(&bbox_a.x, &bbox_b.x, k + 1)?;
     let _k_ibig = IBig::from(k);
     let dx = DRootTwo::power_of_inv_sqrt2(k);
     let op_g_inv_result = op_g.inv();
 
     let op_g_inv = op_g_inv_result.unwrap();
     let zero_droottwo = DRootTwo::from_int(IBig::ZERO);
     let v = op_g_inv * DOmega::from_droottwo_vector(&dx, &zero_droottwo, k);
-
-    let v_conj_sq2 = v.conj_sq2();
+    let v_conj_sq2 = v.conj_sq2().clone();
 
     let bbox_a_new = bbox_a
         .y
@@ -54,63 +53,65 @@ pub fn solve_tdgp(
         .fatten(&(bbox_b.y.width() / ib_to_bf_prec(IBig::from(10000))));
     let sol_y = solve_scaled_odgp(&bbox_a_new, &bbox_b_new, k + 1);
 
-    for beta in sol_y {
-        let dx = DRootTwo::power_of_inv_sqrt2(k);
-        let z0 = op_g.inv().unwrap() * DOmega::from_droottwo_vector(&alpha0, &beta, k + 1);
-        let v = op_g.inv().unwrap() * DOmega::from_droottwo_vector(&dx, &droot_zero, k);
-
-        let t_a = set_a.intersect(&z0, &v);
-        let t_b = set_b.intersect(z0.conj_sq2(), v_conj_sq2);
-        if t_a.is_none() || t_b.is_none() {
-            continue;
-        }
-        let (t_a, t_b) = (t_a.unwrap(), t_b.unwrap());
-
-        let parity = (&beta - &alpha0).mul_by_sqrt2_power_renewing_denomexp(k);
-        let (mut int_a, mut int_b) = (Interval::new(t_a.0, t_a.1), Interval::new(t_b.0, t_b.1));
-        let dt_a = {
-            let ten = ib_to_bf_prec(IBig::from(10));
+    let sol_sufficient = sol_y.flat_map(move |y| {
+        newproc(y, set_a, set_b, op_g, alpha0.clone(), v_conj_sq2.clone(), k)
+            .into_iter()
+            .flatten()
+    });
 
-            let shift_k = IBig::ONE << (k as usize);
-            let width_product = shift_k * int_b.width();
-            let max_val = {
-                if ten > width_product {
-                    &ten
-                } else {
-                    &width_product
-                }
-            };
-            fb_with_prec(&ten / max_val)
-        };
-        let dt_b = {
-            let ten = ib_to_bf_prec(IBig::from(10));
-            let shift_k = IBig::from(1) << (k as usize);
-            let width_product = shift_k * int_a.width();
-            let max_val = {
-                if ten > width_product {
-                    &ten
-                } else {
-                    &width_product
-                }
-            };
-            fb_with_prec(&ten / max_val)
-        };
+    let solutions = sol_sufficient
+        .map(|z| op_g.inv().unwrap() * z)
+        .filter(|z| set_a.inside(z) && set_b.inside(z.conj_sq2()));
+    Some(solutions)
+}
 
-        int_a = int_a.fatten(&dt_a);
-        int_b = int_b.fatten(&dt_b);
+fn newproc<'a>(
+    beta: DRootTwo,
+    set_a: &'a impl Region,
+    set_b: &'a impl Region,
+    op_g: &'a GridOp,
+    alpha0: DRootTwo,
+    //    alpha0: &'a DRootTwo,
+    v_conj_sq2: DOmega,
+    //    v_conj_sq2: &'a DOmega,
+    k: i64,
+) -> Option<impl Iterator<Item = DOmega> + 'a> {
+    let alpha0 = alpha0.clone();
+    let droot_zero = DRootTwo::from_int(IBig::ZERO);
+    let dx = DRootTwo::power_of_inv_sqrt2(k);
+    let z0 = op_g.inv().unwrap() * DOmega::from_droottwo_vector(&alpha0, &beta, k + 1);
+    let v = op_g.inv().unwrap() * DOmega::from_droottwo_vector(&dx, &droot_zero, k);
 
-        let sol_t = solve_scaled_odgp_with_parity_k_ne_0(int_a, int_b, 1, &parity);
-        let sol_x = sol_t.map(|alpha| alpha * dx.clone() + alpha0.clone());
-        for alpha in sol_x {
-            sol_sufficient.push(DOmega::from_droottwo_vector(&alpha, &beta, k));
-        }
+    let t_a = set_a.intersect(&z0, &v);
+    let t_b = set_b.intersect(z0.conj_sq2(), &v_conj_sq2);
+    if t_a.is_none() || t_b.is_none() {
+        return None;
     }
+    let (t_a, t_b) = (t_a.unwrap(), t_b.unwrap());
+
+    let parity = (&beta - &alpha0).mul_by_sqrt2_power_renewing_denomexp(k);
+    let (mut int_a, mut int_b) = (Interval::new(t_a.0, t_a.1), Interval::new(t_b.0, t_b.1));
+    let dt_a = get_dt_x(k, &int_b);
+    let dt_b = get_dt_x(k, &int_a);
+    int_a = int_a.fatten(&dt_a);
+    int_b = int_b.fatten(&dt_b);
+
+    let sol_t = solve_scaled_odgp_with_parity_k_ne_0(int_a, int_b, 1, &parity);
+    let sol_x = sol_t.map(move |alpha| alpha * dx.clone() + alpha0.clone());
+    let sol_xx = sol_x.map(move |alpha| DOmega::from_droottwo_vector(&alpha, &beta, k));
+    Some(sol_xx)
+}
 
-    let op_g_inv = op_g.inv().unwrap();
-
-    sol_sufficient
-        .into_iter()
-        .map(|z| op_g_inv.clone() * z)
-        .filter(|z| set_a.inside(z) && set_b.inside(z.conj_sq2()))
-        .collect()
+fn get_dt_x(k: i64, int_y: &Interval) -> FBig<HalfEven> {
+    let ten = ib_to_bf_prec(IBig::from(10));
+    let shift_k = IBig::from(1) << (k as usize);
+    let width_product = shift_k * int_y.width();
+    let max_val = {
+        if ten > width_product {
+            &ten
+        } else {
+            &width_product
+        }
+    };
+    fb_with_prec(&ten / max_val)
 }