xiph · barrbrain · Oct 20, 2023 · Sep 30, 2023 · Sep 30, 2023 · Oct 2, 2023
diff --git a/benches/mc.rs b/benches/mc.rs
@@ -263,7 +263,7 @@ fn bench_prep_8tap_top_left_lbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u8>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,
@@ -294,7 +294,7 @@ fn bench_prep_8tap_top_lbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u8>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,
@@ -325,7 +325,7 @@ fn bench_prep_8tap_left_lbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u8>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,
@@ -356,7 +356,7 @@ fn bench_prep_8tap_center_lbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u8>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,
@@ -387,7 +387,7 @@ fn bench_prep_8tap_top_left_hbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u16>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,
@@ -418,7 +418,7 @@ fn bench_prep_8tap_top_hbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u16>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,
@@ -449,7 +449,7 @@ fn bench_prep_8tap_left_hbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u16>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,
@@ -480,7 +480,7 @@ fn bench_prep_8tap_center_hbd(c: &mut Criterion) {
   let w = 640;
   let h = 480;
   let input_plane = new_plane::<u16>(&mut ra, w, h);
-  let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
+  let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);
 
   let (row_frac, col_frac, src) = get_params(
     &input_plane,

diff --git a/benches/transform.rs b/benches/transform.rs
@@ -15,6 +15,7 @@ use rav1e::bench::transform;
 use rav1e::bench::transform::{
   forward_transform, get_valid_txfm_types, TxSize,
 };
+use std::mem::MaybeUninit;
 
 fn init_buffers(size: usize) -> (Vec<i32>, Vec<i32>) {
   let mut ra = ChaChaRng::from_seed([0; 32]);
@@ -96,7 +97,7 @@ pub fn bench_forward_transforms(c: &mut Criterion) {
 
     let input: Vec<i16> =
       (0..area).map(|_| rng.gen_range(-255..256)).collect();
-    let mut output = vec![0i16; area];
+    let mut output = vec![MaybeUninit::new(0i16); area];
 
     for &tx_type in get_valid_txfm_types(tx_size) {
       group.bench_function(

diff --git a/src/asm/shared/predict.rs b/src/asm/shared/predict.rs
@@ -39,18 +39,10 @@ mod test {
 
   fn pred_matches_inner<T: Pixel>(cpu: CpuFeatureLevel, bit_depth: usize) {
     let tx_size = TxSize::TX_4X4;
-    // SAFETY: We write to the array below before reading from it.
-    let mut ac: Aligned<[i16; 32 * 32]> = unsafe { Aligned::uninitialized() };
-    for i in 0..ac.data.len() {
-      ac.data[i] = i as i16 - 16 * 32;
-    }
-    // SAFETY: We write to the array below before reading from it.
-    let mut edge_buf: Aligned<[T; 4 * MAX_TX_SIZE + 1]> =
-      unsafe { Aligned::uninitialized() };
-    for i in 0..edge_buf.data.len() {
-      edge_buf.data[i] =
-        T::cast_from(((i ^ 1) + 32).saturating_sub(2 * MAX_TX_SIZE));
-    }
+    let ac: Aligned<[i16; 32 * 32]> = Aligned::from_fn(|i| i as i16 - 16 * 32);
+    let edge_buf: Aligned<[T; 4 * MAX_TX_SIZE + 1]> = Aligned::from_fn(|i| {
+      T::cast_from(((i ^ 1) + 32).saturating_sub(2 * MAX_TX_SIZE))
+    });
 
     let ief_params_all = [
       None,

diff --git a/src/asm/shared/transform/inverse.rs b/src/asm/shared/transform/inverse.rs
@@ -9,6 +9,7 @@
 
 use crate::tiling::PlaneRegionMut;
 use crate::util::*;
+use std::mem::MaybeUninit;
 
 // Note: Input coeffs are mutable since the assembly uses them as a scratchpad
 pub type InvTxfmFunc =
@@ -27,13 +28,13 @@ pub fn call_inverse_func<T: Pixel>(
   let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];
 
   // SAFETY: We write to the array below before reading from it.
-  let mut copied: Aligned<[T::Coeff; 32 * 32]> =
+  let mut copied: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
     unsafe { Aligned::uninitialized() };
 
   // Convert input to 16-bits.
   // TODO: Remove by changing inverse assembly to not overwrite its input
   for (a, b) in copied.data.iter_mut().zip(input) {
-    *a = *b;
+    a.write(*b);
   }
 
   // perform the inverse transform
@@ -57,13 +58,13 @@ pub fn call_inverse_hbd_func<T: Pixel>(
   let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];
 
   // SAFETY: We write to the array below before reading from it.
-  let mut copied: Aligned<[T::Coeff; 32 * 32]> =
+  let mut copied: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
     unsafe { Aligned::uninitialized() };
 
   // Convert input to 16-bits.
   // TODO: Remove by changing inverse assembly to not overwrite its input
   for (a, b) in copied.data.iter_mut().zip(input) {
-    *a = *b;
+    a.write(*b);
   }
 
   // perform the inverse transform
@@ -89,6 +90,7 @@ pub mod test {
   use crate::transform::TxSize::*;
   use crate::transform::*;
   use rand::{random, thread_rng, Rng};
+  use std::mem::MaybeUninit;
 
   pub fn pick_eob<T: Coefficient>(
     coeffs: &mut [T], tx_size: TxSize, tx_type: TxType, sub_h: usize,
@@ -147,21 +149,22 @@ pub mod test {
       let mut src_storage = [T::zero(); 64 * 64];
       let src = &mut src_storage[..tx_size.area()];
       let mut dst = Plane::from_slice(&[T::zero(); 64 * 64], 64);
-      // SAFETY: We write to the array below before reading from it.
-      let mut res_storage: Aligned<[i16; 64 * 64]> =
+      let mut res_storage: Aligned<[MaybeUninit<i16>; 64 * 64]> =
         unsafe { Aligned::uninitialized() };
       let res = &mut res_storage.data[..tx_size.area()];
       // SAFETY: We write to the array below before reading from it.
-      let mut freq_storage: Aligned<[T::Coeff; 64 * 64]> =
+      let mut freq_storage: Aligned<[MaybeUninit<T::Coeff>; 64 * 64]> =
         unsafe { Aligned::uninitialized() };
       let freq = &mut freq_storage.data[..tx_size.area()];
       for ((r, s), d) in
         res.iter_mut().zip(src.iter_mut()).zip(dst.data.iter_mut())
       {
         *s = T::cast_from(random::<u16>() >> (16 - bit_depth));
         *d = T::cast_from(random::<u16>() >> (16 - bit_depth));
-        *r = i16::cast_from(*s) - i16::cast_from(*d);
+        r.write(i16::cast_from(*s) - i16::cast_from(*d));
       }
+      // SAFETY: The loop just initialized res, and all three slices have the same length
+      let res = unsafe { slice_assume_init_mut(res) };
       forward_transform(
         res,
         freq,
@@ -171,6 +174,8 @@ pub mod test {
         bit_depth,
         CpuFeatureLevel::RUST,
       );
+      // SAFETY: forward_transform initialized freq
+      let freq = unsafe { slice_assume_init_mut(freq) };
 
       let eob: usize = pick_eob(freq, tx_size, tx_type, sub_h);
       let mut rust_dst = dst.clone();

diff --git a/src/asm/x86/quantize.rs b/src/asm/x86/quantize.rs
@@ -17,6 +17,7 @@ use crate::cpu_features::CpuFeatureLevel;
 use crate::quantize::*;
 use crate::transform::TxSize;
 use crate::util::*;
+use std::mem::MaybeUninit;
 
 type DequantizeFn = unsafe fn(
   qindex: u8,
@@ -37,21 +38,21 @@ cpu_function_lookup_table!(
 
 #[inline(always)]
 pub fn dequantize<T: Coefficient>(
-  qindex: u8, coeffs: &[T], eob: usize, rcoeffs: &mut [T], tx_size: TxSize,
-  bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8, cpu: CpuFeatureLevel,
+  qindex: u8, coeffs: &[T], eob: usize, rcoeffs: &mut [MaybeUninit<T>],
+  tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
+  cpu: CpuFeatureLevel,
 ) {
-  let call_rust = |rcoeffs: &mut [T]| {
+  let call_rust = |rcoeffs: &mut [MaybeUninit<T>]| {
     crate::quantize::rust::dequantize(
       qindex, coeffs, eob, rcoeffs, tx_size, bit_depth, dc_delta_q,
       ac_delta_q, cpu,
     );
   };
 
   #[cfg(any(feature = "check_asm", test))]
-  let ref_rcoeffs = {
+  let mut ref_rcoeffs = {
     let area = av1_get_coded_tx_size(tx_size).area();
-    let mut copy = vec![T::cast_from(0); area];
-    copy[..].copy_from_slice(&rcoeffs[..area]);
+    let mut copy = vec![MaybeUninit::new(T::cast_from(0)); area];
     call_rust(&mut copy);
     copy
   };
@@ -82,7 +83,9 @@ pub fn dequantize<T: Coefficient>(
   #[cfg(any(feature = "check_asm", test))]
   {
     let area = av1_get_coded_tx_size(tx_size).area();
-    assert_eq!(&rcoeffs[..area], &ref_rcoeffs[..]);
+    let rcoeffs = unsafe { assume_slice_init_mut(&mut rcoeffs[..area]) };
+    let ref_rcoeffs = unsafe { assume_slice_init_mut(&mut ref_rcoeffs[..]) };
+    assert_eq!(rcoeffs, ref_rcoeffs);
   }
 }
 
@@ -157,6 +160,7 @@ mod test {
   use super::*;
   use rand::distributions::{Distribution, Uniform};
   use rand::{thread_rng, Rng};
+  use std::mem::MaybeUninit;
 
   #[test]
   fn dequantize_test() {
@@ -190,7 +194,7 @@ mod test {
 
       for &eob in &eobs {
         let mut qcoeffs = Aligned::new([0i16; 32 * 32]);
-        let mut rcoeffs = Aligned::new([0i16; 32 * 32]);
+        let mut rcoeffs = Aligned::new([MaybeUninit::new(0i16); 32 * 32]);
 
         // Generate quantized coefficients up to the eob
         let between = Uniform::from(-i16::MAX..=i16::MAX);

diff --git a/src/asm/x86/transform/forward.rs b/src/asm/x86/transform/forward.rs
@@ -335,8 +335,8 @@ fn cast_mut<const N: usize, T>(x: &mut [T]) -> &mut [T; N] {
 #[allow(clippy::identity_op, clippy::erasing_op)]
 #[target_feature(enable = "avx2")]
 unsafe fn forward_transform_avx2<T: Coefficient>(
-  input: &[i16], output: &mut [T], stride: usize, tx_size: TxSize,
-  tx_type: TxType, bd: usize,
+  input: &[i16], output: &mut [MaybeUninit<T>], stride: usize,
+  tx_size: TxSize, tx_type: TxType, bd: usize,
 ) {
   // Note when assigning txfm_size_col, we use the txfm_size from the
   // row configuration and vice versa. This is intentionally done to
@@ -510,8 +510,8 @@ unsafe fn forward_transform_avx2<T: Coefficient>(
 ///
 /// - If called with an invalid combination of `tx_size` and `tx_type`
 pub fn forward_transform<T: Coefficient>(
-  input: &[i16], output: &mut [T], stride: usize, tx_size: TxSize,
-  tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
+  input: &[i16], output: &mut [MaybeUninit<T>], stride: usize,
+  tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
 ) {
   assert!(valid_av1_transform(tx_size, tx_type));
   if cpu >= CpuFeatureLevel::AVX2 {
@@ -528,7 +528,9 @@ pub fn forward_transform<T: Coefficient>(
 mod test {
   use crate::cpu_features::*;
   use crate::transform::{forward_transform, get_valid_txfm_types, TxSize};
+  use crate::util::assume_slice_init_mut;
   use rand::Rng;
+  use std::mem::MaybeUninit;
 
   // Ensure that the simd results match the rust code
   #[test]
@@ -560,8 +562,8 @@ mod test {
         (0..area).map(|_| rng.gen_range(-255..256)).collect();
 
       for &tx_type in get_valid_txfm_types(tx_size) {
-        let mut output_ref = vec![0i16; area];
-        let mut output_simd = vec![0i16; area];
+        let mut output_ref = vec![MaybeUninit::new(0i16); area];
+        let mut output_simd = vec![MaybeUninit::new(0i16); area];
 
         println!("Testing combination {:?}, {:?}", tx_size, tx_type);
         forward_transform(
@@ -573,6 +575,7 @@ mod test {
           8,
           CpuFeatureLevel::RUST,
         );
+        let output_ref = unsafe { assume_slice_init_mut(&mut output_ref[..]) };
         forward_transform(
           &input[..],
           &mut output_simd[..],
@@ -582,6 +585,8 @@ mod test {
           8,
           cpu,
         );
+        let output_simd =
+          unsafe { assume_slice_init_mut(&mut output_simd[..]) };
         assert_eq!(output_ref, output_simd)
       }
     }

diff --git a/src/context/block_unit.rs b/src/context/block_unit.rs
@@ -1861,8 +1861,7 @@ impl<'a> ContextWriter<'a> {
     &mut self, eob: usize, tx_size: TxSize, tx_class: TxClass, txs_ctx: usize,
     plane_type: usize, w: &mut W,
   ) {
-    let mut eob_extra: u32 = 0;
-    let eob_pt = Self::get_eob_pos_token(eob, &mut eob_extra);
+    let (eob_pt, eob_extra) = Self::get_eob_pos_token(eob);
     let eob_multi_size: usize = tx_size.area_log2() - 4;
     let eob_multi_ctx: usize = usize::from(tx_class != TX_CLASS_2D);
 

diff --git a/src/context/mod.rs b/src/context/mod.rs
@@ -164,16 +164,18 @@ pub const fn mv_class_base(mv_class: usize) -> u32 {
 pub fn log_in_base_2(n: u32) -> u8 {
   31 - cmp::min(31, n.leading_zeros() as u8)
 }
+
+/// Returns `(mv_class, offset)`
 #[inline(always)]
-pub fn get_mv_class(z: u32, offset: &mut u32) -> usize {
+pub fn get_mv_class(z: u32) -> (usize, u32) {
   let c = if z >= CLASS0_SIZE as u32 * 4096 {
     MV_CLASS_10
   } else {
     log_in_base_2(z >> 3) as usize
   };
 
-  *offset = z - mv_class_base(c);
-  c
+  let offset = z - mv_class_base(c);
+  (c, offset)
 }
 
 impl<'a> ContextWriter<'a> {
@@ -186,10 +188,9 @@ impl<'a> ContextWriter<'a> {
   ) {
     assert!(comp != 0);
     assert!((MV_LOW..=MV_UPP).contains(&comp));
-    let mut offset: u32 = 0;
     let sign: u32 = u32::from(comp < 0);
     let mag: u32 = if sign == 1 { -comp as u32 } else { comp as u32 };
-    let mv_class = get_mv_class(mag - 1, &mut offset);
+    let (mv_class, offset) = get_mv_class(mag - 1);
     let d = offset >> 3; // int mv data
     let fr = (offset >> 1) & 3; // fractional mv data
     let hp = offset & 1; // high precision mv data

diff --git a/src/context/transform_unit.rs b/src/context/transform_unit.rs
@@ -798,21 +798,23 @@ impl<'a> ContextWriter<'a> {
     av1_get_coded_tx_size(tx_size).height_log2()
   }
 
+  /// Returns `(eob_pt, eob_extra)`
+  ///
   /// # Panics
   ///
   /// - If `eob` is prior to the start of the group
   #[inline]
-  pub fn get_eob_pos_token(eob: usize, extra: &mut u32) -> u32 {
+  pub fn get_eob_pos_token(eob: usize) -> (u32, u32) {
     let t = if eob < 33 {
       eob_to_pos_small[eob] as u32
     } else {
       let e = cmp::min((eob - 1) >> 5, 16);
       eob_to_pos_large[e] as u32
     };
     assert!(eob as i32 >= k_eob_group_start[t as usize] as i32);
-    *extra = eob as u32 - k_eob_group_start[t as usize] as u32;
+    let extra = eob as u32 - k_eob_group_start[t as usize] as u32;
 
-    t
+    (t, extra)
   }
 
   pub fn get_nz_mag(levels: &[u8], bhl: usize, tx_class: TxClass) -> usize {