From 9d44453d4d80708efd3102e45fab9703e468ea04 Mon Sep 17 00:00:00 2001 From: David Michael Barr Date: Thu, 19 Oct 2023 23:00:46 +0900 Subject: [PATCH] Add TxType::WHT_WHT and unify inverse_transform_add --- src/asm/aarch64/transform/inverse.rs | 33 +++++++-------- src/asm/x86/transform/inverse.rs | 31 ++++++-------- src/transform/forward_shared.rs | 2 +- src/transform/inverse.rs | 63 +++++++--------------------- src/transform/mod.rs | 21 ++++++++-- 5 files changed, 59 insertions(+), 91 deletions(-) diff --git a/src/asm/aarch64/transform/inverse.rs b/src/asm/aarch64/transform/inverse.rs index b889b86365..cba27385ae 100644 --- a/src/asm/aarch64/transform/inverse.rs +++ b/src/asm/aarch64/transform/inverse.rs @@ -16,31 +16,26 @@ use crate::{Pixel, PixelType}; use crate::asm::shared::transform::inverse::*; use crate::asm::shared::transform::*; -#[inline] -pub fn inverse_transform_add_lossless( +pub fn inverse_transform_add( input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize, - bd: usize, cpu: CpuFeatureLevel, + tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel, ) { - match T::type_enum() { - PixelType::U8 => { - if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] { - return call_inverse_func(func, input, output, eob, 4, 4, bd); + if tx_type == TxType::WHT_WHT { + debug_assert!(tx_size == TxSize::TX_4X4); + match T::type_enum() { + PixelType::U8 => { + if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] { + return call_inverse_func(func, input, output, eob, 4, 4, bd); + } } - } - PixelType::U16 if bd == 10 => { - if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] { - return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd); + PixelType::U16 if bd == 10 => { + if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] { + return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd); + } } + PixelType::U16 => {} } - PixelType::U16 => {} } - rust::inverse_transform_add_lossless(input, output, eob, bd, cpu); -} - -pub fn inverse_transform_add( - input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize, - tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel, -) { match T::type_enum() { PixelType::U8 => { if let Some(func) = INV_TXFM_FNS[cpu.as_index()] diff --git a/src/asm/x86/transform/inverse.rs b/src/asm/x86/transform/inverse.rs index 007ba3e651..df99f4b4b3 100644 --- a/src/asm/x86/transform/inverse.rs +++ b/src/asm/x86/transform/inverse.rs @@ -16,30 +16,25 @@ use crate::{Pixel, PixelType}; use crate::asm::shared::transform::inverse::*; use crate::asm::shared::transform::*; -#[inline] -pub fn inverse_transform_add_lossless( +pub fn inverse_transform_add( input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize, - bd: usize, cpu: CpuFeatureLevel, + tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel, ) { - match T::type_enum() { - PixelType::U8 => { - if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] { - return call_inverse_func(func, input, output, eob, 4, 4, bd); + if tx_type == TxType::WHT_WHT { + debug_assert!(tx_size == TxSize::TX_4X4); + match T::type_enum() { + PixelType::U8 => { + if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] { + return call_inverse_func(func, input, output, eob, 4, 4, bd); + } } - } - PixelType::U16 => { - if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] { - return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd); + PixelType::U16 => { + if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] { + return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd); + } } } } - rust::inverse_transform_add_lossless(input, output, eob, bd, cpu); -} - -pub fn inverse_transform_add( - input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize, - tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel, -) { match T::type_enum() { PixelType::U8 => { if let Some(func) = INV_TXFM_FNS[cpu.as_index()] diff --git a/src/transform/forward_shared.rs b/src/transform/forward_shared.rs index 232af22866..8f55551cea 100644 --- a/src/transform/forward_shared.rs +++ b/src/transform/forward_shared.rs @@ -145,7 +145,7 @@ impl Txfm2DFlipCfg { use self::TxType::*; match tx_type { DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT - | V_ADST | H_ADST => (false, false), + | V_ADST | H_ADST | WHT_WHT => (false, false), FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false), DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true), FLIPADST_FLIPADST => (true, true), diff --git a/src/transform/inverse.rs b/src/transform/inverse.rs index e4ca4bbcde..870e517f37 100644 --- a/src/transform/inverse.rs +++ b/src/transform/inverse.rs @@ -33,7 +33,7 @@ use super::TxType; /// # Panics /// /// - If `input` or `output` have fewer than 4 items. -pub fn av1_iwht4(input: &[i32], output: &mut [i32]) { +pub fn av1_iwht4(input: &[i32], output: &mut [i32], _range: usize) { assert!(input.len() >= 4); assert!(output.len() >= 4); @@ -1591,7 +1591,7 @@ fn av1_idct64(input: &[i32], output: &mut [i32], range: usize) { type InvTxfmFn = fn(input: &[i32], output: &mut [i32], range: usize); -static INV_TXFM_FNS: [[InvTxfmFn; 5]; 4] = [ +static INV_TXFM_FNS: [[InvTxfmFn; 5]; 5] = [ [av1_idct4, av1_idct8, av1_idct16, av1_idct32, av1_idct64], [ av1_iadst4, @@ -1614,6 +1614,13 @@ static INV_TXFM_FNS: [[InvTxfmFn; 5]; 4] = [ av1_iidentity32, |_, _, _| unimplemented!(), ], + [ + av1_iwht4, + |_, _, _| unimplemented!(), + |_, _, _| unimplemented!(), + |_, _, _| unimplemented!(), + |_, _, _| unimplemented!(), + ], ]; pub(crate) mod rust { @@ -1624,52 +1631,6 @@ pub(crate) mod rust { use simd_helpers::cold_for_target_arch; use std::cmp; - #[cold_for_target_arch("x86_64", "aarch64")] - pub fn inverse_transform_add_lossless( - input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, _eob: usize, - _bd: usize, _cpu: CpuFeatureLevel, - ) { - // - let input: &[T::Coeff] = &input[..4 * 4]; - let mut buffer = [0i32; 4 * 4]; - - // perform inv txfm on every row - for (r, buffer_slice) in buffer.chunks_exact_mut(4).enumerate() { - let mut temp_in: [i32; 4] = [0; 4]; - for (val, transposed) in input[r..] - .iter() - .map(|a| i32::cast_from(*a)) - .step_by(4) - .zip(temp_in.iter_mut()) - { - *transposed = val >> 2; - } - av1_iwht4(&temp_in, buffer_slice); - } - - // perform inv txfm on every col - for c in 0..4 { - let mut temp_in: [i32; 4] = [0; 4]; - let mut temp_out: [i32; 4] = [0; 4]; - for (val, transposed) in buffer[c..] - .iter() - .map(|a| i32::cast_from(*a)) - .step_by(4) - .zip(temp_in.iter_mut()) - { - *transposed = val; - } - av1_iwht4(&temp_in, &mut temp_out); - for (temp, out) in temp_out - .iter() - .zip(output.rows_iter_mut().map(|row| &mut row[c]).take(4)) - { - let v = i32::cast_from(*out) + *temp; - *out = T::cast_from(v); - } - } - } - #[cold_for_target_arch("x86_64", "aarch64")] pub fn inverse_transform_add( input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, _eob: usize, @@ -1686,6 +1647,7 @@ pub(crate) mod rust { let mut buffer = vec![0i32; width * height].into_boxed_slice(); let rect_type = get_rect_tx_log_ratio(width, height); let tx_types_1d = get_1d_tx_types(tx_type); + let lossless = tx_type == TxType::WHT_WHT; // perform inv txfm on every row let range = bd + 8; @@ -1705,6 +1667,8 @@ pub(crate) mod rust { { let val = if rect_type.abs() == 1 { round_shift(raw * INV_SQRT2, SQRT2_BITS) + } else if lossless { + raw >> 2 } else { raw }; @@ -1733,7 +1697,8 @@ pub(crate) mod rust { .zip(output.rows_iter_mut().map(|row| &mut row[c]).take(height)) { let v: i32 = (*out).as_(); - let v = clamp(v + round_shift(*temp, 4), 0, (1 << bd) - 1); + let r = if lossless { *temp } else { round_shift(*temp, 4) }; + let v = clamp(v + r, 0, (1 << bd) - 1); *out = T::cast_from(v); } } diff --git a/src/transform/mod.rs b/src/transform/mod.rs index fda9c1d3d3..be55e69598 100644 --- a/src/transform/mod.rs +++ b/src/transform/mod.rs @@ -16,7 +16,6 @@ pub mod forward_shared; pub use self::forward::forward_transform; pub use self::forward::forward_transform_lossless; pub use self::inverse::inverse_transform_add; -pub use self::inverse::inverse_transform_add_lossless; use crate::context::MI_SIZE_LOG2; use crate::partition::{BlockSize, BlockSize::*}; @@ -52,6 +51,7 @@ pub mod consts { } pub const TX_TYPES: usize = 16; +pub const TX_TYPES_PLUS_LL: usize = 17; #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord)] pub enum TxType { @@ -71,6 +71,7 @@ pub enum TxType { H_ADST = 13, V_FLIPADST = 14, H_FLIPADST = 15, + WHT_WHT = 16, } impl TxType { @@ -337,6 +338,7 @@ enum TxType1D { ADST, FLIPADST, IDTX, + WHT, } const fn get_1d_tx_types(tx_type: TxType) -> (TxType1D, TxType1D) { @@ -357,10 +359,11 @@ const fn get_1d_tx_types(tx_type: TxType) -> (TxType1D, TxType1D) { TxType::H_ADST => (TxType1D::IDTX, TxType1D::ADST), TxType::V_FLIPADST => (TxType1D::FLIPADST, TxType1D::IDTX), TxType::H_FLIPADST => (TxType1D::IDTX, TxType1D::FLIPADST), + TxType::WHT_WHT => (TxType1D::WHT, TxType1D::WHT), } } -const VTX_TAB: [TxType1D; TX_TYPES] = [ +const VTX_TAB: [TxType1D; TX_TYPES_PLUS_LL] = [ TxType1D::DCT, TxType1D::ADST, TxType1D::DCT, @@ -377,9 +380,10 @@ const VTX_TAB: [TxType1D; TX_TYPES] = [ TxType1D::IDTX, TxType1D::FLIPADST, TxType1D::IDTX, + TxType1D::WHT, ]; -const HTX_TAB: [TxType1D; TX_TYPES] = [ +const HTX_TAB: [TxType1D; TX_TYPES_PLUS_LL] = [ TxType1D::DCT, TxType1D::DCT, TxType1D::ADST, @@ -396,6 +400,7 @@ const HTX_TAB: [TxType1D; TX_TYPES] = [ TxType1D::ADST, TxType1D::IDTX, TxType1D::FLIPADST, + TxType1D::WHT, ]; #[inline] @@ -514,7 +519,15 @@ mod test { *r = i16::cast_from(*s) - i16::cast_from(*d); } forward_transform_lossless(res, freq, 4, cpu); - inverse_transform_add_lossless(freq, &mut dst.as_region_mut(), 15, 8, cpu); + inverse_transform_add( + freq, + &mut dst.as_region_mut(), + 15, + TX_4X4, + WHT_WHT, + 8, + cpu, + ); assert_eq!(&src[..], &dst.data[..]); }