From 82fdf7ffd09eb252c9580922f3093d4b9e391aa6 Mon Sep 17 00:00:00 2001 From: David Michael Barr Date: Sat, 21 Oct 2023 00:04:47 +0900 Subject: [PATCH] arm64: Move WHT_WHT transforms to common lookup table --- src/asm/aarch64/transform/inverse.rs | 70 ++++++++-------------------- 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/src/asm/aarch64/transform/inverse.rs b/src/asm/aarch64/transform/inverse.rs index e94d0b3c43..9c0b500dfa 100644 --- a/src/asm/aarch64/transform/inverse.rs +++ b/src/asm/aarch64/transform/inverse.rs @@ -19,22 +19,6 @@ pub fn inverse_transform_add( input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize, tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel, ) { - if tx_type == TxType::WHT_WHT { - debug_assert!(tx_size == TxSize::TX_4X4); - match T::type_enum() { - PixelType::U8 => { - if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] { - return call_inverse_func(func, input, output, eob, 4, 4, bd); - } - } - PixelType::U16 if bd == 10 => { - if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] { - return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd); - } - } - PixelType::U16 => {} - } - } match T::type_enum() { PixelType::U8 => { if let Some(func) = INV_TXFM_FNS[cpu.as_index()][tx_size][tx_type] { @@ -68,32 +52,6 @@ pub fn inverse_transform_add( rust::inverse_transform_add(input, output, eob, tx_size, tx_type, bd, cpu); } -extern { - fn rav1e_inv_txfm_add_wht_wht_4x4_8bpc_neon( - dst: *mut u8, dst_stride: libc::ptrdiff_t, coeff: *mut i16, eob: i32, - ); - fn rav1e_inv_txfm_add_wht_wht_4x4_16bpc_neon( - dst: *mut u16, dst_stride: libc::ptrdiff_t, coeff: *mut i16, eob: i32, - bitdepth_max: i32, - ); -} -const INV_TXFM_WHT_FN_NEON: Option = - Some(rav1e_inv_txfm_add_wht_wht_4x4_8bpc_neon as _); -const INV_TXFM_WHT_HBD_FN_NEON: Option = - Some(rav1e_inv_txfm_add_wht_wht_4x4_16bpc_neon as _); - -cpu_function_lookup_table!( - INV_TXFM_WHT_FN: [Option], - default: None, - [NEON] -); - -cpu_function_lookup_table!( - INV_TXFM_WHT_HBD_FN: [Option], - default: None, - [NEON] -); - macro_rules! decl_itx_fns { // Takes a 2d list of tx types for W and H ([$([$(($ENUM:expr, $TYPE1:ident, $TYPE2:ident)),*]),*], $W:expr, $H:expr, @@ -223,7 +181,7 @@ macro_rules! impl_itx_fns { }; ($TYPES64:tt, $DIMS64:tt, $TYPES32:tt, $DIMS32:tt, $TYPES16:tt, $DIMS16:tt, - $TYPES84:tt, $DIMS84:tt, $OPT:tt) => { + $TYPES84:tt, $DIMS84:tt, $TYPES4:tt, $DIMS4:tt, $OPT:tt) => { // Make 2d list of tx types for each set of dimensions. Each set of // dimensions uses a superset of the previous set of tx types. impl_itx_fns!([$TYPES64], $DIMS64, $OPT); @@ -232,11 +190,14 @@ macro_rules! impl_itx_fns { impl_itx_fns!( [$TYPES64, $TYPES32, $TYPES16, $TYPES84], $DIMS84, $OPT ); + impl_itx_fns!( + [$TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4], $DIMS4, $OPT + ); // Pool all of the dimensions together to create a table for each cpu // feature level. create_wxh_tables!( - [$DIMS64, $DIMS32, $DIMS16, $DIMS84], $OPT + [$DIMS64, $DIMS32, $DIMS16, $DIMS84, $DIMS4], $OPT ); }; } @@ -262,14 +223,17 @@ impl_itx_fns!( (TxType::FLIPADST_FLIPADST, flipadst, flipadst) ], [(16, 16)], - // 8x, 4x and 16x (minus 16x16) + // 8x, 4x and 16x (minus 16x16 and 4x4) [ (TxType::V_ADST, adst, identity), (TxType::H_ADST, identity, adst), (TxType::V_FLIPADST, flipadst, identity), (TxType::H_FLIPADST, identity, flipadst) ], - [(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8), (4, 4)], + [(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8)], + // 4x4 + [(TxType::WHT_WHT, wht, wht)], + [(4, 4)], [(neon, NEON)] ); @@ -295,7 +259,7 @@ macro_rules! impl_itx_hbd_fns { }; ($TYPES64:tt, $DIMS64:tt, $TYPES32:tt, $DIMS32:tt, $TYPES16:tt, $DIMS16:tt, - $TYPES84:tt, $DIMS84:tt, $OPT:tt) => { + $TYPES84:tt, $DIMS84:tt, $TYPES4:tt, $DIMS4:tt, $OPT:tt) => { // Make 2d list of tx types for each set of dimensions. Each set of // dimensions uses a superset of the previous set of tx types. impl_itx_hbd_fns!([$TYPES64], $DIMS64, $OPT); @@ -304,11 +268,14 @@ macro_rules! impl_itx_hbd_fns { impl_itx_hbd_fns!( [$TYPES64, $TYPES32, $TYPES16, $TYPES84], $DIMS84, $OPT ); + impl_itx_hbd_fns!( + [$TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4], $DIMS4, $OPT + ); // Pool all of the dimensions together to create a table for each cpu // feature level. create_wxh_hbd_tables!( - [$DIMS64, $DIMS32, $DIMS16, $DIMS84], $OPT + [$DIMS64, $DIMS32, $DIMS16, $DIMS84, $DIMS4], $OPT ); }; } @@ -334,14 +301,17 @@ impl_itx_hbd_fns!( (TxType::FLIPADST_FLIPADST, flipadst, flipadst) ], [(16, 16)], - // 8x, 4x and 16x (minus 16x16) + // 8x, 4x and 16x (minus 16x16 and 4x4) [ (TxType::V_ADST, adst, identity), (TxType::H_ADST, identity, adst), (TxType::V_FLIPADST, flipadst, identity), (TxType::H_FLIPADST, identity, flipadst) ], - [(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8), (4, 4)], + [(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8)], + // 4x4 + [(TxType::WHT_WHT, wht, wht)], + [(4, 4)], [(neon, NEON)] );