Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move WHT inverse transforms to common lookup table for x86_64 and aarch64 #3274

Merged
merged 3 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 40 additions & 73 deletions src/asm/aarch64/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,14 @@ use crate::transform::*;
use crate::{Pixel, PixelType};

use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: u16,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
if tx_type == TxType::WHT_WHT {
debug_assert!(tx_size == TxSize::TX_4X4);
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
}
}
PixelType::U16 if bd == 10 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
}
}
PixelType::U16 => {}
}
}
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_FNS[cpu.as_index()]
[get_tx_size_idx(tx_size)][get_tx_type_idx(tx_type)]
{
if let Some(func) = INV_TXFM_FNS[cpu.as_index()][tx_size][tx_type] {
return call_inverse_func(
func,
input,
Expand All @@ -53,9 +34,7 @@ pub fn inverse_transform_add<T: Pixel>(
}
}
PixelType::U16 if bd == 10 => {
if let Some(func) = INV_TXFM_HBD_FNS[cpu.as_index()]
[get_tx_size_idx(tx_size)][get_tx_type_idx(tx_type)]
{
if let Some(func) = INV_TXFM_HBD_FNS[cpu.as_index()][tx_size][tx_type] {
return call_inverse_hbd_func(
func,
input,
Expand All @@ -73,32 +52,6 @@ pub fn inverse_transform_add<T: Pixel>(
rust::inverse_transform_add(input, output, eob, tx_size, tx_type, bd, cpu);
}

extern {
fn rav1e_inv_txfm_add_wht_wht_4x4_8bpc_neon(
dst: *mut u8, dst_stride: libc::ptrdiff_t, coeff: *mut i16, eob: i32,
);
fn rav1e_inv_txfm_add_wht_wht_4x4_16bpc_neon(
dst: *mut u16, dst_stride: libc::ptrdiff_t, coeff: *mut i16, eob: i32,
bitdepth_max: i32,
);
}
const INV_TXFM_WHT_FN_NEON: Option<InvTxfmFunc> =
Some(rav1e_inv_txfm_add_wht_wht_4x4_8bpc_neon as _);
const INV_TXFM_WHT_HBD_FN_NEON: Option<InvTxfmHBDFunc> =
Some(rav1e_inv_txfm_add_wht_wht_4x4_16bpc_neon as _);

cpu_function_lookup_table!(
INV_TXFM_WHT_FN: [Option<InvTxfmFunc>],
default: None,
[NEON]
);

cpu_function_lookup_table!(
INV_TXFM_WHT_HBD_FN: [Option<InvTxfmHBDFunc>],
default: None,
[NEON]
);

macro_rules! decl_itx_fns {
// Takes a 2d list of tx types for W and H
([$([$(($ENUM:expr, $TYPE1:ident, $TYPE2:ident)),*]),*], $W:expr, $H:expr,
Expand All @@ -117,11 +70,11 @@ macro_rules! decl_itx_fns {
)*
)*
// Create a lookup table for the tx types declared above
const [<INV_TXFM_FNS_$W _$H _$OPT_UPPER>]: [Option<InvTxfmFunc>; TX_TYPES] = {
let mut out: [Option<InvTxfmFunc>; 16] = [None; 16];
const [<INV_TXFM_FNS_$W _$H _$OPT_UPPER>]: [Option<InvTxfmFunc>; TX_TYPES_PLUS_LL] = {
let mut out: [Option<InvTxfmFunc>; TX_TYPES_PLUS_LL] = [None; TX_TYPES_PLUS_LL];
$(
$(
out[get_tx_type_idx($ENUM)] = Some([<rav1e_inv_txfm_add_$TYPE2 _$TYPE1 _$W x $H _8bpc_$OPT_LOWER>]);
out[$ENUM as usize] = Some([<rav1e_inv_txfm_add_$TYPE2 _$TYPE1 _$W x $H _8bpc_$OPT_LOWER>]);
)*
)*
out
Expand All @@ -148,11 +101,11 @@ macro_rules! decl_itx_hbd_fns {
)*
)*
// Create a lookup table for the tx types declared above
const [<INV_TXFM_HBD_FNS_$W _$H _$OPT_UPPER>]: [Option<InvTxfmHBDFunc>; TX_TYPES] = {
let mut out: [Option<InvTxfmHBDFunc>; 16] = [None; 16];
const [<INV_TXFM_HBD_FNS_$W _$H _$OPT_UPPER>]: [Option<InvTxfmHBDFunc>; TX_TYPES_PLUS_LL] = {
let mut out: [Option<InvTxfmHBDFunc>; TX_TYPES_PLUS_LL] = [None; TX_TYPES_PLUS_LL];
$(
$(
out[get_tx_type_idx($ENUM)] = Some([<rav1e_inv_txfm_add_$TYPE2 _$TYPE1 _$W x $H _16bpc_$OPT_LOWER>]);
out[$ENUM as usize] = Some([<rav1e_inv_txfm_add_$TYPE2 _$TYPE1 _$W x $H _16bpc_$OPT_LOWER>]);
)*
)*
out
Expand All @@ -165,12 +118,13 @@ macro_rules! create_wxh_tables {
// Create a lookup table for each cpu feature
([$([$(($W:expr, $H:expr)),*]),*], $OPT_LOWER:ident, $OPT_UPPER:ident) => {
paste::item! {
const [<INV_TXFM_FNS_$OPT_UPPER>]: [[Option<InvTxfmFunc>; TX_TYPES]; 32] = {
let mut out: [[Option<InvTxfmFunc>; TX_TYPES]; 32] = [[None; TX_TYPES]; 32];
const [<INV_TXFM_FNS_$OPT_UPPER>]: [[Option<InvTxfmFunc>; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL] = {
let mut out: [[Option<InvTxfmFunc>; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL] =
[[None; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL];
// For each dimension, add an entry to the table
$(
$(
out[get_tx_size_idx(TxSize::[<TX_ $W X $H>])] = [<INV_TXFM_FNS_$W _$H _$OPT_UPPER>];
out[TxSize::[<TX_ $W X $H>] as usize] = [<INV_TXFM_FNS_$W _$H _$OPT_UPPER>];
)*
)*
out
Expand All @@ -190,12 +144,13 @@ macro_rules! create_wxh_hbd_tables {
// Create a lookup table for each cpu feature
([$([$(($W:expr, $H:expr)),*]),*], $OPT_LOWER:ident, $OPT_UPPER:ident) => {
paste::item! {
const [<INV_TXFM_HBD_FNS_$OPT_UPPER>]: [[Option<InvTxfmHBDFunc>; TX_TYPES]; 32] = {
let mut out: [[Option<InvTxfmHBDFunc>; TX_TYPES]; 32] = [[None; TX_TYPES]; 32];
const [<INV_TXFM_HBD_FNS_$OPT_UPPER>]: [[Option<InvTxfmHBDFunc>; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL] = {
let mut out: [[Option<InvTxfmHBDFunc>; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL] =
[[None; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL];
// For each dimension, add an entry to the table
$(
$(
out[get_tx_size_idx(TxSize::[<TX_ $W X $H>])] = [<INV_TXFM_HBD_FNS_$W _$H _$OPT_UPPER>];
out[TxSize::[<TX_ $W X $H>] as usize] = [<INV_TXFM_HBD_FNS_$W _$H _$OPT_UPPER>];
)*
)*
out
Expand Down Expand Up @@ -226,7 +181,7 @@ macro_rules! impl_itx_fns {
};

($TYPES64:tt, $DIMS64:tt, $TYPES32:tt, $DIMS32:tt, $TYPES16:tt, $DIMS16:tt,
$TYPES84:tt, $DIMS84:tt, $OPT:tt) => {
$TYPES84:tt, $DIMS84:tt, $TYPES4:tt, $DIMS4:tt, $OPT:tt) => {
// Make 2d list of tx types for each set of dimensions. Each set of
// dimensions uses a superset of the previous set of tx types.
impl_itx_fns!([$TYPES64], $DIMS64, $OPT);
Expand All @@ -235,11 +190,14 @@ macro_rules! impl_itx_fns {
impl_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84], $DIMS84, $OPT
);
impl_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4], $DIMS4, $OPT
);

// Pool all of the dimensions together to create a table for each cpu
// feature level.
create_wxh_tables!(
[$DIMS64, $DIMS32, $DIMS16, $DIMS84], $OPT
[$DIMS64, $DIMS32, $DIMS16, $DIMS84, $DIMS4], $OPT
);
};
}
Expand All @@ -265,20 +223,23 @@ impl_itx_fns!(
(TxType::FLIPADST_FLIPADST, flipadst, flipadst)
],
[(16, 16)],
// 8x, 4x and 16x (minus 16x16)
// 8x, 4x and 16x (minus 16x16 and 4x4)
[
(TxType::V_ADST, adst, identity),
(TxType::H_ADST, identity, adst),
(TxType::V_FLIPADST, flipadst, identity),
(TxType::H_FLIPADST, identity, flipadst)
],
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8), (4, 4)],
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8)],
// 4x4
[(TxType::WHT_WHT, wht, wht)],
[(4, 4)],
[(neon, NEON)]
);

cpu_function_lookup_table!(
INV_TXFM_FNS: [[[Option<InvTxfmFunc>; TX_TYPES]; 32]],
default: [[None; TX_TYPES]; 32],
INV_TXFM_FNS: [[[Option<InvTxfmFunc>; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL]],
default: [[None; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL],
[NEON]
);

Expand All @@ -298,7 +259,7 @@ macro_rules! impl_itx_hbd_fns {
};

($TYPES64:tt, $DIMS64:tt, $TYPES32:tt, $DIMS32:tt, $TYPES16:tt, $DIMS16:tt,
$TYPES84:tt, $DIMS84:tt, $OPT:tt) => {
$TYPES84:tt, $DIMS84:tt, $TYPES4:tt, $DIMS4:tt, $OPT:tt) => {
// Make 2d list of tx types for each set of dimensions. Each set of
// dimensions uses a superset of the previous set of tx types.
impl_itx_hbd_fns!([$TYPES64], $DIMS64, $OPT);
Expand All @@ -307,11 +268,14 @@ macro_rules! impl_itx_hbd_fns {
impl_itx_hbd_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84], $DIMS84, $OPT
);
impl_itx_hbd_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4], $DIMS4, $OPT
);

// Pool all of the dimensions together to create a table for each cpu
// feature level.
create_wxh_hbd_tables!(
[$DIMS64, $DIMS32, $DIMS16, $DIMS84], $OPT
[$DIMS64, $DIMS32, $DIMS16, $DIMS84, $DIMS4], $OPT
);
};
}
Expand All @@ -337,19 +301,22 @@ impl_itx_hbd_fns!(
(TxType::FLIPADST_FLIPADST, flipadst, flipadst)
],
[(16, 16)],
// 8x, 4x and 16x (minus 16x16)
// 8x, 4x and 16x (minus 16x16 and 4x4)
[
(TxType::V_ADST, adst, identity),
(TxType::H_ADST, identity, adst),
(TxType::V_FLIPADST, flipadst, identity),
(TxType::H_FLIPADST, identity, flipadst)
],
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8), (4, 4)],
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8)],
// 4x4
[(TxType::WHT_WHT, wht, wht)],
[(4, 4)],
[(neon, NEON)]
);

cpu_function_lookup_table!(
INV_TXFM_HBD_FNS: [[[Option<InvTxfmHBDFunc>; TX_TYPES]; 32]],
default: [[None; TX_TYPES]; 32],
INV_TXFM_HBD_FNS: [[[Option<InvTxfmHBDFunc>; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL]],
default: [[None; TX_TYPES_PLUS_LL]; TxSize::TX_SIZES_ALL],
[NEON]
);
3 changes: 1 addition & 2 deletions src/asm/shared/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ pub mod test {
let mut eob = 0u16;
let mut exit = 0;

// Wrap WHT_WHT (16) to DCT_DCT (0) scan table
let scan = av1_scan_orders[tx_size as usize][(tx_type as usize) & 15].scan;
let scan = av1_scan_orders[tx_size][tx_type].scan;

for (i, &pos) in scan.iter().enumerate() {
exit = i as u16;
Expand Down
31 changes: 24 additions & 7 deletions src/asm/shared/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,31 @@ pub mod forward;
pub mod inverse;

use crate::transform::*;
use std::ops::Index;

#[inline]
pub const fn get_tx_size_idx(tx_size: TxSize) -> usize {
(tx_size as usize) & 31
/// Allows a table holding one entry per transform size to be indexed
/// directly with a `TxSize` value, e.g. `table[tx_size]`.
impl<T> Index<TxSize> for [T; TxSize::TX_SIZES_ALL] {
  type Output = T;

  /// Returns the entry at the position given by the discriminant of
  /// `tx_size`. No bounds check is performed.
  #[inline]
  fn index(&self, tx_size: TxSize) -> &T {
    let idx = tx_size as usize;
    // SAFETY: values of TxSize are < TX_SIZES_ALL, which is the length of
    // `self`, so `idx` is always in bounds.
    unsafe { self.get_unchecked(idx) }
  }
}

#[inline]
pub const fn get_tx_type_idx(tx_type: TxType) -> usize {
// TX_TYPES is 2^4 or 16
(tx_type as usize) & (TX_TYPES - 1)
/// Allows a table with `TX_TYPES` entries to be indexed directly with a
/// `TxType` value, e.g. `table[tx_type]`.
///
/// The discriminant is masked into the table, so a value one past the last
/// regular entry (WHT_WHT) wraps around to the first entry (DCT_DCT).
impl<T> Index<TxType> for [T; TX_TYPES] {
  type Output = T;

  /// Returns the entry at `tx_type`'s discriminant masked with
  /// `TX_TYPES - 1`. No bounds check is performed.
  #[inline]
  fn index(&self, tx_type: TxType) -> &Self::Output {
    // Derive the mask from the array length instead of hard-coding 15, so
    // the bound below holds by construction. The wrap-around of WHT_WHT to
    // DCT_DCT relies on TX_TYPES being a power of two (16).
    let idx = (tx_type as usize) & (TX_TYPES - 1);
    // SAFETY: `x & (TX_TYPES - 1) <= TX_TYPES - 1`, which is strictly less
    // than the length of `self` (`TX_TYPES`).
    unsafe { self.get_unchecked(idx) }
  }
}

/// Allows a table with `TX_TYPES_PLUS_LL` entries to be indexed directly
/// with a `TxType` value, e.g. `table[tx_type]`. The discriminant is used
/// as-is, with no wrapping.
impl<T> Index<TxType> for [T; TX_TYPES_PLUS_LL] {
  type Output = T;

  /// Returns the entry at the position given by the discriminant of
  /// `tx_type`. No bounds check is performed.
  #[inline]
  fn index(&self, tx_type: TxType) -> &T {
    let idx = tx_type as usize;
    // SAFETY: values of TxType are < TX_TYPES_PLUS_LL, which is the length
    // of `self`, so `idx` is always in bounds.
    unsafe { self.get_unchecked(idx) }
  }
}
Loading
Loading