Skip to content

Commit

Permalink
Add TxType::WHT_WHT and unify inverse_transform_add
Browse files Browse the repository at this point in the history
  • Loading branch information
barrbrain committed Oct 23, 2023
1 parent 69c042f commit 9d44453
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 91 deletions.
33 changes: 14 additions & 19 deletions src/asm/aarch64/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,26 @@ use crate::{Pixel, PixelType};
use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

#[inline]
pub fn inverse_transform_add_lossless<T: Pixel>(
pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
bd: usize, cpu: CpuFeatureLevel,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
if tx_type == TxType::WHT_WHT {
debug_assert!(tx_size == TxSize::TX_4X4);
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
}
}
}
PixelType::U16 if bd == 10 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
PixelType::U16 if bd == 10 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
}
}
PixelType::U16 => {}
}
PixelType::U16 => {}
}
rust::inverse_transform_add_lossless(input, output, eob, bd, cpu);
}

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_FNS[cpu.as_index()]
Expand Down
31 changes: 13 additions & 18 deletions src/asm/x86/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,25 @@ use crate::{Pixel, PixelType};
use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

#[inline]
pub fn inverse_transform_add_lossless<T: Pixel>(
pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
bd: usize, cpu: CpuFeatureLevel,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
if tx_type == TxType::WHT_WHT {
debug_assert!(tx_size == TxSize::TX_4X4);
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
}
}
}
PixelType::U16 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
PixelType::U16 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
}
}
}
}
rust::inverse_transform_add_lossless(input, output, eob, bd, cpu);
}

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_FNS[cpu.as_index()]
Expand Down
2 changes: 1 addition & 1 deletion src/transform/forward_shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ impl Txfm2DFlipCfg {
use self::TxType::*;
match tx_type {
DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT
| V_ADST | H_ADST => (false, false),
| V_ADST | H_ADST | WHT_WHT => (false, false),
FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false),
DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true),
FLIPADST_FLIPADST => (true, true),
Expand Down
63 changes: 14 additions & 49 deletions src/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use super::TxType;
/// # Panics
///
/// - If `input` or `output` has fewer than 4 items.
pub fn av1_iwht4(input: &[i32], output: &mut [i32]) {
pub fn av1_iwht4(input: &[i32], output: &mut [i32], _range: usize) {
assert!(input.len() >= 4);
assert!(output.len() >= 4);

Expand Down Expand Up @@ -1591,7 +1591,7 @@ fn av1_idct64(input: &[i32], output: &mut [i32], range: usize) {

type InvTxfmFn = fn(input: &[i32], output: &mut [i32], range: usize);

static INV_TXFM_FNS: [[InvTxfmFn; 5]; 4] = [
static INV_TXFM_FNS: [[InvTxfmFn; 5]; 5] = [
[av1_idct4, av1_idct8, av1_idct16, av1_idct32, av1_idct64],
[
av1_iadst4,
Expand All @@ -1614,6 +1614,13 @@ static INV_TXFM_FNS: [[InvTxfmFn; 5]; 4] = [
av1_iidentity32,
|_, _, _| unimplemented!(),
],
[
av1_iwht4,
|_, _, _| unimplemented!(),
|_, _, _| unimplemented!(),
|_, _, _| unimplemented!(),
|_, _, _| unimplemented!(),
],
];

pub(crate) mod rust {
Expand All @@ -1624,52 +1631,6 @@ pub(crate) mod rust {
use simd_helpers::cold_for_target_arch;
use std::cmp;

/// Applies a 4x4 inverse transform built from two passes of `av1_iwht4`
/// and adds the result to the first 4 rows and 4 columns of `output`.
///
/// * `input` - coefficients; only the first `4 * 4` entries are read.
/// * `output` - pixel region that is updated in place.
/// * `_eob`, `_bd`, `_cpu` - accepted but not used by this implementation
///   (presumably kept for signature parity with the asm entry points;
///   confirm against callers).
///
/// # Panics
///
/// - If `input` has fewer than 16 items (the `[..4 * 4]` slice fails).
/// - Presumably if an output row is narrower than 4 pixels, since `row[c]`
///   is indexed directly; confirm against `PlaneRegionMut`.
// NOTE(review): `cold_for_target_arch` comes from `simd_helpers`; it
// presumably marks this fallback cold on the listed architectures. Confirm.
#[cold_for_target_arch("x86_64", "aarch64")]
pub fn inverse_transform_add_lossless<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, _eob: usize,
_bd: usize, _cpu: CpuFeatureLevel,
) {
// <https://aomediacodec.github.io/av1-spec/#2d-inverse-transform-process>
// Restrict to the 16 coefficients of a 4x4 block.
let input: &[T::Coeff] = &input[..4 * 4];
// Intermediate results of the first pass, stored as 4 chunks of 4 values.
let mut buffer = [0i32; 4 * 4];

// perform inv txfm on every row
for (r, buffer_slice) in buffer.chunks_exact_mut(4).enumerate() {
let mut temp_in: [i32; 4] = [0; 4];
// Gather input[r], input[r + 4], input[r + 8], input[r + 12]: a stride-4
// walk starting at offset r, truncated to 4 items by the zip with temp_in.
for (val, transposed) in input[r..]
.iter()
.map(|a| i32::cast_from(*a))
.step_by(4)
.zip(temp_in.iter_mut())
{
// Each coefficient is shifted right by 2 bits before the first pass.
*transposed = val >> 2;
}
// First 1-D pass writes straight into chunk r of the buffer.
av1_iwht4(&temp_in, buffer_slice);
}

// perform inv txfm on every col
for c in 0..4 {
let mut temp_in: [i32; 4] = [0; 4];
let mut temp_out: [i32; 4] = [0; 4];
// Gather buffer[c], buffer[c + 4], buffer[c + 8], buffer[c + 12].
for (val, transposed) in buffer[c..]
.iter()
.map(|a| i32::cast_from(*a))
.step_by(4)
.zip(temp_in.iter_mut())
{
*transposed = val;
}
av1_iwht4(&temp_in, &mut temp_out);
// Add the 4 results to column c of the first 4 output rows. If the region
// has fewer than 4 rows the zip simply stops early.
for (temp, out) in temp_out
.iter()
.zip(output.rows_iter_mut().map(|row| &mut row[c]).take(4))
{
// The sum is converted back with `T::cast_from`; no explicit clamp to
// the pixel range is applied here.
let v = i32::cast_from(*out) + *temp;
*out = T::cast_from(v);
}
}
}

#[cold_for_target_arch("x86_64", "aarch64")]
pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, _eob: usize,
Expand All @@ -1686,6 +1647,7 @@ pub(crate) mod rust {
let mut buffer = vec![0i32; width * height].into_boxed_slice();
let rect_type = get_rect_tx_log_ratio(width, height);
let tx_types_1d = get_1d_tx_types(tx_type);
let lossless = tx_type == TxType::WHT_WHT;

// perform inv txfm on every row
let range = bd + 8;
Expand All @@ -1705,6 +1667,8 @@ pub(crate) mod rust {
{
let val = if rect_type.abs() == 1 {
round_shift(raw * INV_SQRT2, SQRT2_BITS)
} else if lossless {
raw >> 2
} else {
raw
};
Expand Down Expand Up @@ -1733,7 +1697,8 @@ pub(crate) mod rust {
.zip(output.rows_iter_mut().map(|row| &mut row[c]).take(height))
{
let v: i32 = (*out).as_();
let v = clamp(v + round_shift(*temp, 4), 0, (1 << bd) - 1);
let r = if lossless { *temp } else { round_shift(*temp, 4) };
let v = clamp(v + r, 0, (1 << bd) - 1);
*out = T::cast_from(v);
}
}
Expand Down
21 changes: 17 additions & 4 deletions src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ pub mod forward_shared;
pub use self::forward::forward_transform;
pub use self::forward::forward_transform_lossless;
pub use self::inverse::inverse_transform_add;
pub use self::inverse::inverse_transform_add_lossless;

use crate::context::MI_SIZE_LOG2;
use crate::partition::{BlockSize, BlockSize::*};
Expand Down Expand Up @@ -52,6 +51,7 @@ pub mod consts {
}

pub const TX_TYPES: usize = 16;
pub const TX_TYPES_PLUS_LL: usize = 17;

#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]
pub enum TxType {
Expand All @@ -71,6 +71,7 @@ pub enum TxType {
H_ADST = 13,
V_FLIPADST = 14,
H_FLIPADST = 15,
WHT_WHT = 16,
}

impl TxType {
Expand Down Expand Up @@ -337,6 +338,7 @@ enum TxType1D {
ADST,
FLIPADST,
IDTX,
WHT,
}

const fn get_1d_tx_types(tx_type: TxType) -> (TxType1D, TxType1D) {
Expand All @@ -357,10 +359,11 @@ const fn get_1d_tx_types(tx_type: TxType) -> (TxType1D, TxType1D) {
TxType::H_ADST => (TxType1D::IDTX, TxType1D::ADST),
TxType::V_FLIPADST => (TxType1D::FLIPADST, TxType1D::IDTX),
TxType::H_FLIPADST => (TxType1D::IDTX, TxType1D::FLIPADST),
TxType::WHT_WHT => (TxType1D::WHT, TxType1D::WHT),
}
}

const VTX_TAB: [TxType1D; TX_TYPES] = [
const VTX_TAB: [TxType1D; TX_TYPES_PLUS_LL] = [
TxType1D::DCT,
TxType1D::ADST,
TxType1D::DCT,
Expand All @@ -377,9 +380,10 @@ const VTX_TAB: [TxType1D; TX_TYPES] = [
TxType1D::IDTX,
TxType1D::FLIPADST,
TxType1D::IDTX,
TxType1D::WHT,
];

const HTX_TAB: [TxType1D; TX_TYPES] = [
const HTX_TAB: [TxType1D; TX_TYPES_PLUS_LL] = [
TxType1D::DCT,
TxType1D::DCT,
TxType1D::ADST,
Expand All @@ -396,6 +400,7 @@ const HTX_TAB: [TxType1D; TX_TYPES] = [
TxType1D::ADST,
TxType1D::IDTX,
TxType1D::FLIPADST,
TxType1D::WHT,
];

#[inline]
Expand Down Expand Up @@ -514,7 +519,15 @@ mod test {
*r = i16::cast_from(*s) - i16::cast_from(*d);
}
forward_transform_lossless(res, freq, 4, cpu);
inverse_transform_add_lossless(freq, &mut dst.as_region_mut(), 15, 8, cpu);
inverse_transform_add(
freq,
&mut dst.as_region_mut(),
15,
TX_4X4,
WHT_WHT,
8,
cpu,
);

assert_eq!(&src[..], &dst.data[..]);
}
Expand Down

0 comments on commit 9d44453

Please sign in to comment.