@@ -19,21 +19,6 @@ pub fn inverse_transform_add<T: Pixel>(
1919 input : & [ T :: Coeff ] , output : & mut PlaneRegionMut < ' _ , T > , eob : u16 ,
2020 tx_size : TxSize , tx_type : TxType , bd : usize , cpu : CpuFeatureLevel ,
2121) {
22- if tx_type == TxType :: WHT_WHT {
23- debug_assert ! ( tx_size == TxSize :: TX_4X4 ) ;
24- match T :: type_enum ( ) {
25- PixelType :: U8 => {
26- if let Some ( func) = INV_TXFM_WHT_FN [ cpu. as_index ( ) ] {
27- return call_inverse_func ( func, input, output, eob, 4 , 4 , bd) ;
28- }
29- }
30- PixelType :: U16 => {
31- if let Some ( func) = INV_TXFM_WHT_HBD_FN [ cpu. as_index ( ) ] {
32- return call_inverse_hbd_func ( func, input, output, eob, 4 , 4 , bd) ;
33- }
34- }
35- }
36- }
3722 match T :: type_enum ( ) {
3823 PixelType :: U8 => {
3924 if let Some ( func) = INV_TXFM_FNS [ cpu. as_index ( ) ] [ tx_size] [ tx_type] {
@@ -81,44 +66,6 @@ pub fn inverse_transform_add<T: Pixel>(
8166 rust:: inverse_transform_add ( input, output, eob, tx_size, tx_type, bd, cpu) ;
8267}
8368
// Foreign declarations of the dedicated 4x4 WHT_WHT inverse-transform-add
// routines: one 8bpc and one 16bpc entry point for each of AVX2 and SSE2.
// The 8bpc variants write through a `*mut u8` destination; the 16bpc variants
// write through `*mut u16` and take one extra `bitdepth_max` argument.
// NOTE(review): every line of this block carries the patch's removal marker,
// so these standalone declarations go away with this change.
84- extern {
85- fn rav1e_inv_txfm_add_wht_wht_4x4_8bpc_avx2 (
86- dst : * mut u8 , dst_stride : libc:: ptrdiff_t , coeff : * mut i16 , eob : i32 ,
87- ) ;
88- fn rav1e_inv_txfm_add_wht_wht_4x4_8bpc_sse2 (
89- dst : * mut u8 , dst_stride : libc:: ptrdiff_t , coeff : * mut i16 , eob : i32 ,
90- ) ;
91- fn rav1e_inv_txfm_add_wht_wht_4x4_16bpc_avx2 (
92- dst : * mut u16 , dst_stride : libc:: ptrdiff_t , coeff : * mut i16 , eob : i32 ,
93- bitdepth_max : i32 ,
94- ) ;
95- fn rav1e_inv_txfm_add_wht_wht_4x4_16bpc_sse2 (
96- dst : * mut u16 , dst_stride : libc:: ptrdiff_t , coeff : * mut i16 , eob : i32 ,
97- bitdepth_max : i32 ,
98- ) ;
99- }
100-
// Per-CPU-level entries for the standalone WHT tables: each constant wraps one
// of the foreign 4x4 WHT routines in `Some`, cast to the table's function
// pointer type (`InvTxfmFunc` for 8bpc, `InvTxfmHBDFunc` for 16bpc).
// The `<TABLE>_<LEVEL>` naming is presumably what `cpu_function_lookup_table!`
// looks up for each listed level — TODO confirm against the macro definition.
// NOTE(review): these lines carry the patch's removal marker.
101- const INV_TXFM_WHT_FN_AVX2 : Option < InvTxfmFunc > =
102- Some ( rav1e_inv_txfm_add_wht_wht_4x4_8bpc_avx2 as _ ) ;
103- const INV_TXFM_WHT_FN_SSE2 : Option < InvTxfmFunc > =
104- Some ( rav1e_inv_txfm_add_wht_wht_4x4_8bpc_sse2 as _ ) ;
105- const INV_TXFM_WHT_HBD_FN_AVX2 : Option < InvTxfmHBDFunc > =
106- Some ( rav1e_inv_txfm_add_wht_wht_4x4_16bpc_avx2 as _ ) ;
107- const INV_TXFM_WHT_HBD_FN_SSE2 : Option < InvTxfmHBDFunc > =
108- Some ( rav1e_inv_txfm_add_wht_wht_4x4_16bpc_sse2 as _ ) ;
109-
// Standalone 8-bit WHT dispatch table: one optional function per CPU feature
// level, `None` by default, with explicit entries for SSE2 and AVX2.
// The removed fast path in `inverse_transform_add` indexed it with
// `cpu.as_index()`.
// NOTE(review): these lines carry the patch's removal marker.
110- cpu_function_lookup_table ! (
111- INV_TXFM_WHT_FN : [ Option <InvTxfmFunc >] ,
112- default : None ,
113- [ SSE2 , AVX2 ]
114- ) ;
115-
// Standalone high-bit-depth WHT dispatch table: one optional function per CPU
// feature level, `None` by default, with explicit entries for SSE2 and AVX2.
// The removed fast path in `inverse_transform_add` indexed it with
// `cpu.as_index()` for `PixelType::U16`.
// NOTE(review): these lines carry the patch's removal marker.
116- cpu_function_lookup_table ! (
117- INV_TXFM_WHT_HBD_FN : [ Option <InvTxfmHBDFunc >] ,
118- default : None ,
119- [ SSE2 , AVX2 ]
120- ) ;
121-
12269macro_rules! decl_itx_fns {
12370 // Takes a 2d list of tx types for W and H
12471 ( [ $( [ $( ( $ENUM: expr, $TYPE1: ident, $TYPE2: ident) ) ,* ] ) ,* ] , $W: expr, $H: expr,
@@ -249,7 +196,7 @@ macro_rules! impl_itx_fns {
249196 } ;
250197
251198 ( $TYPES64: tt, $DIMS64: tt, $TYPES32: tt, $DIMS32: tt, $TYPES16: tt, $DIMS16: tt,
252- $TYPES84: tt, $DIMS84: tt, $OPT: tt) => {
199+ $TYPES84: tt, $DIMS84: tt, $TYPES4 : tt , $DIMS4 : tt , $ OPT: tt) => {
253200 // Make 2d list of tx types for each set of dimensions. Each set of
254201 // dimensions uses a superset of the previous set of tx types.
255202 impl_itx_fns!( [ $TYPES64] , $DIMS64, $OPT) ;
@@ -258,15 +205,53 @@ macro_rules! impl_itx_fns {
258205 impl_itx_fns!(
259206 [ $TYPES64, $TYPES32, $TYPES16, $TYPES84] , $DIMS84, $OPT
260207 ) ;
208+ impl_itx_fns!(
209+ [ $TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4] , $DIMS4, $OPT
210+ ) ;
261211
262212 // Pool all of the dimensions together to create a table for each cpu
263213 // feature level.
264214 create_wxh_tables!(
265- [ $DIMS64, $DIMS32, $DIMS16, $DIMS84] , $OPT
215+ [ $DIMS64, $DIMS32, $DIMS16, $DIMS84, $DIMS4 ] , $OPT
266216 ) ;
267217 } ;
268218}
269219
// 8-bit AVX2 table. Arguments come in (tx type list, dimension list) pairs,
// followed by the list of (function suffix, CPU level) targets. Each dimension
// list is given the tx types of its own pair plus those of every pair before
// it, so the final 4x4 entry gets all preceding types and additionally
// WHT_WHT. 4x4 is therefore left out of the "8x, 4x and 16x" dimension list
// here and handled by its own pair.
220+ impl_itx_fns ! (
221+ // 64x
222+ [ ( TxType :: DCT_DCT , dct, dct) ] ,
223+ [ ( 64 , 64 ) , ( 64 , 32 ) , ( 32 , 64 ) , ( 16 , 64 ) , ( 64 , 16 ) ] ,
224+ // 32x
225+ [ ( TxType :: IDTX , identity, identity) ] ,
226+ [ ( 32 , 32 ) , ( 32 , 16 ) , ( 16 , 32 ) , ( 32 , 8 ) , ( 8 , 32 ) ] ,
227+ // 16x16
228+ [
229+ ( TxType :: DCT_ADST , dct, adst) ,
230+ ( TxType :: ADST_DCT , adst, dct) ,
231+ ( TxType :: DCT_FLIPADST , dct, flipadst) ,
232+ ( TxType :: FLIPADST_DCT , flipadst, dct) ,
233+ ( TxType :: V_DCT , dct, identity) ,
234+ ( TxType :: H_DCT , identity, dct) ,
235+ ( TxType :: ADST_ADST , adst, adst) ,
236+ ( TxType :: ADST_FLIPADST , adst, flipadst) ,
237+ ( TxType :: FLIPADST_ADST , flipadst, adst) ,
238+ ( TxType :: FLIPADST_FLIPADST , flipadst, flipadst)
239+ ] ,
240+ [ ( 16 , 16 ) ] ,
241+ // 8x, 4x and 16x (minus 16x16 and 4x4)
242+ [
243+ ( TxType :: V_ADST , adst, identity) ,
244+ ( TxType :: H_ADST , identity, adst) ,
245+ ( TxType :: V_FLIPADST , flipadst, identity) ,
246+ ( TxType :: H_FLIPADST , identity, flipadst)
247+ ] ,
248+ [ ( 16 , 8 ) , ( 8 , 16 ) , ( 16 , 4 ) , ( 4 , 16 ) , ( 8 , 8 ) , ( 8 , 4 ) , ( 4 , 8 ) ] ,
249+ // 4x4
250+ [ ( TxType :: WHT_WHT , wht, wht) ] ,
251+ [ ( 4 , 4 ) ] ,
252+ [ ( avx2, AVX2 ) ]
253+ ) ;
254+
270255impl_itx_fns ! (
271256 // 64x
272257 [ ( TxType :: DCT_DCT , dct, dct) ] ,
@@ -296,13 +281,35 @@ impl_itx_fns!(
296281 ( TxType :: H_FLIPADST , identity, flipadst)
297282 ] ,
298283 [ ( 16 , 8 ) , ( 8 , 16 ) , ( 16 , 4 ) , ( 4 , 16 ) , ( 8 , 8 ) , ( 8 , 4 ) , ( 4 , 8 ) , ( 4 , 4 ) ] ,
299- [ ( avx512icl, AVX512ICL ) , ( avx2, AVX2 ) , ( ssse3, SSSE3 ) ]
284+ // 4x4
285+ [ ] ,
286+ [ ] ,
287+ [ ( avx512icl, AVX512ICL ) , ( ssse3, SSSE3 ) ]
288+ ) ;
289+
// 8-bit SSE2 table. Every (tx type list, dimension list) pair is empty except
// the last one, so the only entry this invocation describes is WHT_WHT at 4x4.
// The resulting table is presumably what the `SSE2` level of `INV_TXFM_FNS`
// resolves to — TODO confirm against `cpu_function_lookup_table!`.
290+ impl_itx_fns ! (
291+ // 64x
292+ [ ] ,
293+ [ ] ,
294+ // 32x
295+ [ ] ,
296+ [ ] ,
297+ // 16x16
298+ [ ] ,
299+ [ ] ,
300+ // 8x, 4x and 16x (minus 16x16 and 4x4)
301+ [ ] ,
302+ [ ] ,
303+ // 4x4
304+ [ ( TxType :: WHT_WHT , wht, wht) ] ,
305+ [ ( 4 , 4 ) ] ,
306+ [ ( sse2, SSE2 ) ]
300307) ;
301308
// 8-bit dispatch table, indexed as `[cpu.as_index()][tx_size][tx_type]` by
// `inverse_transform_add`; a `None` slot means no function is registered for
// that combination. The patch extends the level list with SSE2 so the
// WHT-only SSE2 table is reachable.
// NOTE(review): the visible SSSE3/AVX512ICL invocation passes empty 4x4 lists,
// so those two tables appear to carry no WHT_WHT entry — confirm that CPUs
// resolving to those levels are meant to miss the WHT routine.
302309cpu_function_lookup_table ! (
303310 INV_TXFM_FNS : [ [ [ Option <InvTxfmFunc >; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ] ,
304311 default : [ [ None ; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ,
305- [ SSSE3 , AVX2 , AVX512ICL ]
312+ [ SSE2 , SSSE3 , AVX2 , AVX512ICL ]
306313) ;
307314
308315macro_rules! impl_itx_hbd_fns {
@@ -321,7 +328,7 @@ macro_rules! impl_itx_hbd_fns {
321328 } ;
322329
323330 ( $TYPES64: tt, $DIMS64: tt, $TYPES32: tt, $DIMS32: tt, $TYPES16: tt, $DIMS16: tt,
324- $TYPES84: tt, $DIMS84: tt, $EXT: ident, $OPT: tt) => {
331+ $TYPES84: tt, $DIMS84: tt, $TYPES4 : tt , $DIMS4 : tt , $ EXT: ident, $OPT: tt) => {
325332 // Make 2d list of tx types for each set of dimensions. Each set of
326333 // dimensions uses a superset of the previous set of tx types.
327334 impl_itx_hbd_fns!( [ $TYPES64] , $DIMS64, $OPT) ;
@@ -330,11 +337,14 @@ macro_rules! impl_itx_hbd_fns {
330337 impl_itx_hbd_fns!(
331338 [ $TYPES64, $TYPES32, $TYPES16, $TYPES84] , $DIMS84, $OPT
332339 ) ;
340+ impl_itx_hbd_fns!(
341+ [ $TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4] , $DIMS4, $OPT
342+ ) ;
333343
334344 // Pool all of the dimensions together to create a table for each cpu
335345 // feature level.
336346 create_wxh_hbd_tables!(
337- [ $DIMS64, $DIMS32, $DIMS16, $DIMS84] , $EXT, $OPT
347+ [ $DIMS64, $DIMS32, $DIMS16, $DIMS84, $DIMS4 ] , $EXT, $OPT
338348 ) ;
339349 } ;
340350}
@@ -368,6 +378,9 @@ impl_itx_hbd_fns!(
368378 ( TxType :: H_FLIPADST , identity, flipadst)
369379 ] ,
370380 [ ( 16 , 8 ) , ( 8 , 16 ) , ( 8 , 8 ) ] ,
381+ // 4x4
382+ [ ] ,
383+ [ ] ,
371384 _10,
372385 [ ( 10 , avx512icl, AVX512ICL ) ]
373386) ;
@@ -401,7 +414,10 @@ impl_itx_hbd_fns!(
401414 ( TxType :: H_FLIPADST , identity, flipadst)
402415 ] ,
403416 [ ( 16 , 8 ) , ( 8 , 16 ) , ( 16 , 4 ) , ( 4 , 16 ) , ( 8 , 8 ) , ( 8 , 4 ) , ( 4 , 8 ) , ( 4 , 4 ) ] ,
404- _10,
417+ // 4x4
418+ [ ] ,
419+ [ ] ,
420+ _10_,
405421 [ ( 10 , avx2, AVX2 ) ]
406422) ;
407423
@@ -434,14 +450,71 @@ impl_itx_hbd_fns!(
434450 ( TxType :: H_FLIPADST , identity, flipadst)
435451 ] ,
436452 [ ( 16 , 8 ) , ( 8 , 16 ) , ( 16 , 4 ) , ( 4 , 16 ) , ( 8 , 8 ) , ( 8 , 4 ) , ( 4 , 8 ) , ( 4 , 4 ) ] ,
453+ // 4x4
454+ [ ] ,
455+ [ ] ,
437456 _10,
438457 [ ( 16 , sse4, SSE4_1 ) ]
439458) ;
440459
// High-bit-depth WHT tables. Every (tx type list, dimension list) pair is
// empty except the last one, so only WHT_WHT at 4x4 is described. The table
// name suffix is `_16` and the targets are the SSE2 and AVX2 levels, each
// tagged with 16.
// The `INV_TXFM_HBD_FNS_16_SSE2` / `INV_TXFM_HBD_FNS_16_AVX2` constants used
// later in this file are presumably the tables generated here — TODO confirm
// against `create_wxh_hbd_tables!`.
460+ impl_itx_hbd_fns ! (
461+ // 64x
462+ [ ] ,
463+ [ ] ,
464+ // 32x
465+ [ ] ,
466+ [ ] ,
467+ // 16x16
468+ [ ] ,
469+ [ ] ,
470+ // 8x, 4x and 16x (minus 16x16 and 4x4)
471+ [ ] ,
472+ [ ] ,
473+ // 4x4
474+ [ ( TxType :: WHT_WHT , wht, wht) ] ,
475+ [ ( 4 , 4 ) ] ,
476+ _16,
477+ [ ( 16 , sse2, SSE2 ) , ( 16 , avx2, AVX2 ) ]
478+ ) ;
479+
// The 10-bit and 12-bit SSE2 tables are plain copies of the `_16` SSE2 table,
// so both bit depths share the same set of SSE2 entries.
// NOTE(review): this presumes the `_16` routines are valid for 10- and 12-bit
// input — confirm against the routines' contract.
480+ const INV_TXFM_HBD_FNS_10_SSE2 : [ [ Option < InvTxfmHBDFunc > ; TX_TYPES_PLUS_LL ] ;
481+ TxSize :: TX_SIZES_ALL ] = INV_TXFM_HBD_FNS_16_SSE2 ;
482+ const INV_TXFM_HBD_FNS_12_SSE2 : [ [ Option < InvTxfmHBDFunc > ; TX_TYPES_PLUS_LL ] ;
483+ TxSize :: TX_SIZES_ALL ] = INV_TXFM_HBD_FNS_16_SSE2 ;
484+
485+ const fn merge_hbd_fns (
486+ a : [ [ Option < InvTxfmHBDFunc > ; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ,
487+ b : [ [ Option < InvTxfmHBDFunc > ; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ,
488+ ) -> [ [ Option < InvTxfmHBDFunc > ; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] {
489+ let mut out = b;
490+ let mut tx_size = 0 ;
491+ loop {
492+ let mut tx_type = 0 ;
493+ loop {
494+ if a[ tx_size] [ tx_type] . is_some ( ) {
495+ out[ tx_size] [ tx_type] = a[ tx_size] [ tx_type] ;
496+ }
497+ tx_type += 1 ;
498+ if tx_type == TX_TYPES_PLUS_LL {
499+ break ;
500+ }
501+ }
502+ tx_size += 1 ;
503+ if tx_size == TxSize :: TX_SIZES_ALL {
504+ break ;
505+ }
506+ }
507+ out
508+ }
509+
// Final 10-bit AVX2 table: entries of `INV_TXFM_HBD_FNS_10__AVX2` take
// precedence, and any slot it leaves as `None` is filled from
// `INV_TXFM_HBD_FNS_16_AVX2`. The double underscore in the first name matches
// the `_10_` suffix passed to the 10-bit AVX2 `impl_itx_hbd_fns!` invocation.
510+ const INV_TXFM_HBD_FNS_10_AVX2 : [ [ Option < InvTxfmHBDFunc > ; TX_TYPES_PLUS_LL ] ;
511+ TxSize :: TX_SIZES_ALL ] =
512+ merge_hbd_fns ( INV_TXFM_HBD_FNS_10__AVX2 , INV_TXFM_HBD_FNS_16_AVX2 ) ;
513+
// 10-bit dispatch table: per CPU feature level, a `[tx_size][tx_type]` grid of
// optional functions, all `None` by default. The patch adds SSE2 to the level
// list.
// NOTE(review): the visible SSE4_1 and AVX512ICL invocations pass empty 4x4
// lists, so those tables appear to carry no WHT_WHT entry — confirm that this
// is intended for CPUs resolving to those levels.
441514cpu_function_lookup_table ! (
442515 INV_TXFM_HBD_FNS_10 : [ [ [ Option <InvTxfmHBDFunc >; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ] ,
443516 default : [ [ None ; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ,
444- [ SSE4_1 , AVX2 , AVX512ICL ]
517+ [ SSE2 , SSE4_1 , AVX2 , AVX512ICL ]
445518) ;
446519
447520impl_itx_hbd_fns ! (
@@ -472,12 +545,19 @@ impl_itx_hbd_fns!(
472545 ( TxType :: H_FLIPADST , identity, flipadst)
473546 ] ,
474547 [ ( 16 , 8 ) , ( 8 , 16 ) , ( 16 , 4 ) , ( 4 , 16 ) , ( 8 , 8 ) , ( 8 , 4 ) , ( 4 , 8 ) , ( 4 , 4 ) ] ,
475- _12,
548+ // 4x4
549+ [ ] ,
550+ [ ] ,
551+ _12_,
476552 [ ( 12 , avx2, AVX2 ) ]
477553) ;
478554
// Final 12-bit AVX2 table: entries of `INV_TXFM_HBD_FNS_12__AVX2` take
// precedence, and any slot it leaves as `None` is filled from
// `INV_TXFM_HBD_FNS_16_AVX2`. The double underscore in the first name matches
// the `_12_` suffix passed to the 12-bit AVX2 `impl_itx_hbd_fns!` invocation.
555+ const INV_TXFM_HBD_FNS_12_AVX2 : [ [ Option < InvTxfmHBDFunc > ; TX_TYPES_PLUS_LL ] ;
556+ TxSize :: TX_SIZES_ALL ] =
557+ merge_hbd_fns ( INV_TXFM_HBD_FNS_12__AVX2 , INV_TXFM_HBD_FNS_16_AVX2 ) ;
558+
// 12-bit dispatch table: per CPU feature level, a `[tx_size][tx_type]` grid of
// optional functions, all `None` by default. The patch adds SSE2 to the level
// list, next to the existing AVX2 level.
479559cpu_function_lookup_table ! (
480560 INV_TXFM_HBD_FNS_12 : [ [ [ Option <InvTxfmHBDFunc >; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ] ,
481561 default : [ [ None ; TX_TYPES_PLUS_LL ] ; TxSize :: TX_SIZES_ALL ] ,
482- [ AVX2 ]
562+ [ SSE2 , AVX2 ]
483563) ;
0 commit comments