@@ -459,8 +459,94 @@ namespace Simd
459
459
Yuv420pToBgraV2<true >(y, yStride, u, uStride, v, vStride, width, height, bgra, bgraStride, alpha, yuvType);
460
460
else
461
461
Yuv420pToBgraV2<false >(y, yStride, u, uStride, v, vStride, width, height, bgra, bgraStride, alpha, yuvType);
462
+ #endif
463
+ }
464
+
465
+ // -------------------------------------------------------------------------------------------------
466
+
467
+ template <bool align, class T > SIMD_INLINE void YuvToRgba16 (__m256i y16, __m256i u16 , __m256i v16, const __m256i& a_0, __m256i* rgba)
468
+ {
469
+ const __m256i b16 = YuvToBlue16<T>(y16, u16 );
470
+ const __m256i g16 = YuvToGreen16<T>(y16, u16 , v16);
471
+ const __m256i r16 = YuvToRed16<T>(y16, v16);
472
+ const __m256i rg8 = _mm256_or_si256 (r16, _mm256_slli_si256 (g16, 1 ));
473
+ const __m256i ba8 = _mm256_or_si256 (b16, a_0);
474
+ __m256i rgba0 = _mm256_unpacklo_epi16 (rg8, ba8);
475
+ __m256i rgba1 = _mm256_unpackhi_epi16 (rg8, ba8);
476
+ Permute2x128 (rgba0, rgba1);
477
+ Store<align>(rgba + 0 , rgba0);
478
+ Store<align>(rgba + 1 , rgba1);
479
+ }
480
+
481
+ template <bool align, class T > SIMD_INLINE void YuvToRgba (__m256i y8, __m256i u8 , __m256i v8, const __m256i& a_0, __m256i* rgba)
482
+ {
483
+ YuvToRgba16<align, T>(UnpackY<T, 0 >(y8), UnpackUV<T, 0 >(u8 ), UnpackUV<T, 0 >(v8), a_0, rgba + 0 );
484
+ YuvToRgba16<align, T>(UnpackY<T, 1 >(y8), UnpackUV<T, 1 >(u8 ), UnpackUV<T, 1 >(v8), a_0, rgba + 2 );
485
+ }
486
+
487
+ template <bool align, class T > SIMD_INLINE void Yuv444pToRgbaV2 (const uint8_t * y, const uint8_t * u, const uint8_t * v, const __m256i& a_0, uint8_t * rgba)
488
+ {
489
+ YuvToRgba<align, T>(LoadPermuted<align>((__m256i*)y), LoadPermuted<align>((__m256i*)u), LoadPermuted<align>((__m256i*)v), a_0, (__m256i*)rgba);
490
+ }
491
+
492
+ template <bool align, class T > void Yuv444pToRgbaV2 (const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
493
+ size_t width, size_t height, uint8_t * rgba, size_t rgbaStride, uint8_t alpha)
494
+ {
495
+ assert (width >= A);
496
+ if (align)
497
+ {
498
+ assert (Aligned (y) && Aligned (yStride) && Aligned (u) && Aligned (uStride));
499
+ assert (Aligned (v) && Aligned (vStride) && Aligned (rgba) && Aligned (rgbaStride));
500
+ }
501
+
502
+ __m256i a_0 = _mm256_slli_si256 (_mm256_set1_epi16 (alpha), 1 );
503
+ size_t bodyWidth = AlignLo (width, A);
504
+ size_t tail = width - bodyWidth;
505
+ for (size_t row = 0 ; row < height; ++row)
506
+ {
507
+ for (size_t colYuv = 0 , colRgba = 0 ; colYuv < bodyWidth; colYuv += A, colRgba += QA)
508
+ {
509
+ Yuv444pToRgbaV2<align, T>(y + colYuv, u + colYuv, v + colYuv, a_0, rgba + colRgba);
510
+ }
511
+ if (tail)
512
+ {
513
+ size_t col = width - A;
514
+ Yuv444pToRgbaV2<false , T>(y + col, u + col, v + col, a_0, rgba + 4 * col);
515
+ }
516
+ y += yStride;
517
+ u += uStride;
518
+ v += vStride;
519
+ rgba += rgbaStride;
520
+ }
521
+ }
522
+
523
+ template <bool align> void Yuv444pToRgbaV2 (const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
524
+ size_t width, size_t height, uint8_t * rgba, size_t rgbaStride, uint8_t alpha, SimdYuvType yuvType)
525
+ {
526
+ switch (yuvType)
527
+ {
528
+ case SimdYuvBt601: Yuv444pToRgbaV2<align, Base::Bt601>(y, yStride, u, uStride, v, vStride, width, height, rgba, rgbaStride, alpha); break ;
529
+ case SimdYuvBt709: Yuv444pToRgbaV2<align, Base::Bt709>(y, yStride, u, uStride, v, vStride, width, height, rgba, rgbaStride, alpha); break ;
530
+ case SimdYuvBt2020: Yuv444pToRgbaV2<align, Base::Bt2020>(y, yStride, u, uStride, v, vStride, width, height, rgba, rgbaStride, alpha); break ;
531
+ case SimdYuvTrect871: Yuv444pToRgbaV2<align, Base::Trect871>(y, yStride, u, uStride, v, vStride, width, height, rgba, rgbaStride, alpha); break ;
532
+ default :
533
+ assert (0 );
534
+ }
535
+ }
536
+
537
+ void Yuv444pToRgbaV2 (const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
538
+ size_t width, size_t height, uint8_t * rgba, size_t rgbaStride, uint8_t alpha, SimdYuvType yuvType)
539
+ {
540
+ #if defined(SIMD_X86_ENABLE) && defined(NDEBUG) && defined(_MSC_VER) && _MSC_VER <= 1900
541
+ Sse41::Yuv444pToRgbaV2 (y, yStride, u, uStride, v, vStride, width, height, rgba, rgbaStride, alpha, yuvType);
542
+ #else
543
+ if (Aligned (y) && Aligned (yStride) && Aligned (u) && Aligned (uStride)
544
+ && Aligned (v) && Aligned (vStride) && Aligned (rgba) && Aligned (rgbaStride))
545
+ Yuv444pToRgbaV2<true >(y, yStride, u, uStride, v, vStride, width, height, rgba, rgbaStride, alpha, yuvType);
546
+ else
547
+ Yuv444pToRgbaV2<false >(y, yStride, u, uStride, v, vStride, width, height, rgba, rgbaStride, alpha, yuvType);
462
548
#endif
463
549
}
464
550
}
465
- #endif // SIMD_AVX2_ENABLE
551
+ #endif
466
552
}
0 commit comments