Skip to content

Commit 5cf49e0

Browse files
authored
struct Rav1dFrameContext_lf::level: Change [u8; 4]s to u8s and remove unsafes and UB (#1273)
Previously, usages of `level` were UB. Accesses to the disjoint `[0..2]` Y and `[2..4]` UV parts of each `[u8; 4]` element were done `unsafe`ly, since both parts live in the same element and so overlapped at element granularity. But this meant that the other, safe `DisjointMut` usages of `level` were not checked against those writes, overlapped with them, and were thus UB. This fixes that by making the elements just `u8`s instead of `[u8; 4]`s and multiplying all previous indices/offsets by 4. Thus we can now index the `[0..2]` Y and `[2..4]` UV parts independently with `DisjointMut`'s safe APIs. We can also remove `fn unaligned_lvl_slice`, since it now reduces to a plain offset by `y` (which, unlike the `x` offsets, is not multiplied by 4). We can also allocate only 3 extra bytes for asm, like C does, rather than a whole extra element (4 bytes). Finally, we can move the `DisjointMut` immutable indexing into `fn loop_filter_sb128_rust`, where it can be fine-grained and not overlap with other uses.
2 parents 7e0a9d8 + c298743 commit 5cf49e0

File tree

7 files changed

+66
-169
lines changed

7 files changed

+66
-169
lines changed

lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ pub mod src {
5454
pub(crate) mod pixels;
5555
pub(crate) mod relaxed_atomic;
5656
pub(crate) mod strided;
57-
mod unstable_extensions;
5857
pub(crate) mod with_offset;
5958
pub(crate) mod wrap_fn_ptr;
6059
// TODO(kkysen) Temporarily `pub(crate)` due to a `pub use` until TAIT.

src/decode.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4520,10 +4520,10 @@ pub(crate) fn rav1d_decode_frame_init(c: &Rav1dContext, fc: &Rav1dFrameContext)
45204520
f.lf.mask.clear();
45214521
// TODO: Fallible allocation.
45224522
f.lf.mask.resize_with(num_sb128 as usize, Default::default);
4523-
// over-allocate one element (4 bytes) since some of the SIMD implementations
4523+
// over-allocate by 3 bytes since some of the SIMD implementations
45244524
// index this from the level type and can thus over-read by up to 3 bytes.
45254525
f.lf.level
4526-
.resize(num_sb128 as usize * 32 * 32 + 1, [0u8; 4]); // TODO: Fallible allocation
4526+
.resize_with(4 * num_sb128 as usize * 32 * 32 + 3, Default::default); // TODO: Fallible allocation
45274527
if c.fc.len() > 1 {
45284528
// TODO: Fallible allocation
45294529
f.frame_thread

src/internal.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,7 @@ impl TxLpfRightEdge {
693693
#[derive(Default)]
694694
#[repr(C)]
695695
pub struct Rav1dFrameContext_lf {
696-
pub level: DisjointMut<Vec<[u8; 4]>>,
696+
pub level: DisjointMut<Vec<u8>>,
697697
pub mask: Vec<Av1Filter>, /* len = w*h */
698698
pub lr_mask: Vec<Av1Restoration>,
699699
pub lim_lut: Align16<Av1FilterLUT>,

src/lf_apply.rs

Lines changed: 26 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ pub(crate) fn rav1d_copy_lpf<BD: BitDepth>(
370370
fn filter_plane_cols_y<BD: BitDepth>(
371371
f: &Rav1dFrameData,
372372
have_left: bool,
373-
lvl: WithOffset<&[[u8; 4]]>,
373+
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
374374
mask: &[[[RelaxedAtomic<u16>; 2]; 3]; 32],
375375
y_dst: Rav1dPictureDataComponentOffset,
376376
w: usize,
@@ -398,16 +398,16 @@ fn filter_plane_cols_y<BD: BitDepth>(
398398
} else {
399399
mask.each_ref().map(|[_, b]| b.get() as u32)
400400
};
401-
let lvl = lvl + x;
402-
lf_sb.y.h.call::<BD>(f, y_dst(x), &hmask, lvl, 0, len);
401+
let lvl = |y| lvl + (4 * x + y);
402+
lf_sb.y.h.call::<BD>(f, y_dst(x), &hmask, lvl(0), len);
403403
}
404404
}
405405

406406
#[inline]
407407
fn filter_plane_rows_y<BD: BitDepth>(
408408
f: &Rav1dFrameData,
409409
have_top: bool,
410-
lvl: WithOffset<&[[u8; 4]]>,
410+
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
411411
b4_stride: usize,
412412
mask: &[[[RelaxedAtomic<u16>; 2]; 3]; 32],
413413
y_dst: Rav1dPictureDataComponentOffset,
@@ -430,16 +430,16 @@ fn filter_plane_rows_y<BD: BitDepth>(
430430
let vmask = mask[y % mask.len()] // To elide the bounds check.
431431
.each_ref()
432432
.map(|[a, b]| a.get() as u32 | ((b.get() as u32) << 16));
433-
let lvl = lvl + i * b4_stride;
434-
lf_sb.y.v.call::<BD>(f, y_dst(i), &vmask, lvl, 1, w);
433+
let lvl = |y| lvl + (4 * i * b4_stride + y);
434+
lf_sb.y.v.call::<BD>(f, y_dst(i), &vmask, lvl(1), w);
435435
}
436436
}
437437

438438
#[inline]
439439
fn filter_plane_cols_uv<BD: BitDepth>(
440440
f: &Rav1dFrameData,
441441
have_left: bool,
442-
lvl: WithOffset<&[[u8; 4]]>,
442+
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
443443
mask: &[[[RelaxedAtomic<u16>; 2]; 2]; 32],
444444
u_dst: Rav1dPictureDataComponentOffset,
445445
v_dst: Rav1dPictureDataComponentOffset,
@@ -472,17 +472,17 @@ fn filter_plane_cols_uv<BD: BitDepth>(
472472
mask.each_ref().map(|[_, b]| b.get() as u32)
473473
};
474474
let hmask = [hmask[0], hmask[1], 0];
475-
let lvl = lvl + x;
476-
lf_sb.uv.h.call::<BD>(f, u_dst(x), &hmask, lvl, 2, len);
477-
lf_sb.uv.h.call::<BD>(f, v_dst(x), &hmask, lvl, 3, len);
475+
let lvl = |y| lvl + (4 * x + y);
476+
lf_sb.uv.h.call::<BD>(f, u_dst(x), &hmask, lvl(2), len);
477+
lf_sb.uv.h.call::<BD>(f, v_dst(x), &hmask, lvl(3), len);
478478
}
479479
}
480480

481481
#[inline]
482482
fn filter_plane_rows_uv<BD: BitDepth>(
483483
f: &Rav1dFrameData,
484484
have_top: bool,
485-
lvl: WithOffset<&[[u8; 4]]>,
485+
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
486486
b4_stride: usize,
487487
mask: &[[[RelaxedAtomic<u16>; 2]; 2]; 32],
488488
u_dst: Rav1dPictureDataComponentOffset,
@@ -510,9 +510,9 @@ fn filter_plane_rows_uv<BD: BitDepth>(
510510
.each_ref()
511511
.map(|[a, b]| a.get() as u32 | ((b.get() as u32) << (16 >> ss_hor)));
512512
let vmask = [vmask[0], vmask[1], 0];
513-
let lvl = lvl + i * b4_stride;
514-
lf_sb.uv.v.call::<BD>(f, u_dst(i), &vmask, lvl, 2, w);
515-
lf_sb.uv.v.call::<BD>(f, v_dst(i), &vmask, lvl, 3, w);
513+
let lvl = |y| lvl + (4 * i * b4_stride + y);
514+
lf_sb.uv.v.call::<BD>(f, u_dst(i), &vmask, lvl(2), w);
515+
lf_sb.uv.v.call::<BD>(f, v_dst(i), &vmask, lvl(3), w);
516516
}
517517
}
518518

@@ -624,20 +624,16 @@ pub(crate) fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
624624
}
625625
}
626626
let lflvl = &f.lf.mask[lflvl_offset..];
627-
let lvl = &*f
628-
.lf
629-
.level
630-
.index((f.b4_stride * sby as isize * sbsz as isize) as usize..);
631627
let lvl = WithOffset {
632-
data: lvl,
633-
offset: 0,
628+
data: &f.lf.level,
629+
offset: 4 * f.b4_stride as usize * (sby * sbsz) as usize,
634630
};
635631
have_left = false;
636632
for x in 0..f.sb128w as usize {
637633
filter_plane_cols_y::<BD>(
638634
f,
639635
have_left,
640-
lvl + x * 32,
636+
lvl + 4 * x * 32,
641637
&lflvl[x].filter_y[0],
642638
py + x * 128,
643639
cmp::min(32, f.w4 - x as c_int * 32) as usize,
@@ -649,20 +645,16 @@ pub(crate) fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
649645
if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 {
650646
return;
651647
}
652-
let lvl = &*f
653-
.lf
654-
.level
655-
.index((f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..);
656648
let lvl = WithOffset {
657-
data: lvl,
658-
offset: 0,
649+
data: &f.lf.level,
650+
offset: 4 * f.b4_stride as usize * (sby * sbsz >> ss_ver) as usize,
659651
};
660652
have_left = false;
661653
for x in 0..f.sb128w as usize {
662654
filter_plane_cols_uv::<BD>(
663655
f,
664656
have_left,
665-
lvl + x * (32 >> ss_hor),
657+
lvl + 4 * x * (32 >> ss_hor),
666658
&lflvl[x].filter_uv[0],
667659
pu + x * (128 >> ss_hor),
668660
pv + x * (128 >> ss_hor),
@@ -694,19 +686,15 @@ pub(crate) fn rav1d_loopfilter_sbrow_rows<BD: BitDepth>(
694686
let endy4: c_uint = (starty4 + cmp::min(f.h4 - sby * sbsz, sbsz)) as c_uint;
695687
let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver;
696688

697-
let lvl = &*f
698-
.lf
699-
.level
700-
.index((f.b4_stride * sby as isize * sbsz as isize) as usize..);
701689
let lvl = WithOffset {
702-
data: lvl,
703-
offset: 0,
690+
data: &f.lf.level,
691+
offset: 4 * f.b4_stride as usize * (sby * sbsz) as usize,
704692
};
705693
for x in 0..f.sb128w as usize {
706694
filter_plane_rows_y::<BD>(
707695
f,
708696
have_top,
709-
lvl + x * 32,
697+
lvl + 4 * x * 32,
710698
f.b4_stride as usize,
711699
&lflvl[x].filter_y[1],
712700
p[0] + 128 * x,
@@ -721,20 +709,16 @@ pub(crate) fn rav1d_loopfilter_sbrow_rows<BD: BitDepth>(
721709
return;
722710
}
723711

724-
let lvl = &*f
725-
.lf
726-
.level
727-
.index((f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..);
728712
let lvl = WithOffset {
729-
data: lvl,
730-
offset: 0,
713+
data: &f.lf.level,
714+
offset: 4 * f.b4_stride as usize * (sby * sbsz >> ss_ver) as usize,
731715
};
732716
let [_, pu, pv] = p;
733717
for x in 0..f.sb128w as usize {
734718
filter_plane_rows_uv::<BD>(
735719
f,
736720
have_top,
737-
lvl + x * (32 >> ss_hor),
721+
lvl + 4 * x * (32 >> ss_hor),
738722
f.b4_stride as usize,
739723
&lflvl[x].filter_uv[1],
740724
pu + (x * 128 >> ss_hor),

src/lf_mask.rs

Lines changed: 22 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ fn mask_edges_chroma(
388388

389389
pub(crate) fn rav1d_create_lf_mask_intra(
390390
lflvl: &Av1Filter,
391-
level_cache: &DisjointMut<Vec<[u8; 4]>>,
391+
level_cache: &DisjointMut<Vec<u8>>,
392392
b4_stride: ptrdiff_t,
393393
filter_level: &Align16<[[[u8; 2]; 8]; 4]>,
394394
b: Bxy,
@@ -416,16 +416,11 @@ pub(crate) fn rav1d_create_lf_mask_intra(
416416
let mut level_cache_off = by * b4_stride + bx;
417417
for _y in 0..bh4 {
418418
for x in 0..bw4 {
419-
let idx = level_cache_off + x;
420-
assert!(idx < level_cache.len());
421-
// SAFETY: The Y portion of this element (indices 0 and 1) is not
422-
// concurrently accessed by any other threads and the assert above ensures
423-
// that it is in bounds.
424-
unsafe {
425-
let cur = level_cache.as_mut_ptr().add(idx);
426-
(*cur)[0] = filter_level[0][0][0];
427-
(*cur)[1] = filter_level[1][0][0];
428-
}
419+
let idx = 4 * (level_cache_off + x);
420+
// `0.., ..2` is for Y
421+
let lvl = &mut *level_cache.index_mut((idx + 0.., ..2));
422+
lvl[0] = filter_level[0][0][0];
423+
lvl[1] = filter_level[1][0][0];
429424
}
430425
level_cache_off += b4_stride;
431426
}
@@ -459,16 +454,11 @@ pub(crate) fn rav1d_create_lf_mask_intra(
459454
let mut level_cache_off = (by >> ss_ver) * b4_stride + (bx >> ss_hor);
460455
for _y in 0..cbh4 {
461456
for x in 0..cbw4 {
462-
let idx = level_cache_off + x;
463-
assert!(idx < level_cache.len());
464-
// SAFETY: The UV portion of this element (indices 2 and 3) is not concurrently
465-
// accessed by any other threads and the assert above ensures that it is in
466-
// bounds.
467-
unsafe {
468-
let cur = level_cache.as_mut_ptr().add(idx);
469-
(*cur)[2] = filter_level[2][0][0];
470-
(*cur)[3] = filter_level[3][0][0];
471-
}
457+
let idx = 4 * (level_cache_off + x);
458+
// `2.., ..2` is for UV
459+
let lvl = &mut *level_cache.index_mut((idx + 2.., ..2));
460+
lvl[0] = filter_level[2][0][0];
461+
lvl[1] = filter_level[3][0][0];
472462
}
473463
level_cache_off += b4_stride;
474464
}
@@ -491,7 +481,7 @@ pub(crate) fn rav1d_create_lf_mask_intra(
491481
#[inline(never)]
492482
pub(crate) fn rav1d_create_lf_mask_inter(
493483
lflvl: &Av1Filter,
494-
level_cache: &DisjointMut<Vec<[u8; 4]>>,
484+
level_cache: &DisjointMut<Vec<u8>>,
495485
b4_stride: ptrdiff_t,
496486
filter_level: &Align16<[[[u8; 2]; 8]; 4]>,
497487
r#ref: usize,
@@ -524,16 +514,11 @@ pub(crate) fn rav1d_create_lf_mask_inter(
524514
let mut level_cache_off = by * b4_stride + bx;
525515
for _y in 0..bh4 {
526516
for x in 0..bw4 {
527-
let idx = level_cache_off + x;
528-
assert!(idx < level_cache.len());
529-
// SAFETY: The Y portion of this element (indices 0 and 1) is not
530-
// concurrently accessed by any other threads and the assert above ensures
531-
// that it is in bounds.
532-
unsafe {
533-
let cur = level_cache.as_mut_ptr().add(idx);
534-
(*cur)[0] = filter_level[0][r#ref][is_gmv];
535-
(*cur)[1] = filter_level[1][r#ref][is_gmv];
536-
}
517+
let idx = 4 * (level_cache_off + x);
518+
// `0.., ..2` is for Y
519+
let lvl = &mut *level_cache.index_mut((idx + 0.., ..2));
520+
lvl[0] = filter_level[0][r#ref][is_gmv];
521+
lvl[1] = filter_level[1][r#ref][is_gmv];
537522
}
538523
level_cache_off += b4_stride;
539524
}
@@ -578,16 +563,11 @@ pub(crate) fn rav1d_create_lf_mask_inter(
578563
let mut level_cache_off = (by >> ss_ver) * b4_stride + (bx >> ss_hor);
579564
for _y in 0..cbh4 {
580565
for x in 0..cbw4 {
581-
let idx = level_cache_off + x;
582-
assert!(idx < level_cache.len());
583-
// SAFETY: The UV part of this element (indices 2 and 3) is not concurrently
584-
// accessed by any other threads and the assert above ensures that it is in
585-
// bounds.
586-
unsafe {
587-
let cur = level_cache.as_mut_ptr().add(idx);
588-
(*cur)[2] = filter_level[2][r#ref][is_gmv];
589-
(*cur)[3] = filter_level[3][r#ref][is_gmv];
590-
}
566+
let idx = 4 * (level_cache_off + x);
567+
// `2.., ..2` is for UV
568+
let lvl = &mut *level_cache.index_mut((idx + 2.., ..2));
569+
lvl[0] = filter_level[2][r#ref][is_gmv];
570+
lvl[1] = filter_level[3][r#ref][is_gmv];
591571
}
592572
level_cache_off += b4_stride;
593573
}

0 commit comments

Comments
 (0)