Skip to content

Commit

Permalink
struct Rav1dFrameContext_lf::level: Change [u8; 4]s to u8s and …
Browse files Browse the repository at this point in the history
…remove `unsafe`s and UB (#1273)

Previously, usages of `level` were UB. Accesses to the disjoint Y
(`[0..2]`) and UV (`[2..4]`) halves were done `unsafe`ly, since at the
whole-`[u8; 4]` element granularity they overlapped. But this meant the
other, safe `DisjointMut` usages of `level` went unchecked against
those `unsafe` writes, overlapped with them, and were thus UB.

This fixes that by making the elements just `u8`s instead of `[u8; 4]`s,
multiplying all previous indices/offsets by 4. Thus we can now index the
`[0..2]` Y and `[2..4]` UV elements independently with `DisjointMut`'s
safe APIs.

We can also remove `fn unaligned_lvl_slice`, since it now translates to
a plain offset by `y` (which, unlike the `x` indices, is not multiplied
by 4). We can also allocate only 3 extra bytes for asm, as the C code
does, rather than a whole extra element (4 bytes).

And finally, we can move the `DisjointMut` immutable indexing to `fn
loop_filter_sb128_rust`, where it can be fine-grained and not overlap
with other uses.
  • Loading branch information
kkysen authored Jul 1, 2024
2 parents 7e0a9d8 + c298743 commit 5cf49e0
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 169 deletions.
1 change: 0 additions & 1 deletion lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ pub mod src {
pub(crate) mod pixels;
pub(crate) mod relaxed_atomic;
pub(crate) mod strided;
mod unstable_extensions;
pub(crate) mod with_offset;
pub(crate) mod wrap_fn_ptr;
// TODO(kkysen) Temporarily `pub(crate)` due to a `pub use` until TAIT.
Expand Down
4 changes: 2 additions & 2 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4520,10 +4520,10 @@ pub(crate) fn rav1d_decode_frame_init(c: &Rav1dContext, fc: &Rav1dFrameContext)
f.lf.mask.clear();
// TODO: Fallible allocation.
f.lf.mask.resize_with(num_sb128 as usize, Default::default);
// over-allocate one element (4 bytes) since some of the SIMD implementations
// over-allocate by 3 bytes since some of the SIMD implementations
// index this from the level type and can thus over-read by up to 3 bytes.
f.lf.level
.resize(num_sb128 as usize * 32 * 32 + 1, [0u8; 4]); // TODO: Fallible allocation
.resize_with(4 * num_sb128 as usize * 32 * 32 + 3, Default::default); // TODO: Fallible allocation
if c.fc.len() > 1 {
// TODO: Fallible allocation
f.frame_thread
Expand Down
2 changes: 1 addition & 1 deletion src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,7 @@ impl TxLpfRightEdge {
#[derive(Default)]
#[repr(C)]
pub struct Rav1dFrameContext_lf {
pub level: DisjointMut<Vec<[u8; 4]>>,
pub level: DisjointMut<Vec<u8>>,
pub mask: Vec<Av1Filter>, /* len = w*h */
pub lr_mask: Vec<Av1Restoration>,
pub lim_lut: Align16<Av1FilterLUT>,
Expand Down
68 changes: 26 additions & 42 deletions src/lf_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ pub(crate) fn rav1d_copy_lpf<BD: BitDepth>(
fn filter_plane_cols_y<BD: BitDepth>(
f: &Rav1dFrameData,
have_left: bool,
lvl: WithOffset<&[[u8; 4]]>,
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
mask: &[[[RelaxedAtomic<u16>; 2]; 3]; 32],
y_dst: Rav1dPictureDataComponentOffset,
w: usize,
Expand Down Expand Up @@ -398,16 +398,16 @@ fn filter_plane_cols_y<BD: BitDepth>(
} else {
mask.each_ref().map(|[_, b]| b.get() as u32)
};
let lvl = lvl + x;
lf_sb.y.h.call::<BD>(f, y_dst(x), &hmask, lvl, 0, len);
let lvl = |y| lvl + (4 * x + y);
lf_sb.y.h.call::<BD>(f, y_dst(x), &hmask, lvl(0), len);
}
}

#[inline]
fn filter_plane_rows_y<BD: BitDepth>(
f: &Rav1dFrameData,
have_top: bool,
lvl: WithOffset<&[[u8; 4]]>,
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
b4_stride: usize,
mask: &[[[RelaxedAtomic<u16>; 2]; 3]; 32],
y_dst: Rav1dPictureDataComponentOffset,
Expand All @@ -430,16 +430,16 @@ fn filter_plane_rows_y<BD: BitDepth>(
let vmask = mask[y % mask.len()] // To elide the bounds check.
.each_ref()
.map(|[a, b]| a.get() as u32 | ((b.get() as u32) << 16));
let lvl = lvl + i * b4_stride;
lf_sb.y.v.call::<BD>(f, y_dst(i), &vmask, lvl, 1, w);
let lvl = |y| lvl + (4 * i * b4_stride + y);
lf_sb.y.v.call::<BD>(f, y_dst(i), &vmask, lvl(1), w);
}
}

#[inline]
fn filter_plane_cols_uv<BD: BitDepth>(
f: &Rav1dFrameData,
have_left: bool,
lvl: WithOffset<&[[u8; 4]]>,
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
mask: &[[[RelaxedAtomic<u16>; 2]; 2]; 32],
u_dst: Rav1dPictureDataComponentOffset,
v_dst: Rav1dPictureDataComponentOffset,
Expand Down Expand Up @@ -472,17 +472,17 @@ fn filter_plane_cols_uv<BD: BitDepth>(
mask.each_ref().map(|[_, b]| b.get() as u32)
};
let hmask = [hmask[0], hmask[1], 0];
let lvl = lvl + x;
lf_sb.uv.h.call::<BD>(f, u_dst(x), &hmask, lvl, 2, len);
lf_sb.uv.h.call::<BD>(f, v_dst(x), &hmask, lvl, 3, len);
let lvl = |y| lvl + (4 * x + y);
lf_sb.uv.h.call::<BD>(f, u_dst(x), &hmask, lvl(2), len);
lf_sb.uv.h.call::<BD>(f, v_dst(x), &hmask, lvl(3), len);
}
}

#[inline]
fn filter_plane_rows_uv<BD: BitDepth>(
f: &Rav1dFrameData,
have_top: bool,
lvl: WithOffset<&[[u8; 4]]>,
lvl: WithOffset<&DisjointMut<Vec<u8>>>,
b4_stride: usize,
mask: &[[[RelaxedAtomic<u16>; 2]; 2]; 32],
u_dst: Rav1dPictureDataComponentOffset,
Expand Down Expand Up @@ -510,9 +510,9 @@ fn filter_plane_rows_uv<BD: BitDepth>(
.each_ref()
.map(|[a, b]| a.get() as u32 | ((b.get() as u32) << (16 >> ss_hor)));
let vmask = [vmask[0], vmask[1], 0];
let lvl = lvl + i * b4_stride;
lf_sb.uv.v.call::<BD>(f, u_dst(i), &vmask, lvl, 2, w);
lf_sb.uv.v.call::<BD>(f, v_dst(i), &vmask, lvl, 3, w);
let lvl = |y| lvl + (4 * i * b4_stride + y);
lf_sb.uv.v.call::<BD>(f, u_dst(i), &vmask, lvl(2), w);
lf_sb.uv.v.call::<BD>(f, v_dst(i), &vmask, lvl(3), w);
}
}

Expand Down Expand Up @@ -624,20 +624,16 @@ pub(crate) fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
}
}
let lflvl = &f.lf.mask[lflvl_offset..];
let lvl = &*f
.lf
.level
.index((f.b4_stride * sby as isize * sbsz as isize) as usize..);
let lvl = WithOffset {
data: lvl,
offset: 0,
data: &f.lf.level,
offset: 4 * f.b4_stride as usize * (sby * sbsz) as usize,
};
have_left = false;
for x in 0..f.sb128w as usize {
filter_plane_cols_y::<BD>(
f,
have_left,
lvl + x * 32,
lvl + 4 * x * 32,
&lflvl[x].filter_y[0],
py + x * 128,
cmp::min(32, f.w4 - x as c_int * 32) as usize,
Expand All @@ -649,20 +645,16 @@ pub(crate) fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 {
return;
}
let lvl = &*f
.lf
.level
.index((f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..);
let lvl = WithOffset {
data: lvl,
offset: 0,
data: &f.lf.level,
offset: 4 * f.b4_stride as usize * (sby * sbsz >> ss_ver) as usize,
};
have_left = false;
for x in 0..f.sb128w as usize {
filter_plane_cols_uv::<BD>(
f,
have_left,
lvl + x * (32 >> ss_hor),
lvl + 4 * x * (32 >> ss_hor),
&lflvl[x].filter_uv[0],
pu + x * (128 >> ss_hor),
pv + x * (128 >> ss_hor),
Expand Down Expand Up @@ -694,19 +686,15 @@ pub(crate) fn rav1d_loopfilter_sbrow_rows<BD: BitDepth>(
let endy4: c_uint = (starty4 + cmp::min(f.h4 - sby * sbsz, sbsz)) as c_uint;
let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver;

let lvl = &*f
.lf
.level
.index((f.b4_stride * sby as isize * sbsz as isize) as usize..);
let lvl = WithOffset {
data: lvl,
offset: 0,
data: &f.lf.level,
offset: 4 * f.b4_stride as usize * (sby * sbsz) as usize,
};
for x in 0..f.sb128w as usize {
filter_plane_rows_y::<BD>(
f,
have_top,
lvl + x * 32,
lvl + 4 * x * 32,
f.b4_stride as usize,
&lflvl[x].filter_y[1],
p[0] + 128 * x,
Expand All @@ -721,20 +709,16 @@ pub(crate) fn rav1d_loopfilter_sbrow_rows<BD: BitDepth>(
return;
}

let lvl = &*f
.lf
.level
.index((f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..);
let lvl = WithOffset {
data: lvl,
offset: 0,
data: &f.lf.level,
offset: 4 * f.b4_stride as usize * (sby * sbsz >> ss_ver) as usize,
};
let [_, pu, pv] = p;
for x in 0..f.sb128w as usize {
filter_plane_rows_uv::<BD>(
f,
have_top,
lvl + x * (32 >> ss_hor),
lvl + 4 * x * (32 >> ss_hor),
f.b4_stride as usize,
&lflvl[x].filter_uv[1],
pu + (x * 128 >> ss_hor),
Expand Down
64 changes: 22 additions & 42 deletions src/lf_mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ fn mask_edges_chroma(

pub(crate) fn rav1d_create_lf_mask_intra(
lflvl: &Av1Filter,
level_cache: &DisjointMut<Vec<[u8; 4]>>,
level_cache: &DisjointMut<Vec<u8>>,
b4_stride: ptrdiff_t,
filter_level: &Align16<[[[u8; 2]; 8]; 4]>,
b: Bxy,
Expand Down Expand Up @@ -416,16 +416,11 @@ pub(crate) fn rav1d_create_lf_mask_intra(
let mut level_cache_off = by * b4_stride + bx;
for _y in 0..bh4 {
for x in 0..bw4 {
let idx = level_cache_off + x;
assert!(idx < level_cache.len());
// SAFETY: The Y portion of this element (indices 0 and 1) is not
// concurrently accessed by any other threads and the assert above ensures
// that it is in bounds.
unsafe {
let cur = level_cache.as_mut_ptr().add(idx);
(*cur)[0] = filter_level[0][0][0];
(*cur)[1] = filter_level[1][0][0];
}
let idx = 4 * (level_cache_off + x);
// `0.., ..2` is for Y
let lvl = &mut *level_cache.index_mut((idx + 0.., ..2));
lvl[0] = filter_level[0][0][0];
lvl[1] = filter_level[1][0][0];
}
level_cache_off += b4_stride;
}
Expand Down Expand Up @@ -459,16 +454,11 @@ pub(crate) fn rav1d_create_lf_mask_intra(
let mut level_cache_off = (by >> ss_ver) * b4_stride + (bx >> ss_hor);
for _y in 0..cbh4 {
for x in 0..cbw4 {
let idx = level_cache_off + x;
assert!(idx < level_cache.len());
// SAFETY: The UV portion of this element (indices 2 and 3) is not concurrently
// accessed by any other threads and the assert above ensures that it is in
// bounds.
unsafe {
let cur = level_cache.as_mut_ptr().add(idx);
(*cur)[2] = filter_level[2][0][0];
(*cur)[3] = filter_level[3][0][0];
}
let idx = 4 * (level_cache_off + x);
// `2.., ..2` is for UV
let lvl = &mut *level_cache.index_mut((idx + 2.., ..2));
lvl[0] = filter_level[2][0][0];
lvl[1] = filter_level[3][0][0];
}
level_cache_off += b4_stride;
}
Expand All @@ -491,7 +481,7 @@ pub(crate) fn rav1d_create_lf_mask_intra(
#[inline(never)]
pub(crate) fn rav1d_create_lf_mask_inter(
lflvl: &Av1Filter,
level_cache: &DisjointMut<Vec<[u8; 4]>>,
level_cache: &DisjointMut<Vec<u8>>,
b4_stride: ptrdiff_t,
filter_level: &Align16<[[[u8; 2]; 8]; 4]>,
r#ref: usize,
Expand Down Expand Up @@ -524,16 +514,11 @@ pub(crate) fn rav1d_create_lf_mask_inter(
let mut level_cache_off = by * b4_stride + bx;
for _y in 0..bh4 {
for x in 0..bw4 {
let idx = level_cache_off + x;
assert!(idx < level_cache.len());
// SAFETY: The Y portion of this element (indices 0 and 1) is not
// concurrently accessed by any other threads and the assert above ensures
// that it is in bounds.
unsafe {
let cur = level_cache.as_mut_ptr().add(idx);
(*cur)[0] = filter_level[0][r#ref][is_gmv];
(*cur)[1] = filter_level[1][r#ref][is_gmv];
}
let idx = 4 * (level_cache_off + x);
// `0.., ..2` is for Y
let lvl = &mut *level_cache.index_mut((idx + 0.., ..2));
lvl[0] = filter_level[0][r#ref][is_gmv];
lvl[1] = filter_level[1][r#ref][is_gmv];
}
level_cache_off += b4_stride;
}
Expand Down Expand Up @@ -578,16 +563,11 @@ pub(crate) fn rav1d_create_lf_mask_inter(
let mut level_cache_off = (by >> ss_ver) * b4_stride + (bx >> ss_hor);
for _y in 0..cbh4 {
for x in 0..cbw4 {
let idx = level_cache_off + x;
assert!(idx < level_cache.len());
// SAFETY: The UV part of this element (indices 2 and 3) is not concurrently
// accessed by any other threads and the assert above ensures that it is in
// bounds.
unsafe {
let cur = level_cache.as_mut_ptr().add(idx);
(*cur)[2] = filter_level[2][r#ref][is_gmv];
(*cur)[3] = filter_level[3][r#ref][is_gmv];
}
let idx = 4 * (level_cache_off + x);
// `2.., ..2` is for UV
let lvl = &mut *level_cache.index_mut((idx + 2.., ..2));
lvl[0] = filter_level[2][r#ref][is_gmv];
lvl[1] = filter_level[3][r#ref][is_gmv];
}
level_cache_off += b4_stride;
}
Expand Down
Loading

0 comments on commit 5cf49e0

Please sign in to comment.