Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fn resize: Make safe w/ WithOffset<PicOrBuf<AlignedVec64<u8>>> #1268

Merged
merged 3 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 79 additions & 55 deletions src/lf_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@ use crate::src::lr_apply::LR_RESTORE_V;
use crate::src::lr_apply::LR_RESTORE_Y;
use crate::src::relaxed_atomic::RelaxedAtomic;
use crate::src::strided::Strided as _;
use libc::ptrdiff_t;
use crate::src::strided::WithStride;
use crate::src::with_offset::WithOffset;
use std::array;
use std::cmp;
use std::ffi::c_int;
use std::ffi::c_uint;

// The loop filter buffer stores 12 rows of pixels. A superblock block will
// contain at most 2 stripes. Each stripe requires 4 rows pixels (2 above
// and 2 below) the final 4 rows are used to swap the bottom of the last
// stripe with the top of the next super block row.
unsafe fn backup_lpf<BD: BitDepth>(
/// The loop filter buffer stores 12 rows of pixels.
/// A superblock will contain at most 2 stripes.
/// Each stripe requires 4 rows of pixels (2 above and 2 below).
/// The final 4 rows are used to swap the bottom of
/// the last stripe with the top of the next superblock row.
fn backup_lpf<BD: BitDepth>(
c: &Rav1dContext,
dst: &DisjointMut<AlignedVec64<u8>>,
mut dst_offset: usize, // in pixel units
dst_stride: ptrdiff_t,
mut dst: WithOffset<WithStride<&DisjointMut<AlignedVec64<u8>>>>,
mut src: Rav1dPictureDataComponentOffset,
ss_ver: c_int,
sb128: u8,
Expand Down Expand Up @@ -56,35 +56,36 @@ unsafe fn backup_lpf<BD: BitDepth>(
if c.tc.len() == 1 {
if row != 0 {
let top = 4 << sb128;
let px_abs_stride = BD::pxstride(dst_stride.unsigned_abs());
let px_abs_stride = dst.pixel_stride::<BD>().unsigned_abs();
let top_size = top * px_abs_stride;
// Copy the top part of the stored loop filtered pixels from the
// previous sb row needed above the first stripe of this sb row.
let (dst_idx, src_idx) = if dst_stride < 0 {
let (dst_idx, src_idx) = if dst.stride() < 0 {
(
dst_offset - 3 * px_abs_stride,
dst_offset - top_size - 3 * px_abs_stride,
dst.offset - 3 * px_abs_stride,
dst.offset - top_size - 3 * px_abs_stride,
)
} else {
(dst_offset, dst_offset + top_size)
(dst.offset, dst.offset + top_size)
};

for i in 0..4 {
BD::pixel_copy(
&mut dst.mut_slice_as((dst_idx + i * px_abs_stride.., ..dst_w)),
&dst.slice_as((src_idx + i * px_abs_stride.., ..dst_w)),
&mut dst
.data
.mut_slice_as((dst_idx + i * px_abs_stride.., ..dst_w)),
&dst.data.slice_as((src_idx + i * px_abs_stride.., ..dst_w)),
dst_w,
);
}
}
dst_offset = (dst_offset as isize + 4 * BD::pxstride(dst_stride)) as usize;
dst += 4 * dst.pixel_stride::<BD>();
}
if lr_backup != 0 && frame_hdr.size.width[0] != frame_hdr.size.width[1] {
while row + stripe_h <= row_h {
let n_lines = 4 - (row + stripe_h + 1 == h) as c_int;
dsp.mc.resize.call::<BD>(
dst.mut_slice_as((dst_offset.., ..dst_w)).as_mut_ptr(),
dst_stride,
WithOffset::buf(dst),
src,
dst_w,
n_lines as usize,
Expand All @@ -96,48 +97,47 @@ unsafe fn backup_lpf<BD: BitDepth>(
row += stripe_h; // unmodified stripe_h for the 1st stripe
stripe_h = 64 >> ss_ver;
src += stripe_h as isize * src.pixel_stride::<BD>();
dst_offset =
(dst_offset as isize + n_lines as isize * BD::pxstride(dst_stride)) as usize;
dst += n_lines as isize * dst.pixel_stride::<BD>();

if n_lines == 3 {
let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs());
let (src_idx, dst_idx) = if dst_stride < 0 {
(dst_offset + dst_abs_px_stride, dst_offset)
let dst_abs_px_stride = dst.pixel_stride::<BD>().unsigned_abs();
let (src_idx, dst_idx) = if dst.stride() < 0 {
(dst.offset + dst_abs_px_stride, dst.offset)
} else {
(dst_offset - dst_abs_px_stride, dst_offset)
(dst.offset - dst_abs_px_stride, dst.offset)
};
BD::pixel_copy(
&mut dst.mut_slice_as((dst_idx.., ..dst_w)),
&dst.slice_as((src_idx.., ..dst_w)),
&mut dst.data.mut_slice_as((dst_idx.., ..dst_w)),
&dst.data.slice_as((src_idx.., ..dst_w)),
dst_w,
);
dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize;
dst += dst.pixel_stride::<BD>();
}
}
} else {
while row + stripe_h <= row_h {
let n_lines = 4 - (row + stripe_h + 1 == h) as c_int;
for i in 0..4 {
let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs());
let dst_abs_px_stride = dst.pixel_stride::<BD>().unsigned_abs();
if i != n_lines {
BD::pixel_copy(
&mut dst.mut_slice_as((dst_offset.., ..src_w)),
&mut dst.data.mut_slice_as((dst.offset.., ..src_w)),
&src.slice::<BD>(src_w),
src_w,
);
} else {
let (src_idx, dst_idx) = if dst_stride < 0 {
(dst_offset + dst_abs_px_stride, dst_offset)
let (src_idx, dst_idx) = if dst.stride() < 0 {
(dst.offset + dst_abs_px_stride, dst.offset)
} else {
(dst_offset - dst_abs_px_stride, dst_offset)
(dst.offset - dst_abs_px_stride, dst.offset)
};
BD::pixel_copy(
&mut dst.mut_slice_as((dst_idx.., ..src_w)),
&dst.slice_as((src_idx.., ..src_w)),
&mut dst.data.mut_slice_as((dst_idx.., ..src_w)),
&dst.data.slice_as((src_idx.., ..src_w)),
src_w,
)
}
dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize;
dst += dst.pixel_stride::<BD>();
src += src.pixel_stride::<BD>();
}
row += stripe_h; // unmodified stripe_h for the 1st stripe
Expand All @@ -147,7 +147,7 @@ unsafe fn backup_lpf<BD: BitDepth>(
};
}

pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
pub(crate) fn rav1d_copy_lpf<BD: BitDepth>(
c: &Rav1dContext,
f: &Rav1dFrameData,
src: [Rav1dPictureDataComponentOffset; 3],
Expand Down Expand Up @@ -180,9 +180,13 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_Y as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
&f.lf.lr_line_buf,
dst[0].offset,
dst[0].stride(),
WithOffset {
data: WithStride {
buf: &f.lf.lr_line_buf,
stride: dst[0].stride(),
},
offset: dst[0].offset,
},
src[0] - (offset_y as isize * src[0].pixel_stride::<BD>()),
0,
seq_hdr.sb128,
Expand All @@ -206,9 +210,13 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
let cdef_line_start = (f.lf.cdef_lpf_line[0] as isize + cmp::min(y_span, 0)) as usize;
backup_lpf::<BD>(
c,
&f.lf.cdef_line_buf,
cdef_line_start + (cdef_off_y - cmp::min(y_span, 0)) as usize,
src[0].stride(),
WithOffset {
data: WithStride {
buf: &f.lf.cdef_line_buf,
stride: src[0].stride(),
},
offset: cdef_line_start + (cdef_off_y - cmp::min(y_span, 0)) as usize,
},
src[0] - (offset_y as isize * src[0].pixel_stride::<BD>()),
0,
seq_hdr.sb128,
Expand Down Expand Up @@ -241,9 +249,13 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_U as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
&f.lf.lr_line_buf,
dst[1].offset,
dst[1].stride(),
WithOffset {
data: WithStride {
buf: &f.lf.lr_line_buf,
stride: dst[1].stride(),
},
offset: dst[1].offset,
},
src[1] - (offset_uv as isize * src[1].pixel_stride::<BD>()),
ss_ver,
seq_hdr.sb128,
Expand All @@ -267,9 +279,13 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
(f.lf.cdef_lpf_line[1] as isize + cmp::min(uv_span, 0)) as usize;
backup_lpf::<BD>(
c,
&f.lf.cdef_line_buf,
cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize,
src[1].stride(),
WithOffset {
data: WithStride {
buf: &f.lf.cdef_line_buf,
stride: src[1].stride(),
},
offset: cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize,
},
src[1] - (offset_uv as isize * src[1].pixel_stride::<BD>()),
ss_ver,
seq_hdr.sb128,
Expand All @@ -291,9 +307,13 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_V as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
&f.lf.lr_line_buf,
dst[2].offset,
dst[2].stride(),
WithOffset {
data: WithStride {
buf: &f.lf.lr_line_buf,
stride: dst[2].stride(),
},
offset: dst[2].offset,
},
src[2] - (offset_uv as isize * src[2].pixel_stride::<BD>()),
ss_ver,
seq_hdr.sb128,
Expand All @@ -317,9 +337,13 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
(f.lf.cdef_lpf_line[2] as isize + cmp::min(uv_span, 0)) as usize;
backup_lpf::<BD>(
c,
&f.lf.cdef_line_buf,
cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize,
src[2].stride(),
WithOffset {
data: WithStride {
buf: &f.lf.cdef_line_buf,
stride: src[2].stride(),
},
offset: cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize,
},
src[2] - (offset_uv as isize * src[2].pixel_stride::<BD>()),
ss_ver,
seq_hdr.sb128,
Expand Down
46 changes: 30 additions & 16 deletions src/mc.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![deny(unsafe_op_in_unsafe_fn)]

use crate::include::common::bitdepth::AsPrimitive;
use crate::include::common::bitdepth::BitDepth;
use crate::include::common::bitdepth::DynPixel;
Expand All @@ -7,6 +9,7 @@ use crate::include::dav1d::headers::Rav1dFilterMode;
use crate::include::dav1d::headers::Rav1dPixelLayoutSubSampled;
use crate::include::dav1d::picture::Rav1dPictureDataComponent;
use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset;
use crate::src::align::AlignedVec64;
use crate::src::cpu::CpuFlags;
use crate::src::enum_map::enum_map;
use crate::src::enum_map::enum_map_ty;
Expand All @@ -18,11 +21,13 @@ use crate::src::internal::SCRATCH_INTER_INTRA_BUF_LEN;
use crate::src::internal::SCRATCH_LAP_LEN;
use crate::src::internal::SEG_MASK_LEN;
use crate::src::levels::Filter2d;
use crate::src::pic_or_buf::PicOrBuf;
use crate::src::strided::Strided as _;
use crate::src::tables::dav1d_mc_subpel_filters;
use crate::src::tables::dav1d_mc_warp_filter;
use crate::src::tables::dav1d_obmc_masks;
use crate::src::tables::dav1d_resize_filter;
use crate::src::with_offset::WithOffset;
use crate::src::wrap_fn_ptr::wrap_fn_ptr;
use std::cmp;
use std::ffi::c_int;
Expand Down Expand Up @@ -988,9 +993,8 @@ fn emu_edge_rust<BD: BitDepth>(
}
}

unsafe fn resize_rust<BD: BitDepth>(
dst: *mut BD::Pixel,
dst_stride: isize,
fn resize_rust<BD: BitDepth>(
dst: WithOffset<PicOrBuf<AlignedVec64<u8>>>,
src: Rav1dPictureDataComponentOffset,
dst_w: usize,
h: usize,
Expand All @@ -1003,10 +1007,13 @@ unsafe fn resize_rust<BD: BitDepth>(
for y in 0..h {
let mut mx = mx0;
let mut src_x = -1 - 3;
let dst = dst.offset(y as isize * BD::pxstride(dst_stride));
let dst = dst + (y as isize * dst.pixel_stride::<BD>());
let src = src + (y as isize * src.pixel_stride::<BD>());
let src = &*src.slice::<BD>(src_w);
let dst = slice::from_raw_parts_mut(dst, dst_w);
let dst = match dst.data {
PicOrBuf::Pic(pic) => &mut *pic.slice_mut::<BD, _>((dst.offset.., ..dst_w)),
PicOrBuf::Buf(buf) => &mut *buf.mut_slice_as((dst.offset.., ..dst_w)),
};
Comment on lines +1013 to +1016
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to move this logic into a slice_mut method on PicOrBuf? I expect that this pattern of matching on the enum and then doing the same slice operation on each variant would be a common usage of this type. If you don't add it in this PR I'd likely end up doing so in #1239.

Copy link
Collaborator Author

@kkysen kkysen Jul 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, because this uses lifetime extension and the guard types are not the same; only the lifetime-extended slices obtained by dereferencing the guards have the same type. Lifetime extension doesn't work through functions, so only a macro would work here, and I don't think that's worth it. (Maybe it could? What do you think? Postfix macros would be nice here.) I tried for a while to get this kind of thing to work until I realized that I'm pretty sure it's impossible. I guess a closure could also work, but that's unwieldy.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nah, I didn't catch that this was doing lifetime extension and that we had different guard types. Doing the match locally makes sense, then; I don't think a macro is worth it here.

for dst_x in 0..dst_w {
let f = &dav1d_resize_filter[(mx >> 8) as usize];
dst[dst_x] = bd.iclip_pixel(
Expand Down Expand Up @@ -1496,13 +1503,13 @@ wrap_fn_ptr!(pub unsafe extern "C" fn resize(
mx: i32,
bitdepth_max: i32,
_src: *const FFISafe<Rav1dPictureDataComponentOffset>,
_dst: *const FFISafe<WithOffset<PicOrBuf<AlignedVec64<u8>>>>,
) -> ());

impl resize::Fn {
pub unsafe fn call<BD: BitDepth>(
pub fn call<BD: BitDepth>(
&self,
dst: *mut BD::Pixel,
dst_stride: isize,
dst: WithOffset<PicOrBuf<AlignedVec64<u8>>>,
src: Rav1dPictureDataComponentOffset,
dst_w: usize,
h: usize,
Expand All @@ -1511,17 +1518,22 @@ impl resize::Fn {
mx: i32,
bd: BD,
) {
let dst = dst.cast();
let dst_ptr = dst.as_mut_ptr::<BD>().cast();
let dst_stride = dst.stride();
let src_ptr = src.as_ptr::<BD>().cast();
let src_stride = src.stride();
let dst_w = dst_w as c_int;
let h = h as c_int;
let src_w = src_w as c_int;
let bd = bd.into_c();
let src = FFISafe::new(&src);
self.get()(
dst, dst_stride, src_ptr, src_stride, dst_w, h, src_w, dx, mx, bd, src,
)
let dst = FFISafe::new(&dst);
// SAFETY: Fallback `fn resize_rust` is safe; asm is supposed to do the same.
unsafe {
self.get()(
dst_ptr, dst_stride, src_ptr, src_stride, dst_w, h, src_w, dx, mx, bd, src, dst,
)
}
}
}

Expand Down Expand Up @@ -1915,8 +1927,8 @@ unsafe extern "C" fn emu_edge_c_erased<BD: BitDepth>(
}

unsafe extern "C" fn resize_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
dst_stride: isize,
_dst_ptr: *mut DynPixel,
_dst_stride: isize,
_src_ptr: *const DynPixel,
_src_stride: isize,
dst_w: i32,
Expand All @@ -1926,15 +1938,17 @@ unsafe extern "C" fn resize_c_erased<BD: BitDepth>(
mx0: i32,
bitdepth_max: i32,
src: *const FFISafe<Rav1dPictureDataComponentOffset>,
dst: *const FFISafe<WithOffset<PicOrBuf<AlignedVec64<u8>>>>,
) {
let dst = dst.cast();
// SAFETY: Was passed as `FFISafe::new(_)` in `resize::Fn::call`.
let dst = *unsafe { FFISafe::get(dst) };
// SAFETY: Was passed as `FFISafe::new(_)` in `resize::Fn::call`.
let src = *unsafe { FFISafe::get(src) };
let dst_w = dst_w as usize;
let h = h as usize;
let src_w = src_w as usize;
let bd = BD::from_c(bitdepth_max);
resize_rust(dst, dst_stride, src, dst_w, h, src_w, dx, mx0, bd)
resize_rust(dst, src, dst_w, h, src_w, dx, mx0, bd)
}

impl Rav1dMCDSPContext {
Expand Down
Loading
Loading