diff --git a/src/lf_apply.rs b/src/lf_apply.rs index c39ba36b8..b7ed2fac1 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -12,21 +12,21 @@ use crate::src::lr_apply::LR_RESTORE_V; use crate::src::lr_apply::LR_RESTORE_Y; use crate::src::relaxed_atomic::RelaxedAtomic; use crate::src::strided::Strided as _; -use libc::ptrdiff_t; +use crate::src::strided::WithStride; +use crate::src::with_offset::WithOffset; use std::array; use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; -// The loop filter buffer stores 12 rows of pixels. A superblock block will -// contain at most 2 stripes. Each stripe requires 4 rows pixels (2 above -// and 2 below) the final 4 rows are used to swap the bottom of the last -// stripe with the top of the next super block row. -unsafe fn backup_lpf( +/// The loop filter buffer stores 12 rows of pixels. +/// A superblock block will contain at most 2 stripes. +/// Each stripe requires 4 rows of pixels (2 above and 2 below). +/// The final 4 rows are used to swap the bottom of +/// the last stripe with the top of the next superblock row. +fn backup_lpf( c: &Rav1dContext, - dst: &DisjointMut>, - mut dst_offset: usize, // in pixel units - dst_stride: ptrdiff_t, + mut dst: WithOffset>>>, mut src: Rav1dPictureDataComponentOffset, ss_ver: c_int, sb128: u8, @@ -56,35 +56,36 @@ unsafe fn backup_lpf( if c.tc.len() == 1 { if row != 0 { let top = 4 << sb128; - let px_abs_stride = BD::pxstride(dst_stride.unsigned_abs()); + let px_abs_stride = dst.pixel_stride::().unsigned_abs(); let top_size = top * px_abs_stride; // Copy the top part of the stored loop filtered pixels from the // previous sb row needed above the first stripe of this sb row. 
- let (dst_idx, src_idx) = if dst_stride < 0 { + let (dst_idx, src_idx) = if dst.stride() < 0 { ( - dst_offset - 3 * px_abs_stride, - dst_offset - top_size - 3 * px_abs_stride, + dst.offset - 3 * px_abs_stride, + dst.offset - top_size - 3 * px_abs_stride, ) } else { - (dst_offset, dst_offset + top_size) + (dst.offset, dst.offset + top_size) }; for i in 0..4 { BD::pixel_copy( - &mut dst.mut_slice_as((dst_idx + i * px_abs_stride.., ..dst_w)), - &dst.slice_as((src_idx + i * px_abs_stride.., ..dst_w)), + &mut dst + .data + .mut_slice_as((dst_idx + i * px_abs_stride.., ..dst_w)), + &dst.data.slice_as((src_idx + i * px_abs_stride.., ..dst_w)), dst_w, ); } } - dst_offset = (dst_offset as isize + 4 * BD::pxstride(dst_stride)) as usize; + dst += 4 * dst.pixel_stride::(); } if lr_backup != 0 && frame_hdr.size.width[0] != frame_hdr.size.width[1] { while row + stripe_h <= row_h { let n_lines = 4 - (row + stripe_h + 1 == h) as c_int; dsp.mc.resize.call::( - dst.mut_slice_as((dst_offset.., ..dst_w)).as_mut_ptr(), - dst_stride, + WithOffset::buf(dst), src, dst_w, n_lines as usize, @@ -96,48 +97,47 @@ unsafe fn backup_lpf( row += stripe_h; // unmodified stripe_h for the 1st stripe stripe_h = 64 >> ss_ver; src += stripe_h as isize * src.pixel_stride::(); - dst_offset = - (dst_offset as isize + n_lines as isize * BD::pxstride(dst_stride)) as usize; + dst += n_lines as isize * dst.pixel_stride::(); if n_lines == 3 { - let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs()); - let (src_idx, dst_idx) = if dst_stride < 0 { - (dst_offset + dst_abs_px_stride, dst_offset) + let dst_abs_px_stride = dst.pixel_stride::().unsigned_abs(); + let (src_idx, dst_idx) = if dst.stride() < 0 { + (dst.offset + dst_abs_px_stride, dst.offset) } else { - (dst_offset - dst_abs_px_stride, dst_offset) + (dst.offset - dst_abs_px_stride, dst.offset) }; BD::pixel_copy( - &mut dst.mut_slice_as((dst_idx.., ..dst_w)), - &dst.slice_as((src_idx.., ..dst_w)), + &mut dst.data.mut_slice_as((dst_idx.., 
..dst_w)), + &dst.data.slice_as((src_idx.., ..dst_w)), dst_w, ); - dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize; + dst += dst.pixel_stride::(); } } } else { while row + stripe_h <= row_h { let n_lines = 4 - (row + stripe_h + 1 == h) as c_int; for i in 0..4 { - let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs()); + let dst_abs_px_stride = dst.pixel_stride::().unsigned_abs(); if i != n_lines { BD::pixel_copy( - &mut dst.mut_slice_as((dst_offset.., ..src_w)), + &mut dst.data.mut_slice_as((dst.offset.., ..src_w)), &src.slice::(src_w), src_w, ); } else { - let (src_idx, dst_idx) = if dst_stride < 0 { - (dst_offset + dst_abs_px_stride, dst_offset) + let (src_idx, dst_idx) = if dst.stride() < 0 { + (dst.offset + dst_abs_px_stride, dst.offset) } else { - (dst_offset - dst_abs_px_stride, dst_offset) + (dst.offset - dst_abs_px_stride, dst.offset) }; BD::pixel_copy( - &mut dst.mut_slice_as((dst_idx.., ..src_w)), - &dst.slice_as((src_idx.., ..src_w)), + &mut dst.data.mut_slice_as((dst_idx.., ..src_w)), + &dst.data.slice_as((src_idx.., ..src_w)), src_w, ) } - dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize; + dst += dst.pixel_stride::(); src += src.pixel_stride::(); } row += stripe_h; // unmodified stripe_h for the 1st stripe @@ -147,7 +147,7 @@ unsafe fn backup_lpf( }; } -pub(crate) unsafe fn rav1d_copy_lpf( +pub(crate) fn rav1d_copy_lpf( c: &Rav1dContext, f: &Rav1dFrameData, src: [Rav1dPictureDataComponentOffset; 3], @@ -180,9 +180,13 @@ pub(crate) unsafe fn rav1d_copy_lpf( if restore_planes & LR_RESTORE_Y as c_int != 0 || resize == 0 { backup_lpf::( c, - &f.lf.lr_line_buf, - dst[0].offset, - dst[0].stride(), + WithOffset { + data: WithStride { + buf: &f.lf.lr_line_buf, + stride: dst[0].stride(), + }, + offset: dst[0].offset, + }, src[0] - (offset_y as isize * src[0].pixel_stride::()), 0, seq_hdr.sb128, @@ -206,9 +210,13 @@ pub(crate) unsafe fn rav1d_copy_lpf( let cdef_line_start = (f.lf.cdef_lpf_line[0] as isize + 
cmp::min(y_span, 0)) as usize; backup_lpf::( c, - &f.lf.cdef_line_buf, - cdef_line_start + (cdef_off_y - cmp::min(y_span, 0)) as usize, - src[0].stride(), + WithOffset { + data: WithStride { + buf: &f.lf.cdef_line_buf, + stride: src[0].stride(), + }, + offset: cdef_line_start + (cdef_off_y - cmp::min(y_span, 0)) as usize, + }, src[0] - (offset_y as isize * src[0].pixel_stride::()), 0, seq_hdr.sb128, @@ -241,9 +249,13 @@ pub(crate) unsafe fn rav1d_copy_lpf( if restore_planes & LR_RESTORE_U as c_int != 0 || resize == 0 { backup_lpf::( c, - &f.lf.lr_line_buf, - dst[1].offset, - dst[1].stride(), + WithOffset { + data: WithStride { + buf: &f.lf.lr_line_buf, + stride: dst[1].stride(), + }, + offset: dst[1].offset, + }, src[1] - (offset_uv as isize * src[1].pixel_stride::()), ss_ver, seq_hdr.sb128, @@ -267,9 +279,13 @@ pub(crate) unsafe fn rav1d_copy_lpf( (f.lf.cdef_lpf_line[1] as isize + cmp::min(uv_span, 0)) as usize; backup_lpf::( c, - &f.lf.cdef_line_buf, - cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize, - src[1].stride(), + WithOffset { + data: WithStride { + buf: &f.lf.cdef_line_buf, + stride: src[1].stride(), + }, + offset: cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize, + }, src[1] - (offset_uv as isize * src[1].pixel_stride::()), ss_ver, seq_hdr.sb128, @@ -291,9 +307,13 @@ pub(crate) unsafe fn rav1d_copy_lpf( if restore_planes & LR_RESTORE_V as c_int != 0 || resize == 0 { backup_lpf::( c, - &f.lf.lr_line_buf, - dst[2].offset, - dst[2].stride(), + WithOffset { + data: WithStride { + buf: &f.lf.lr_line_buf, + stride: dst[2].stride(), + }, + offset: dst[2].offset, + }, src[2] - (offset_uv as isize * src[2].pixel_stride::()), ss_ver, seq_hdr.sb128, @@ -317,9 +337,13 @@ pub(crate) unsafe fn rav1d_copy_lpf( (f.lf.cdef_lpf_line[2] as isize + cmp::min(uv_span, 0)) as usize; backup_lpf::( c, - &f.lf.cdef_line_buf, - cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize, - src[2].stride(), + WithOffset { + data: WithStride { + 
buf: &f.lf.cdef_line_buf, + stride: src[2].stride(), + }, + offset: cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize, + }, src[2] - (offset_uv as isize * src[2].pixel_stride::()), ss_ver, seq_hdr.sb128, diff --git a/src/mc.rs b/src/mc.rs index 870025792..7834afad3 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -1,3 +1,5 @@ +#![deny(unsafe_op_in_unsafe_fn)] + use crate::include::common::bitdepth::AsPrimitive; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::DynPixel; @@ -7,6 +9,7 @@ use crate::include::dav1d::headers::Rav1dFilterMode; use crate::include::dav1d::headers::Rav1dPixelLayoutSubSampled; use crate::include::dav1d::picture::Rav1dPictureDataComponent; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; +use crate::src::align::AlignedVec64; use crate::src::cpu::CpuFlags; use crate::src::enum_map::enum_map; use crate::src::enum_map::enum_map_ty; @@ -18,11 +21,13 @@ use crate::src::internal::SCRATCH_INTER_INTRA_BUF_LEN; use crate::src::internal::SCRATCH_LAP_LEN; use crate::src::internal::SEG_MASK_LEN; use crate::src::levels::Filter2d; +use crate::src::pic_or_buf::PicOrBuf; use crate::src::strided::Strided as _; use crate::src::tables::dav1d_mc_subpel_filters; use crate::src::tables::dav1d_mc_warp_filter; use crate::src::tables::dav1d_obmc_masks; use crate::src::tables::dav1d_resize_filter; +use crate::src::with_offset::WithOffset; use crate::src::wrap_fn_ptr::wrap_fn_ptr; use std::cmp; use std::ffi::c_int; @@ -988,9 +993,8 @@ fn emu_edge_rust( } } -unsafe fn resize_rust( - dst: *mut BD::Pixel, - dst_stride: isize, +fn resize_rust( + dst: WithOffset>>, src: Rav1dPictureDataComponentOffset, dst_w: usize, h: usize, @@ -1003,10 +1007,13 @@ unsafe fn resize_rust( for y in 0..h { let mut mx = mx0; let mut src_x = -1 - 3; - let dst = dst.offset(y as isize * BD::pxstride(dst_stride)); + let dst = dst + (y as isize * dst.pixel_stride::()); let src = src + (y as isize * src.pixel_stride::()); let src = 
&*src.slice::(src_w); - let dst = slice::from_raw_parts_mut(dst, dst_w); + let dst = match dst.data { + PicOrBuf::Pic(pic) => &mut *pic.slice_mut::((dst.offset.., ..dst_w)), + PicOrBuf::Buf(buf) => &mut *buf.mut_slice_as((dst.offset.., ..dst_w)), + }; for dst_x in 0..dst_w { let f = &dav1d_resize_filter[(mx >> 8) as usize]; dst[dst_x] = bd.iclip_pixel( @@ -1496,13 +1503,13 @@ wrap_fn_ptr!(pub unsafe extern "C" fn resize( mx: i32, bitdepth_max: i32, _src: *const FFISafe, + _dst: *const FFISafe>>>, ) -> ()); impl resize::Fn { - pub unsafe fn call( + pub fn call( &self, - dst: *mut BD::Pixel, - dst_stride: isize, + dst: WithOffset>>, src: Rav1dPictureDataComponentOffset, dst_w: usize, h: usize, @@ -1511,7 +1518,8 @@ impl resize::Fn { mx: i32, bd: BD, ) { - let dst = dst.cast(); + let dst_ptr = dst.as_mut_ptr::().cast(); + let dst_stride = dst.stride(); let src_ptr = src.as_ptr::().cast(); let src_stride = src.stride(); let dst_w = dst_w as c_int; @@ -1519,9 +1527,13 @@ impl resize::Fn { let src_w = src_w as c_int; let bd = bd.into_c(); let src = FFISafe::new(&src); - self.get()( - dst, dst_stride, src_ptr, src_stride, dst_w, h, src_w, dx, mx, bd, src, - ) + let dst = FFISafe::new(&dst); + // SAFETY: Fallback `fn resize_rust` is safe; asm is supposed to do the same. + unsafe { + self.get()( + dst_ptr, dst_stride, src_ptr, src_stride, dst_w, h, src_w, dx, mx, bd, src, dst, + ) + } } } @@ -1915,8 +1927,8 @@ unsafe extern "C" fn emu_edge_c_erased( } unsafe extern "C" fn resize_c_erased( - dst: *mut DynPixel, - dst_stride: isize, + _dst_ptr: *mut DynPixel, + _dst_stride: isize, _src_ptr: *const DynPixel, _src_stride: isize, dst_w: i32, @@ -1926,15 +1938,17 @@ unsafe extern "C" fn resize_c_erased( mx0: i32, bitdepth_max: i32, src: *const FFISafe, + dst: *const FFISafe>>>, ) { - let dst = dst.cast(); + // SAFETY: Was passed as `FFISafe::new(_)` in `resize::Fn::call`. + let dst = *unsafe { FFISafe::get(dst) }; // SAFETY: Was passed as `FFISafe::new(_)` in `resize::Fn::call`. 
let src = *unsafe { FFISafe::get(src) }; let dst_w = dst_w as usize; let h = h as usize; let src_w = src_w as usize; let bd = BD::from_c(bitdepth_max); - resize_rust(dst, dst_stride, src, dst_w, h, src_w, dx, mx0, bd) + resize_rust(dst, src, dst_w, h, src_w, dx, mx0, bd) } impl Rav1dMCDSPContext { diff --git a/src/pic_or_buf.rs b/src/pic_or_buf.rs index 149f8dd1e..a27923cf1 100644 --- a/src/pic_or_buf.rs +++ b/src/pic_or_buf.rs @@ -4,6 +4,7 @@ use crate::src::disjoint_mut::DisjointMut; use crate::src::pixels::Pixels; use crate::src::strided::Strided; use crate::src::strided::WithStride; +use crate::src::with_offset::WithOffset; pub enum PicOrBuf<'a, T: AsMutPtr> { Pic(&'a Rav1dPictureDataComponent), @@ -43,3 +44,19 @@ impl<'a, T: AsMutPtr> Strided for PicOrBuf<'a, T> { } } } + +impl<'a, T: AsMutPtr> WithOffset> { + pub fn pic(pic: WithOffset<&'a Rav1dPictureDataComponent>) -> Self { + Self { + data: PicOrBuf::Pic(pic.data), + offset: pic.offset, + } + } + + pub fn buf(buf: WithOffset>>) -> Self { + Self { + data: PicOrBuf::Buf(buf.data), + offset: buf.offset, + } + } +} diff --git a/src/recon.rs b/src/recon.rs index ec9f13c26..2a5e36e70 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -86,6 +86,7 @@ use crate::src::tables::dav1d_txtp_from_uvmode; use crate::src::tables::TxfmInfo; use crate::src::wedge::dav1d_ii_masks; use crate::src::wedge::dav1d_wedge_masks; +use crate::src::with_offset::WithOffset; use assert_matches::debug_assert_matches; use libc::intptr_t; use std::array; @@ -3714,7 +3715,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_cdef( rav1d_cdef_brow::(c, tc, f, p, mask_offset, start, end, false, sby); } -pub(crate) unsafe fn rav1d_filter_sbrow_resize( +pub(crate) fn rav1d_filter_sbrow_resize( _c: &Rav1dContext, f: &Rav1dFrameData, _t: &mut Rav1dTaskContext, @@ -3741,8 +3742,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_resize( let img_h = f.cur.p.h - sbsz * 4 * sby + ss_ver >> ss_ver; f.dsp.mc.resize.call::( - dst.as_mut_ptr::(), - dst.stride(), + 
WithOffset::pic(dst), src, dst_w as usize, (cmp::min(img_h, h_end) + h_start) as usize,