From 8a433b40cee5adea11e1a23165df699f9b6e98c0 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Thu, 29 Feb 2024 14:53:55 -0800 Subject: [PATCH] `Rav1dFrameContext_lf::cdef_line`: Convert pointers to offsets --- include/common/bitdepth.rs | 35 +++++++++++++++++++ src/cdef_apply.rs | 70 +++++++++++++++++++++----------------- src/decode.rs | 45 ++++++++++++++++-------- src/internal.rs | 8 ++--- src/recon.rs | 4 +-- 5 files changed, 110 insertions(+), 52 deletions(-) diff --git a/include/common/bitdepth.rs b/include/common/bitdepth.rs index 4970f02a5..c9c785026 100644 --- a/include/common/bitdepth.rs +++ b/include/common/bitdepth.rs @@ -12,6 +12,7 @@ use std::ops::Div; use std::ops::Mul; use std::ops::Rem; use std::ops::Shr; +use std::slice; use to_method::To as _; pub trait FromPrimitive { @@ -96,6 +97,16 @@ impl BPC { Self::BPC16 } } + + pub fn pxstride(self, n: T) -> T + where + T: Copy + From + Div, + { + match self { + BPC::BPC8 => n, + BPC::BPC16 => n / 2.into(), + } + } } pub trait BitDepth: Clone + Copy { @@ -193,6 +204,30 @@ pub trait BitDepth: Clone + Copy { fn get_intermediate_bits(&self) -> u8; + fn cast_pixel_slice(bytes: &[u8]) -> &[Self::Pixel] { + let len = Self::pxstride(bytes.len()); + + assert!(bytes.len() % len == 0); + assert!(bytes.as_ptr() as usize % mem::align_of::() == 0); + + // SAFETY: We've checked that alignment and the number of elements is + // correct, and the new length returned by `pxstride` will either be + // `len` or `len / 2`, which is guaranteed to be in bounds. + unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), len) } + } + + fn cast_pixel_slice_mut(bytes: &mut [u8]) -> &mut [Self::Pixel] { + let len = Self::pxstride(bytes.len()); + + assert!(bytes.len() % len == 0); + assert!(bytes.as_ptr() as usize % mem::align_of::() == 0); + + // SAFETY: We've checked that alignment and the number of elements is + // correct, and the new length returned by `pxstride` will either be + // `len` or `len / 2`, which is guaranteed to be in bounds. + unsafe { slice::from_raw_parts_mut(bytes.as_mut_ptr().cast(), len) } + } + const PREP_BIAS: i16; unsafe fn select(bd: &BitDepthUnion) -> &T::T diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 9d28304b9..9b197d422 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -33,7 +33,8 @@ impl Backup2x8Flags { } unsafe fn backup2lines( - dst: &[*mut BD::Pixel; 3], + dst_buf: &mut [BD::Pixel], + dst_off: [usize; 3], src: &[*mut BD::Pixel; 3], stride: &[ptrdiff_t; 2], layout: Rav1dPixelLayout, @@ -42,13 +43,13 @@ unsafe fn backup2lines( let len = 2 * y_stride.unsigned_abs(); if y_stride < 0 { BD::pixel_copy( - slice::from_raw_parts_mut(dst[0].offset(y_stride), len), + &mut dst_buf[dst_off[0].wrapping_add_signed(y_stride)..][..len], slice::from_raw_parts(src[0].offset(7 * y_stride), len), len, ); } else { BD::pixel_copy( - slice::from_raw_parts_mut(dst[0], len), + &mut dst_buf[dst_off[0]..][..len], slice::from_raw_parts(src[0].offset(6 * y_stride), len), len, ); @@ -65,12 +66,12 @@ unsafe fn backup2lines( }; BD::pixel_copy( - slice::from_raw_parts_mut(dst[1].offset(uv_stride), len), + &mut dst_buf[dst_off[1].wrapping_add_signed(uv_stride)..][..len], slice::from_raw_parts(src[1].offset(uv_off * uv_stride), len), len, ); BD::pixel_copy( - slice::from_raw_parts_mut(dst[2].offset(uv_stride), len), + &mut dst_buf[dst_off[2].wrapping_add_signed(uv_stride)..][..len], slice::from_raw_parts(src[2].offset(uv_off * uv_stride), len), len, ); @@ -82,12 +83,12 @@ unsafe fn backup2lines( }; BD::pixel_copy( - slice::from_raw_parts_mut(dst[1], len), + &mut dst_buf[dst_off[1]..][..len], slice::from_raw_parts(src[1].offset(uv_off * uv_stride), len), len, ); BD::pixel_copy( - slice::from_raw_parts_mut(dst[2], len), + &mut dst_buf[dst_off[2]..][..len], slice::from_raw_parts(src[2].offset(uv_off * uv_stride), len), len, ); @@ -157,7 +158,7 @@ fn adjust_strength(strength: c_int, var: c_uint) -> c_int { pub(crate) unsafe fn rav1d_cdef_brow( c: &Rav1dContext, tc: &mut Rav1dTaskContext, - f: &Rav1dFrameData, + f: &mut Rav1dFrameData, p: &[*mut BD::Pixel; 3], lflvl_offset: i32, by_start: c_int, @@ -194,6 +195,8 @@ pub(crate) unsafe fn rav1d_cdef_brow( let y_stride: ptrdiff_t = BD::pxstride(f.cur.stride[0]); let uv_stride: ptrdiff_t = BD::pxstride(f.cur.stride[1]); + let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf); + let mut bit = false; for by in (by_start..by_end).step_by(2) { let tf = tc.top_pre_cdef_toggle != 0; @@ -206,15 +209,15 @@ pub(crate) unsafe fn rav1d_cdef_brow( && edges.contains(CdefEdgeFlags::HAVE_BOTTOM) { // backup pre-filter data for next iteration - let cdef_top_bak: [*mut BD::Pixel; 3] = [ - (f.lf.cdef_line[!tf as usize][0] as *mut BD::Pixel) - .offset(have_tt as isize * sby as isize * 4 * y_stride), - (f.lf.cdef_line[!tf as usize][1] as *mut BD::Pixel) - .offset(have_tt as isize * sby as isize * 8 * uv_stride), - (f.lf.cdef_line[!tf as usize][2] as *mut BD::Pixel) - .offset(have_tt as isize * sby as isize * 8 * uv_stride), + let cdef_top_bak = [ + f.lf.cdef_line[!tf as usize][0] + .wrapping_add_signed(have_tt as isize * sby as isize * 4 * y_stride), + f.lf.cdef_line[!tf as usize][1] + .wrapping_add_signed(have_tt as isize * sby as isize * 8 * uv_stride), + f.lf.cdef_line[!tf as usize][2] + .wrapping_add_signed(have_tt as isize * sby as isize * 8 * uv_stride), ]; - backup2lines::(&cdef_top_bak, &ptrs, &f.cur.stride, layout); + backup2lines::(cdef_line_buf, cdef_top_bak, &ptrs, &f.cur.stride, layout); } let mut lr_bak = @@ -327,9 +330,11 @@ pub(crate) unsafe fn rav1d_cdef_brow( bot = bptrs[0].offset(8 * y_stride as isize); st_y = false; } else if !sbrow_start && by + 2 >= by_end { - top = f.lf.cdef_line[tf as usize][0] - .cast::() - .offset((sby * 4) as isize * y_stride + (bx * 4) as isize); + offset = (sby * 4) as isize * y_stride + (bx * 4) as isize; + top = cdef_line_buf + .as_mut_ptr() + .add(f.lf.cdef_line[tf as usize][0]) + .offset(offset); if resize { offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize; bot = f.lf.cdef_lpf_line[0].cast::().offset(offset); @@ -344,10 +349,12 @@ pub(crate) unsafe fn rav1d_cdef_brow( } if st_y { - offset = (sby * 4) as isize * y_stride; - top = f.lf.cdef_line[tf as usize][0] - .cast::() - .offset(have_tt as isize * offset + (bx * 4) as isize); + offset = have_tt as isize * (sby * 4) as isize * y_stride + + (bx * 4) as isize; + top = cdef_line_buf + .as_mut_ptr() + .add(f.lf.cdef_line[tf as usize][0]) + .offset(offset); bot = bptrs[0].offset(8 * y_stride as isize); } @@ -415,8 +422,9 @@ pub(crate) unsafe fn rav1d_cdef_brow( } else if !sbrow_start && by + 2 >= by_end { let top_offset: ptrdiff_t = (sby * 8) as isize * uv_stride + (bx * 4 >> ss_hor) as isize; - top = f.lf.cdef_line[tf as usize][pl] - .cast::() + top = cdef_line_buf + .as_mut_ptr() + .add(f.lf.cdef_line[tf as usize][pl]) .offset(top_offset); if resize { offset = (sby * 4 + 2) as isize * uv_stride @@ -437,12 +445,12 @@ pub(crate) unsafe fn rav1d_cdef_brow( } if st_uv { - let offset_0 = (sby * 8) as isize * uv_stride; - top = - f.lf.cdef_line[tf as usize][pl].cast::().offset( - have_tt as isize * offset_0 - + (bx * 4 >> ss_hor) as isize, - ); + let offset = have_tt as isize * (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize; + top = cdef_line_buf + .as_mut_ptr() + .add(f.lf.cdef_line[tf as usize][pl]) + .offset(offset); bot = bptrs[pl].offset((8 >> ss_ver) * uv_stride); } diff --git a/src/decode.rs b/src/decode.rs index 5adc42b79..ae8041d32 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -3,6 +3,7 @@ use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynCoef; use crate::include::common::bitdepth::DynPixel; +use crate::include::common::bitdepth::BPC; use crate::include::common::intops::apply_sign64; use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; @@ -4394,29 +4395,43 @@ pub(crate) unsafe fn rav1d_decode_frame_init( // failure: // f.lf.cdef_buf_plane_sz = [0, 0]; f.lf.cdef_line_buf.resize(alloc_sz, 0); - let mut ptr = f.lf.cdef_line_buf.as_mut_ptr(); - ptr = ptr.offset(32); + let bpc = BPC::from_bitdepth_max(f.bitdepth_max); + let y_stride_px = bpc.pxstride(f.cur.stride[0]); + let uv_stride_px = bpc.pxstride(f.cur.stride[1]); + + let mut offset = bpc.pxstride(32usize); if y_stride < 0 { - f.lf.cdef_line[0][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel; - f.lf.cdef_line[1][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 3))) as *mut DynPixel; + f.lf.cdef_line[0][0] = + offset.wrapping_add_signed(-(y_stride_px * (f.sbh as isize * 4 - 1))); + f.lf.cdef_line[1][0] = + offset.wrapping_add_signed(-(y_stride_px * (f.sbh as isize * 4 - 3))); } else { - f.lf.cdef_line[0][0] = ptr.offset(y_stride * 0) as *mut DynPixel; - f.lf.cdef_line[1][0] = ptr.offset(y_stride * 2) as *mut DynPixel; + f.lf.cdef_line[0][0] = offset.wrapping_add_signed(y_stride_px * 0); + f.lf.cdef_line[1][0] = offset.wrapping_add_signed(y_stride_px * 2); } - ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4); + offset = offset.wrapping_add_signed(y_stride_px.abs() * f.sbh as isize * 4); if uv_stride < 0 { - f.lf.cdef_line[0][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel; - f.lf.cdef_line[0][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 3))) as *mut DynPixel; - f.lf.cdef_line[1][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 5))) as *mut DynPixel; - f.lf.cdef_line[1][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 7))) as *mut DynPixel; + f.lf.cdef_line[0][1] = + offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 1))); + f.lf.cdef_line[0][2] = + offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 3))); + f.lf.cdef_line[1][1] = + offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 5))); + f.lf.cdef_line[1][2] = + offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 7))); } else { - f.lf.cdef_line[0][1] = ptr.offset(uv_stride * 0) as *mut DynPixel; - f.lf.cdef_line[0][2] = ptr.offset(uv_stride * 2) as *mut DynPixel; - f.lf.cdef_line[1][1] = ptr.offset(uv_stride * 4) as *mut DynPixel; - f.lf.cdef_line[1][2] = ptr.offset(uv_stride * 6) as *mut DynPixel; + f.lf.cdef_line[0][1] = offset.wrapping_add_signed(uv_stride_px * 0); + f.lf.cdef_line[0][2] = offset.wrapping_add_signed(uv_stride_px * 2); + f.lf.cdef_line[1][1] = offset.wrapping_add_signed(uv_stride_px * 4); + f.lf.cdef_line[1][2] = offset.wrapping_add_signed(uv_stride_px * 6); } + let mut ptr = + f.lf.cdef_line_buf + .as_mut_ptr() + .add(32) + .offset(y_stride.abs() * f.sbh as isize * 4); if need_cdef_lpf_copy != 0 { ptr = ptr.offset(uv_stride.abs() * f.sbh as isize * 8); if y_stride < 0 { diff --git a/src/internal.rs b/src/internal.rs index f070472ff..61fb0c87b 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -323,7 +323,7 @@ pub(crate) struct Rav1dFrameContext_bd_fn { pub filter_sbrow_deblock_cols: filter_sbrow_fn, pub filter_sbrow_deblock_rows: filter_sbrow_fn, pub filter_sbrow_cdef: - unsafe fn(&Rav1dContext, &Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (), + unsafe fn(&Rav1dContext, &mut Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (), pub filter_sbrow_resize: filter_sbrow_fn, pub filter_sbrow_lr: filter_sbrow_fn, pub backup_ipred_edge: backup_ipred_edge_fn, @@ -435,9 +435,9 @@ pub struct Rav1dFrameContext_lf { pub tx_lpf_right_edge: Vec, /* len = h*2 */ pub cdef_line_buf: AlignedVec32, /* AlignedVec32 */ pub lr_line_buf: *mut u8, - pub cdef_line: [[*mut DynPixel; 3]; 2], /* [2 pre/post][3 plane] */ - pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */ - pub lr_lpf_line: [*mut DynPixel; 3], /* plane */ + pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */ + pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */ + pub lr_lpf_line: [*mut DynPixel; 3], /* plane */ // in-loop filter per-frame state keeping pub start_of_tile_row: *mut u8, diff --git a/src/recon.rs b/src/recon.rs index 69df46acd..aed76e2ea 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -4568,7 +4568,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( pub(crate) unsafe fn rav1d_filter_sbrow_cdef( c: &Rav1dContext, - f: &Rav1dFrameData, + f: &mut Rav1dFrameData, tc: &mut Rav1dTaskContext, sby: c_int, ) { @@ -4721,10 +4721,10 @@ pub(crate) unsafe fn rav1d_filter_sbrow( rav1d_filter_sbrow_deblock_cols::(c, f, t, sby); rav1d_filter_sbrow_deblock_rows::(c, f, t, sby); let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); - let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); if seq_hdr.cdef != 0 { rav1d_filter_sbrow_cdef::(c, f, t, sby); } + let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); if frame_hdr.size.width[0] != frame_hdr.size.width[1] { rav1d_filter_sbrow_resize::(c, f, t, sby); }