Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

struct Rav1dFrameContext_lf::cdef_line: Convert pointers to offsets #777

Merged
merged 1 commit into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions include/common/bitdepth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::ops::Div;
use std::ops::Mul;
use std::ops::Rem;
use std::ops::Shr;
use std::slice;

pub trait FromPrimitive<T> {
fn from_prim(t: T) -> Self;
Expand Down Expand Up @@ -207,6 +208,34 @@ pub trait BitDepth: Clone + Copy {

fn get_intermediate_bits(&self) -> u8;

/// Reinterprets `bytes` as a shared slice of `Self::Pixel` values.
///
/// The result borrows the same memory as `bytes` and contains
/// `bytes.len() / size_of::<Self::Pixel>()` elements.
///
/// # Panics
///
/// Panics if `bytes` does not start at an address that is suitably aligned
/// for `Self::Pixel`. When debug assertions are enabled, it also panics if
/// `bytes.len()` is not a whole multiple of the pixel size.
fn cast_pixel_slice(bytes: &[u8]) -> &[Self::Pixel] {
    let size = mem::size_of::<Self::Pixel>();

    // The byte length is expected to split evenly into pixels; this is only
    // verified when debug assertions are on.
    debug_assert!(bytes.len() % size == 0);
    // The start address must always satisfy the pixel type's alignment.
    assert!(bytes.as_ptr() as usize % mem::align_of::<Self::Pixel>() == 0);

    let pixels = bytes.as_ptr().cast::<Self::Pixel>();
    let count = bytes.len() / size;

    // SAFETY: `pixels` is aligned for `Self::Pixel` (asserted above), and
    // `count` is rounded down, so `count * size` never exceeds `bytes.len()`
    // and the new slice stays inside the borrowed bytes.
    // NOTE(review): assumes every bit pattern is a valid `Self::Pixel` —
    // confirm against the bounds on the trait's `Pixel` type.
    unsafe { slice::from_raw_parts(pixels, count) }
}

/// Reinterprets `bytes` as an exclusive slice of `Self::Pixel` values.
///
/// The result mutably borrows the same memory as `bytes` and contains
/// `bytes.len() / size_of::<Self::Pixel>()` elements.
///
/// # Panics
///
/// Panics if `bytes` does not start at an address that is suitably aligned
/// for `Self::Pixel`. When debug assertions are enabled, it also panics if
/// `bytes.len()` is not a whole multiple of the pixel size.
fn cast_pixel_slice_mut(bytes: &mut [u8]) -> &mut [Self::Pixel] {
    let size = mem::size_of::<Self::Pixel>();

    // The byte length is expected to split evenly into pixels; this is only
    // verified when debug assertions are on.
    debug_assert!(bytes.len() % size == 0);
    // The start address must always satisfy the pixel type's alignment.
    assert!(bytes.as_ptr() as usize % mem::align_of::<Self::Pixel>() == 0);

    let count = bytes.len() / size;
    let pixels = bytes.as_mut_ptr().cast::<Self::Pixel>();

    // SAFETY: `pixels` is aligned for `Self::Pixel` (asserted above), and
    // `count` is rounded down, so `count * size` never exceeds `bytes.len()`
    // and the new slice stays inside the exclusively borrowed bytes.
    // NOTE(review): assumes every bit pattern is a valid `Self::Pixel` —
    // confirm against the bounds on the trait's `Pixel` type.
    unsafe { slice::from_raw_parts_mut(pixels, count) }
}

const PREP_BIAS: i16;

unsafe fn select<T>(bd: &BitDepthUnion<T>) -> &T::T<Self>
Expand Down
70 changes: 39 additions & 31 deletions src/cdef_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ impl Backup2x8Flags {
}

unsafe fn backup2lines<BD: BitDepth>(
dst: &[*mut BD::Pixel; 3],
dst_buf: &mut [BD::Pixel],
dst_off: [usize; 3],
src: &[*mut BD::Pixel; 3],
stride: &[ptrdiff_t; 2],
layout: Rav1dPixelLayout,
Expand All @@ -42,13 +43,13 @@ unsafe fn backup2lines<BD: BitDepth>(
let len = 2 * y_stride.unsigned_abs();
if y_stride < 0 {
BD::pixel_copy(
slice::from_raw_parts_mut(dst[0].offset(y_stride), len),
&mut dst_buf[dst_off[0].wrapping_add_signed(y_stride)..][..len],
slice::from_raw_parts(src[0].offset(7 * y_stride), len),
len,
);
} else {
BD::pixel_copy(
slice::from_raw_parts_mut(dst[0], len),
&mut dst_buf[dst_off[0]..][..len],
slice::from_raw_parts(src[0].offset(6 * y_stride), len),
len,
);
Expand All @@ -65,12 +66,12 @@ unsafe fn backup2lines<BD: BitDepth>(
};

BD::pixel_copy(
slice::from_raw_parts_mut(dst[1].offset(uv_stride), len),
&mut dst_buf[dst_off[1].wrapping_add_signed(uv_stride)..][..len],
slice::from_raw_parts(src[1].offset(uv_off * uv_stride), len),
len,
);
BD::pixel_copy(
slice::from_raw_parts_mut(dst[2].offset(uv_stride), len),
&mut dst_buf[dst_off[2].wrapping_add_signed(uv_stride)..][..len],
slice::from_raw_parts(src[2].offset(uv_off * uv_stride), len),
len,
);
Expand All @@ -82,12 +83,12 @@ unsafe fn backup2lines<BD: BitDepth>(
};

BD::pixel_copy(
slice::from_raw_parts_mut(dst[1], len),
&mut dst_buf[dst_off[1]..][..len],
slice::from_raw_parts(src[1].offset(uv_off * uv_stride), len),
len,
);
BD::pixel_copy(
slice::from_raw_parts_mut(dst[2], len),
&mut dst_buf[dst_off[2]..][..len],
slice::from_raw_parts(src[2].offset(uv_off * uv_stride), len),
len,
);
Expand Down Expand Up @@ -157,7 +158,7 @@ fn adjust_strength(strength: c_int, var: c_uint) -> c_int {
pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
c: &Rav1dContext,
tc: &mut Rav1dTaskContext,
f: &Rav1dFrameData,
f: &mut Rav1dFrameData,
p: &[*mut BD::Pixel; 3],
lflvl_offset: i32,
by_start: c_int,
Expand Down Expand Up @@ -194,6 +195,8 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let y_stride: ptrdiff_t = BD::pxstride(f.cur.stride[0]);
let uv_stride: ptrdiff_t = BD::pxstride(f.cur.stride[1]);

let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf);

let mut bit = false;
for by in (by_start..by_end).step_by(2) {
let tf = tc.top_pre_cdef_toggle != 0;
Expand All @@ -206,15 +209,15 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
&& edges.contains(CdefEdgeFlags::HAVE_BOTTOM)
{
// backup pre-filter data for next iteration
let cdef_top_bak: [*mut BD::Pixel; 3] = [
(f.lf.cdef_line[!tf as usize][0] as *mut BD::Pixel)
.offset(have_tt as isize * sby as isize * 4 * y_stride),
(f.lf.cdef_line[!tf as usize][1] as *mut BD::Pixel)
.offset(have_tt as isize * sby as isize * 8 * uv_stride),
(f.lf.cdef_line[!tf as usize][2] as *mut BD::Pixel)
.offset(have_tt as isize * sby as isize * 8 * uv_stride),
let cdef_top_bak = [
f.lf.cdef_line[!tf as usize][0]
.wrapping_add_signed(have_tt as isize * sby as isize * 4 * y_stride),
f.lf.cdef_line[!tf as usize][1]
.wrapping_add_signed(have_tt as isize * sby as isize * 8 * uv_stride),
f.lf.cdef_line[!tf as usize][2]
.wrapping_add_signed(have_tt as isize * sby as isize * 8 * uv_stride),
];
backup2lines::<BD>(&cdef_top_bak, &ptrs, &f.cur.stride, layout);
backup2lines::<BD>(cdef_line_buf, cdef_top_bak, &ptrs, &f.cur.stride, layout);
}

let mut lr_bak =
Expand Down Expand Up @@ -327,9 +330,11 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
bot = bptrs[0].offset(8 * y_stride as isize);
st_y = false;
} else if !sbrow_start && by + 2 >= by_end {
top = f.lf.cdef_line[tf as usize][0]
.cast::<BD::Pixel>()
.offset((sby * 4) as isize * y_stride + (bx * 4) as isize);
offset = (sby * 4) as isize * y_stride + (bx * 4) as isize;
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][0])
.offset(offset);
if resize {
offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize;
bot = f.lf.cdef_lpf_line[0].cast::<BD::Pixel>().offset(offset);
Expand All @@ -344,10 +349,12 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
}

if st_y {
offset = (sby * 4) as isize * y_stride;
top = f.lf.cdef_line[tf as usize][0]
.cast::<BD::Pixel>()
.offset(have_tt as isize * offset + (bx * 4) as isize);
offset = have_tt as isize * (sby * 4) as isize * y_stride
+ (bx * 4) as isize;
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][0])
.offset(offset);
bot = bptrs[0].offset(8 * y_stride as isize);
}

Expand Down Expand Up @@ -415,8 +422,9 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else if !sbrow_start && by + 2 >= by_end {
let top_offset: ptrdiff_t = (sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
top = f.lf.cdef_line[tf as usize][pl]
.cast::<BD::Pixel>()
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][pl])
.offset(top_offset);
if resize {
offset = (sby * 4 + 2) as isize * uv_stride
Expand All @@ -437,12 +445,12 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
}

if st_uv {
let offset_0 = (sby * 8) as isize * uv_stride;
top =
f.lf.cdef_line[tf as usize][pl].cast::<BD::Pixel>().offset(
have_tt as isize * offset_0
+ (bx * 4 >> ss_hor) as isize,
);
let offset = have_tt as isize * (sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][pl])
.offset(offset);
bot = bptrs[pl].offset((8 >> ss_ver) * uv_stride);
}

Expand Down
45 changes: 30 additions & 15 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::include::common::bitdepth::BitDepth16;
use crate::include::common::bitdepth::BitDepth8;
use crate::include::common::bitdepth::DynCoef;
use crate::include::common::bitdepth::DynPixel;
use crate::include::common::bitdepth::BPC;
use crate::include::common::intops::apply_sign64;
use crate::include::common::intops::iclip;
use crate::include::common::intops::iclip_u8;
Expand Down Expand Up @@ -4391,29 +4392,43 @@ pub(crate) unsafe fn rav1d_decode_frame_init(
alloc_sz += (uv_stride.unsigned_abs() * 8 * f.sbh as usize) << need_cdef_lpf_copy;
// TODO: Fallible allocation.
f.lf.cdef_line_buf.resize(alloc_sz, 0);
let mut ptr = f.lf.cdef_line_buf.as_mut_ptr();

ptr = ptr.offset(32);
let bpc = BPC::from_bitdepth_max(f.bitdepth_max);
let y_stride_px = bpc.pxstride(f.cur.stride[0]);
let uv_stride_px = bpc.pxstride(f.cur.stride[1]);

let mut offset = bpc.pxstride(32usize);
if y_stride < 0 {
f.lf.cdef_line[0][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel;
f.lf.cdef_line[1][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 3))) as *mut DynPixel;
f.lf.cdef_line[0][0] =
offset.wrapping_add_signed(-(y_stride_px * (f.sbh as isize * 4 - 1)));
f.lf.cdef_line[1][0] =
offset.wrapping_add_signed(-(y_stride_px * (f.sbh as isize * 4 - 3)));
} else {
f.lf.cdef_line[0][0] = ptr.offset(y_stride * 0) as *mut DynPixel;
f.lf.cdef_line[1][0] = ptr.offset(y_stride * 2) as *mut DynPixel;
f.lf.cdef_line[0][0] = offset.wrapping_add_signed(y_stride_px * 0);
f.lf.cdef_line[1][0] = offset.wrapping_add_signed(y_stride_px * 2);
}
ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4);
offset = offset.wrapping_add_signed(y_stride_px.abs() * f.sbh as isize * 4);
if uv_stride < 0 {
f.lf.cdef_line[0][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel;
f.lf.cdef_line[0][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 3))) as *mut DynPixel;
f.lf.cdef_line[1][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 5))) as *mut DynPixel;
f.lf.cdef_line[1][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 7))) as *mut DynPixel;
f.lf.cdef_line[0][1] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 1)));
f.lf.cdef_line[0][2] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 3)));
f.lf.cdef_line[1][1] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 5)));
f.lf.cdef_line[1][2] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 7)));
} else {
f.lf.cdef_line[0][1] = ptr.offset(uv_stride * 0) as *mut DynPixel;
f.lf.cdef_line[0][2] = ptr.offset(uv_stride * 2) as *mut DynPixel;
f.lf.cdef_line[1][1] = ptr.offset(uv_stride * 4) as *mut DynPixel;
f.lf.cdef_line[1][2] = ptr.offset(uv_stride * 6) as *mut DynPixel;
f.lf.cdef_line[0][1] = offset.wrapping_add_signed(uv_stride_px * 0);
f.lf.cdef_line[0][2] = offset.wrapping_add_signed(uv_stride_px * 2);
f.lf.cdef_line[1][1] = offset.wrapping_add_signed(uv_stride_px * 4);
f.lf.cdef_line[1][2] = offset.wrapping_add_signed(uv_stride_px * 6);
}

let mut ptr =
f.lf.cdef_line_buf
.as_mut_ptr()
.add(32)
.offset(y_stride.abs() * f.sbh as isize * 4);
if need_cdef_lpf_copy != 0 {
ptr = ptr.offset(uv_stride.abs() * f.sbh as isize * 8);
if y_stride < 0 {
Expand Down
8 changes: 4 additions & 4 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ pub(crate) struct Rav1dFrameContext_bd_fn {
pub filter_sbrow_deblock_cols: filter_sbrow_fn,
pub filter_sbrow_deblock_rows: filter_sbrow_fn,
pub filter_sbrow_cdef:
unsafe fn(&Rav1dContext, &Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (),
unsafe fn(&Rav1dContext, &mut Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (),
pub filter_sbrow_resize: filter_sbrow_fn,
pub filter_sbrow_lr: filter_sbrow_fn,
pub backup_ipred_edge: backup_ipred_edge_fn,
Expand Down Expand Up @@ -462,9 +462,9 @@ pub struct Rav1dFrameContext_lf {
pub tx_lpf_right_edge: TxLpfRightEdge,
pub cdef_line_buf: AlignedVec32<u8>, /* AlignedVec32<DynPixel> */
pub lr_line_buf: *mut u8,
pub cdef_line: [[*mut DynPixel; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */
pub lr_lpf_line: [*mut DynPixel; 3], /* plane */
pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */
pub lr_lpf_line: [*mut DynPixel; 3], /* plane */

// in-loop filter per-frame state keeping
pub start_of_tile_row: *mut u8,
Expand Down
4 changes: 2 additions & 2 deletions src/recon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4568,7 +4568,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows<BD: BitDepth>(

pub(crate) unsafe fn rav1d_filter_sbrow_cdef<BD: BitDepth>(
c: &Rav1dContext,
f: &Rav1dFrameData,
f: &mut Rav1dFrameData,
tc: &mut Rav1dTaskContext,
sby: c_int,
) {
Expand Down Expand Up @@ -4721,10 +4721,10 @@ pub(crate) unsafe fn rav1d_filter_sbrow<BD: BitDepth>(
rav1d_filter_sbrow_deblock_cols::<BD>(c, f, t, sby);
rav1d_filter_sbrow_deblock_rows::<BD>(c, f, t, sby);
let seq_hdr = &***f.seq_hdr.as_ref().unwrap();
let frame_hdr = &***f.frame_hdr.as_ref().unwrap();
if seq_hdr.cdef != 0 {
rav1d_filter_sbrow_cdef::<BD>(c, f, t, sby);
}
let frame_hdr = &***f.frame_hdr.as_ref().unwrap();
if frame_hdr.size.width[0] != frame_hdr.size.width[1] {
rav1d_filter_sbrow_resize::<BD>(c, f, t, sby);
}
Expand Down
Loading