Skip to content

Commit

Permalink
Rav1dFrameContext_lf::cdef_line_buf: Make into AlignedVec32
Browse files (browse the repository at this point in the history)
  • Loading branch information
randomPoison committed Mar 4, 2024
1 parent b40cfa5 commit b344fd2
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 63 deletions.
1 change: 1 addition & 0 deletions src/align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,5 @@ impl<T: Copy, C: AlignedByteChunk> Default for AlignedVec<T, C> {
}
}

/// Shorthand for an [`AlignedVec`] whose chunk type parameter is `Align32<[u8; 32]>`.
///
/// NOTE(review): presumably this yields storage aligned to 32 bytes; confirm
/// against the definitions of `Align32` and `AlignedByteChunk`.
pub type AlignedVec32<T> = AlignedVec<T, Align32<[u8; 32]>>;
/// Shorthand for an [`AlignedVec`] whose chunk type parameter is `Align64<[u8; 64]>`.
///
/// NOTE(review): presumably this yields storage aligned to 64 bytes; confirm
/// against the definitions of `Align64` and `AlignedByteChunk`.
pub type AlignedVec64<T> = AlignedVec<T, Align64<[u8; 64]>>;
94 changes: 36 additions & 58 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4387,74 +4387,52 @@ pub(crate) unsafe fn rav1d_decode_frame_init(
let mut uv_stride = f.cur.stride[1];
let has_resize = (frame_hdr.size.width[0] != frame_hdr.size.width[1]) as c_int;
let need_cdef_lpf_copy = (c.tc.len() > 1 && has_resize != 0) as c_int;
if y_stride * f.sbh as isize * 4 != f.lf.cdef_buf_plane_sz[0] as isize
|| uv_stride * f.sbh as isize * 8 != f.lf.cdef_buf_plane_sz[1] as isize
|| need_cdef_lpf_copy != f.lf.need_cdef_lpf_copy
|| f.sbh != f.lf.cdef_buf_sbh
{
rav1d_free_aligned(f.lf.cdef_line_buf as *mut c_void);
let mut alloc_sz: usize = 64;
alloc_sz += (y_stride.unsigned_abs() * 4 * f.sbh as usize) << need_cdef_lpf_copy;
alloc_sz += (uv_stride.unsigned_abs() * 8 * f.sbh as usize) << need_cdef_lpf_copy;
f.lf.cdef_line_buf = rav1d_alloc_aligned(alloc_sz, 32) as *mut u8;
let mut ptr = f.lf.cdef_line_buf;
if ptr.is_null() {
f.lf.cdef_buf_plane_sz[1] = 0;
f.lf.cdef_buf_plane_sz[0] = f.lf.cdef_buf_plane_sz[1];
return Err(ENOMEM);
}
let mut alloc_sz: usize = 64;
alloc_sz += (y_stride.unsigned_abs() * 4 * f.sbh as usize) << need_cdef_lpf_copy;
alloc_sz += (uv_stride.unsigned_abs() * 8 * f.sbh as usize) << need_cdef_lpf_copy;
// TODO: Fallible allocation.
f.lf.cdef_line_buf.resize(alloc_sz, 0);
let mut ptr = f.lf.cdef_line_buf.as_mut_ptr();

ptr = ptr.offset(32);
if y_stride < 0 {
f.lf.cdef_line[0][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel;
f.lf.cdef_line[1][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 3))) as *mut DynPixel;
} else {
f.lf.cdef_line[0][0] = ptr.offset(y_stride * 0) as *mut DynPixel;
f.lf.cdef_line[1][0] = ptr.offset(y_stride * 2) as *mut DynPixel;
}
ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4);
if uv_stride < 0 {
f.lf.cdef_line[0][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel;
f.lf.cdef_line[0][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 3))) as *mut DynPixel;
f.lf.cdef_line[1][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 5))) as *mut DynPixel;
f.lf.cdef_line[1][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 7))) as *mut DynPixel;
} else {
f.lf.cdef_line[0][1] = ptr.offset(uv_stride * 0) as *mut DynPixel;
f.lf.cdef_line[0][2] = ptr.offset(uv_stride * 2) as *mut DynPixel;
f.lf.cdef_line[1][1] = ptr.offset(uv_stride * 4) as *mut DynPixel;
f.lf.cdef_line[1][2] = ptr.offset(uv_stride * 6) as *mut DynPixel;
}

ptr = ptr.offset(32);
if need_cdef_lpf_copy != 0 {
ptr = ptr.offset(uv_stride.abs() * f.sbh as isize * 8);
if y_stride < 0 {
f.lf.cdef_line[0][0] =
f.lf.cdef_lpf_line[0] =
ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel;
f.lf.cdef_line[1][0] =
ptr.offset(-(y_stride * (f.sbh as isize * 4 - 3))) as *mut DynPixel;
} else {
f.lf.cdef_line[0][0] = ptr.offset(y_stride * 0) as *mut DynPixel;
f.lf.cdef_line[1][0] = ptr.offset(y_stride * 2) as *mut DynPixel;
f.lf.cdef_lpf_line[0] = ptr as *mut DynPixel;
}
ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4);
if uv_stride < 0 {
f.lf.cdef_line[0][1] =
f.lf.cdef_lpf_line[1] =
ptr.offset(-(uv_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel;
f.lf.cdef_lpf_line[2] =
ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel;
f.lf.cdef_line[0][2] =
ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 3))) as *mut DynPixel;
f.lf.cdef_line[1][1] =
ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 5))) as *mut DynPixel;
f.lf.cdef_line[1][2] =
ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 7))) as *mut DynPixel;
} else {
f.lf.cdef_line[0][1] = ptr.offset(uv_stride * 0) as *mut DynPixel;
f.lf.cdef_line[0][2] = ptr.offset(uv_stride * 2) as *mut DynPixel;
f.lf.cdef_line[1][1] = ptr.offset(uv_stride * 4) as *mut DynPixel;
f.lf.cdef_line[1][2] = ptr.offset(uv_stride * 6) as *mut DynPixel;
}

if need_cdef_lpf_copy != 0 {
ptr = ptr.offset(uv_stride.abs() * f.sbh as isize * 8);
if y_stride < 0 {
f.lf.cdef_lpf_line[0] =
ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel;
} else {
f.lf.cdef_lpf_line[0] = ptr as *mut DynPixel;
}
ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4);
if uv_stride < 0 {
f.lf.cdef_lpf_line[1] =
ptr.offset(-(uv_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel;
f.lf.cdef_lpf_line[2] =
ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel;
} else {
f.lf.cdef_lpf_line[1] = ptr as *mut DynPixel;
f.lf.cdef_lpf_line[2] = ptr.offset(uv_stride * f.sbh as isize * 4) as *mut DynPixel;
}
f.lf.cdef_lpf_line[1] = ptr as *mut DynPixel;
f.lf.cdef_lpf_line[2] = ptr.offset(uv_stride * f.sbh as isize * 4) as *mut DynPixel;
}

f.lf.cdef_buf_plane_sz[0] = y_stride as c_int * f.sbh * 4;
f.lf.cdef_buf_plane_sz[1] = uv_stride as c_int * f.sbh * 8;
f.lf.need_cdef_lpf_copy = need_cdef_lpf_copy;
f.lf.cdef_buf_sbh = f.sbh;
}

let sb128 = seq_hdr.sb128;
Expand Down
5 changes: 1 addition & 4 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,14 +428,12 @@ pub struct Rav1dFrameContext_lf {
pub level: Vec<[u8; 4]>,
pub mask: Vec<Av1Filter>, /* len = w*h */
pub lr_mask: Vec<Av1Restoration>,
pub cdef_buf_plane_sz: [c_int; 2], /* stride*sbh*4 */
pub cdef_buf_sbh: c_int,
pub lr_buf_plane_sz: [c_int; 2], /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
pub lim_lut: Align16<Av1FilterLUT>,
pub last_sharpness: c_int,
pub lvl: [[[[u8; 2]; 8]; 4]; 8], /* [8 seg_id][4 dir][8 ref][2 is_gmv] */
pub tx_lpf_right_edge: Vec<u8>, /* len = h*2 */
pub cdef_line_buf: *mut u8,
pub cdef_line_buf: AlignedVec32<u8>, /* AlignedVec32<DynPixel> */
pub lr_line_buf: *mut u8,
pub cdef_line: [[*mut DynPixel; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */
Expand All @@ -444,7 +442,6 @@ pub struct Rav1dFrameContext_lf {
// in-loop filter per-frame state keeping
pub start_of_tile_row: *mut u8,
pub start_of_tile_row_sz: c_int,
pub need_cdef_lpf_copy: c_int,
pub p: [*mut DynPixel; 3],
pub sr_p: [*mut DynPixel; 3],
pub restore_planes: c_int, // enum LrRestorePlanes
Expand Down
7 changes: 6 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,11 @@ pub(crate) unsafe fn rav1d_open(c_out: &mut *mut Rav1dContext, s: &Rav1dSettings
f.task_thread.pending_tasks = Default::default();
}
(&mut f.task_thread.ttd as *mut Arc<TaskThreadData>).write(Arc::clone(&(*c).task_thread));
addr_of_mut!(f.lf.level).write(Default::default());
addr_of_mut!(f.lf.mask).write(Default::default());
addr_of_mut!(f.lf.lr_mask).write(Default::default());
addr_of_mut!(f.lf.tx_lpf_right_edge).write(Default::default());
addr_of_mut!(f.lf.cdef_line_buf).write(Default::default());
f.lf.last_sharpness = -(1 as c_int);
rav1d_refmvs_init(&mut f.rf);
n = n.wrapping_add(1);
Expand Down Expand Up @@ -903,7 +908,7 @@ impl Drop for Rav1dContext {
let _ = mem::take(&mut f.lf.tx_lpf_right_edge); // TODO: remove when context is owned
free(f.lf.start_of_tile_row as *mut c_void);
rav1d_refmvs_clear(&mut f.rf);
rav1d_free_aligned(f.lf.cdef_line_buf as *mut c_void);
let _ = mem::take(&mut f.lf.cdef_line_buf); // TODO: remove when context is owned
rav1d_free_aligned(f.lf.lr_line_buf as *mut c_void);
n_1 = n_1.wrapping_add(1);
}
Expand Down

0 comments on commit b344fd2

Please sign in to comment.