From 0b8324b125ac03a99b3c7331f113f2fccfa2d401 Mon Sep 17 00:00:00 2001 From: Stephen Crane Date: Thu, 4 Jul 2024 00:49:14 -0700 Subject: [PATCH] Re-implement case set as a macro --- lib.rs | 4 +- src/ctx.rs | 355 +++++++++++++++++++++++++++++--------------- src/decode.rs | 297 +++++++++++++++++++----------------- src/disjoint_mut.rs | 77 ++++++++++ src/lf_mask.rs | 45 +++--- src/recon.rs | 187 ++++++++++++----------- 6 files changed, 598 insertions(+), 367 deletions(-) diff --git a/lib.rs b/lib.rs index 360f905ae..404a4a800 100644 --- a/lib.rs +++ b/lib.rs @@ -37,12 +37,12 @@ pub mod src { mod cdf; mod const_fn; pub mod cpu; - mod ctx; + pub mod ctx; mod cursor; mod data; mod decode; mod dequant_tables; - pub(crate) mod disjoint_mut; + pub mod disjoint_mut; pub(crate) mod enum_map; mod env; pub(crate) mod error; diff --git a/src/ctx.rs b/src/ctx.rs index be22f7e27..924503ef4 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -1,4 +1,5 @@ -//! The [`CaseSet`] API below is a safe and simplified version of the `case_set*` macros in `ctx.h`. +//! The [`case_set!`] macro is a safe and simplified version of the `case_set*` +//! macros in `ctx.h`. //! //! The `case_set*` macros themselves replaced `memset`s in order to further optimize them //! (in e3b5d4d044506f9e0e95e79b3de42fd94386cc61, @@ -13,135 +14,249 @@ //! as unaligned writes are UB, and so we'd need to check at runtime if they're aligned //! (a runtime-determined `off`set is used, so we can't reasonably ensure this at compile-time). //! -//! To more thoroughly check this, I ran the same benchmarks done in -//! e3b5d4d044506f9e0e95e79b3de42fd94386cc61, which introduced the `case_set*` macros: +//! We also want to avoid multiple switches when setting a group of buffers as +//! the C implementation did, which was implemented in +//! https://github.com/memorysafety/rav1d/pull/1293. //! -//! ```sh -//! cargo build --release && hyperfine './target/release/dav1d -i ./tests/large/chimera_8b_1080p.ivf -l 1000 -o /dev/null' -//! 
``` +//! # Benchmarks //! -//! for 3 implementations: -//! 1. the original `case_set*` macros translated directly to `unsafe` Rust `fn`s -//! 2. the safe [`CaseSet`] implementation below using [`small_memset`] with its small powers of 2 optimization -//! 3. a safe [`CaseSet`] implementation using [`slice::fill`]/`memset` only -//! -//! The [`small_memset`] version was ~1.27% faster than the `case_set*` one, -//! and ~3.26% faster than the `memset` one. -//! The `case_set*` macros were also faster than `memset` in C by a similar margin, -//! meaning the `memset` option is the slowest in both C and Rust, -//! and since it was replaced with `case_set*` in C, we shouldn't use it in Rust. -//! Thus, the [`small_memset`] implementation seems optimal, as it: -//! * is the fastest of the Rust implementations -//! * is completely safe -//! * employs the same small powers of 2 optimization the `case_set*` implementation did -//! * is far simpler than the `case_set*` implementation, consisting of a `match` and array writes +//! Comparing this implementation to the previous implementation of `CaseSet` we +//! see an 8.2-10.5% speedup for a single buffer, a 5.9-7.0% speedup for +//! multiple buffers, and a minor improvement to multiple [`DisjointMut`] +//! buffers (which happened to be well-optimized in the previous +//! implementation). //! //! [`BlockContext`]: crate::src::env::BlockContext -use crate::src::disjoint_mut::AsMutPtr; -use crate::src::disjoint_mut::DisjointMut; -use std::iter::zip; +//! [`DisjointMut`]: crate::src::disjoint_mut::DisjointMut -/// Perform a `memset` optimized for lengths that are small powers of 2. +/// Fill small ranges of buffers with a value. +/// +/// This is effectively a specialized version of [`slice::fill`] for small +/// power-of-two sized ranges of buffers. +/// +/// `$UP_TO` is the maximum length that will be optimized, with powers of two up +/// to 64 supported. 
If the buffer length is not a power of two or greater than +/// `$UP_TO`, this macro will do nothing. See [`case_set_with_default!`] to fill +/// buffers with non-conforming lengths if needed. +/// +/// # Examples +/// +/// ``` +/// # use rav1d::case_set; +/// let mut buf = [0u8; 32]; +/// let len = 16; +/// for offset in [0, 16] { +/// case_set!(up_to = 32, len, offset, { +/// set!(&mut buf, 1u8); +/// }); +/// } +/// ``` +/// +/// In the simplest case, `$len` is the length of the buffer range to fill +/// starting from `$offset`. The `$body` block is executed with `len` and +/// `offset` identifiers set to the given length and offset values. Within the +/// body a `set!` macro is available and must be called to set each buffer range +/// to a value. `set!` takes a buffer and a value and sets the range +/// `buf[offset..][..len]` to the value. +/// ``` +/// # macro_rules! set { +/// # ($buf:expr, $val:expr) => {}; +/// # } +/// set!(buf, value); +/// ``` +/// +/// ## Naming parameters +/// +/// The identifier for either or both of `len` and `offset` can be overridden by +/// specifying `identifier=value` for those parameters: +/// ``` +/// # use rav1d::case_set; +/// let mut buf = [0u8; 32]; +/// let outer_len = 16; +/// for outer_offset in [0, 16] { +/// case_set!( +/// up_to = 32, +/// len=outer_len, +/// offset=outer_offset, +/// { +/// set!(&mut buf, (offset+len) as u8); +/// } +/// ); +/// } +/// ``` +/// +/// ## `DisjointMut` buffers +/// +/// [`DisjointMut`] buffers can be used in basically the same way as normal +/// buffers but using the `set_disjoint!` macro instead of `set!`. 
+/// ``` +/// # use rav1d::case_set; +/// # use rav1d::src::disjoint_mut::DisjointMut; +/// let mut buf = DisjointMut::new([0u8; 32]); +/// let len = 16; +/// for offset in [0, 16] { +/// case_set!(up_to = 32, len, offset, { +/// set_disjoint!(&mut buf, 1u8); +/// }); +/// } +/// ``` +/// +/// ## Multiple buffer ranges +/// +/// Multiple buffers with different lengths and offsets can be filled with the +/// same body statements. In the following example, two buffers with different +/// sizes are initialized by quarters. +/// ``` +/// # use rav1d::case_set; +/// let mut buf1 = [0u8; 32]; +/// let mut buf2 = [0u8; 64]; +/// for offset in [0, 8, 16, 24] { +/// case_set!( +/// up_to = 16, +/// buf = [&mut buf1[..], &mut buf2[..]], +/// len = [8, 16], +/// offset = [offset, offset*2], +/// { +/// set!(buf, len as u8 >> 3); +/// } +/// ); +/// } +/// ``` /// -/// For power of 2 lengths `<= UP_TO`, -/// the `memset` is done as an array write of that exactly (compile-time known) length. -/// If the length is not a power of 2 or `> UP_TO`, -/// then the `memset` is done by [`slice::fill`] (a `memset` call) if `WITH_DEFAULT` is `true`, -/// or else skipped if `WITH_DEFAULT` is `false`. +/// A more realistic example of filling multiple buffers with the same value is +/// initializing different struct fields at the same time (from +/// `src/decode.rs`): +/// ```ignore +/// case_set!( +/// up_to = 32, +/// ctx = [(&t.l, 1), (&f.a[t.a], 0)], +/// len = [bh4, bw4], +/// offset = [by4, bx4], +/// { +/// let (dir, dir_index) = ctx; +/// set_disjoint!(dir.seg_pred, seg_pred.into()); +/// set_disjoint!(dir.skip_mode, b.skip_mode); +/// set_disjoint!(dir.intra, 0); +/// set_disjoint!(dir.skip, b.skip); +/// set_disjoint!(dir.pal_sz, 0); +/// } +/// ); +/// ``` /// -/// This optimizes for the common cases where `buf.len()` is a small power of 2, -/// where the array write is optimized as few and large stores as possible. 
-#[inline] -pub fn small_memset( - buf: &mut [T], - val: T, -) { - fn as_array(buf: &mut [T]) -> &mut [T; N] { - buf.try_into().unwrap() - } - match buf.len() { - 01 if UP_TO >= 01 => *as_array(buf) = [val; 01], - 02 if UP_TO >= 02 => *as_array(buf) = [val; 02], - 04 if UP_TO >= 04 => *as_array(buf) = [val; 04], - 08 if UP_TO >= 08 => *as_array(buf) = [val; 08], - 16 if UP_TO >= 16 => *as_array(buf) = [val; 16], - 32 if UP_TO >= 32 => *as_array(buf) = [val; 32], - 64 if UP_TO >= 64 => *as_array(buf) = [val; 64], - _ => { - if WITH_DEFAULT { - buf.fill(val) +/// [`DisjointMut`]: crate::src::disjoint_mut::DisjointMut +macro_rules! case_set { + (up_to=$UP_TO:literal, $(@DEFAULT=$WITH_DEFAULT:literal,)? $ctx:ident=[$($ctx_expr:expr),* $(,)?], $len:ident=[$($len_expr:expr),* $(,)?], $offset:ident=[$($offset_expr:expr),* $(,)?], $body:block) => { + let ctxs = [$($ctx_expr,)*]; + let lens = [$($len_expr,)*]; + let offsets = [$($offset_expr,)*]; + assert_eq!(ctxs.len(), lens.len()); + assert_eq!(ctxs.len(), offsets.len()); + for (i, ctx) in ctxs.into_iter().enumerate() { + case_set!(up_to=$UP_TO, $(@DEFAULT=$WITH_DEFAULT,)? $ctx=ctx, $len=lens[i], $offset=offsets[i], $body); + } + }; + (up_to=$UP_TO:literal, $(@DEFAULT=$WITH_DEFAULT:literal,)? $len:ident, $offset:ident, $body:block) => { + case_set!(up_to=$UP_TO, $(@DEFAULT=$WITH_DEFAULT,)? _ctx=(), $len=$len, $offset=$offset, $body); + }; + (up_to=$UP_TO:literal, $(@DEFAULT=$WITH_DEFAULT:literal,)? $len:ident=$len_expr:expr, $offset:ident=$offset_expr:expr, $body:block) => { + case_set!(up_to=$UP_TO, $(@DEFAULT=$WITH_DEFAULT,)? _ctx=(), $len=$len_expr, $offset=$offset_expr, $body); + }; + (up_to=$UP_TO:literal, $(@DEFAULT=$WITH_DEFAULT:literal,)? $ctx:ident=$ctx_expr:expr, $len:ident=$len_expr:expr, $offset:ident=$offset_expr:expr, $body:block) => { + #[allow(unused_mut)] + let mut $ctx = $ctx_expr; + let $len = $len_expr; + let $offset = $offset_expr; + { + #[allow(unused_macros)] + macro_rules! 
set { + ($buf:expr, $val:expr) => {{ + assert!($offset <= $buf.len() && $offset + $len <= $buf.len()); + }}; } + #[allow(unused_imports)] + use set as set_disjoint; + #[allow(unused)] + $body } - } -} - -pub struct CaseSetter { - offset: usize, - len: usize, -} - -impl CaseSetter { - #[inline] - pub fn set(&self, buf: &mut [T], val: T) { - small_memset::(&mut buf[self.offset..][..self.len], val); - } - - /// # Safety - /// - /// Caller must ensure that no elements of the written range are concurrently - /// borrowed (immutably or mutably) at all during the call to `set_disjoint`. - #[inline] - pub fn set_disjoint(&self, buf: &DisjointMut, val: V) - where - T: AsMutPtr, - V: Clone + Copy, - { - let mut buf = buf.index_mut(self.offset..self.offset + self.len); - small_memset::(&mut *buf, val); - } + macro_rules! exec_block { + ($N:literal, $block:block) => { + { + #[allow(unused_macros)] + macro_rules! set { + ($buf:expr, $val:expr) => { + // SAFETY: The offset and length are checked by the + // assert outside of the match. + let buf_range = unsafe { + $buf.get_unchecked_mut($offset..$offset+$N) + }; + *<&mut [_; $N]>::try_from(buf_range).unwrap() = [$val; $N]; + }; + } + #[allow(unused_macros)] + macro_rules! set_disjoint { + ($buf:expr, $val:expr) => {{ + // SAFETY: The offset and length are checked by the + // assert outside of the match. + let mut buf_range = unsafe { + $buf.index_mut_unchecked(($offset.., ..$N)) + }; + *<&mut [_; $N]>::try_from(&mut *buf_range).unwrap() = [$val; $N]; + }}; + } + $block + } + }; + } + match $len { + 01 if $UP_TO >= 01 => exec_block!(01, $body), + 02 if $UP_TO >= 02 => exec_block!(02, $body), + 04 if $UP_TO >= 04 => exec_block!(04, $body), + 08 if $UP_TO >= 08 => exec_block!(08, $body), + 16 if $UP_TO >= 16 => exec_block!(16, $body), + 32 if $UP_TO >= 32 => exec_block!(32, $body), + 64 if $UP_TO >= 64 => exec_block!(64, $body), + _ => { + if $($WITH_DEFAULT ||)? false { + #[allow(unused_macros)] + macro_rules! 
set { + ($buf:expr, $val:expr) => {{ + // SAFETY: The offset and length are checked by the + // assert outside of the match. + let buf_range = unsafe { + $buf.get_unchecked_mut($offset..$offset+$len) + }; + buf_range.fill($val); + }}; + } + #[allow(unused_macros)] + macro_rules! set_disjoint { + ($buf:expr, $val:expr) => {{ + // SAFETY: The offset and length are checked by the + // assert outside of the match. + let mut buf_range = unsafe { + $buf.index_mut_unchecked(($offset.., ..$len)) + }; + buf_range.fill($val); + }}; + } + $body + } + } + } + }; } +pub(crate) use case_set; -/// The entrypoint to the [`CaseSet`] API. +/// Fill small ranges of buffers with a value. /// -/// `UP_TO` and `WITH_DEFAULT` are made const generic parameters rather than have multiple `case_set*` `fn`s, -/// and these are put in a separate `struct` so that these 2 generic parameters -/// can be manually specified while the ones on the methods are inferred. -pub struct CaseSet; - -impl CaseSet { - /// Perform one case set. - /// - /// This API is generic over the element type (`T`) rather than hardcoding `u8`, - /// as sometimes other types are used, though only `i8` is used currently. - /// - /// The `len` and `offset` are supplied here and - /// applied to each `buf` passed to [`CaseSetter::set`] in `set_ctx`. - #[inline] - pub fn one(ctx: T, len: usize, offset: usize, mut set_ctx: F) - where - F: FnMut(&CaseSetter, T), - { - set_ctx(&CaseSetter { offset, len }, ctx); - } - - /// Perform many case sets in one call. - /// - /// This allows specifying the `set_ctx` closure inline easily, - /// and also allows you to group the same args together. - /// - /// The `lens`, `offsets`, and `dirs` are zipped and passed to [`CaseSet::one`], - /// where `dirs` can be an array of any type and whose elements are passed back to the `set_ctx` closure. 
- #[inline] - pub fn many( - dirs: [T; N], - lens: [usize; N], - offsets: [usize; N], - mut set_ctx: F, - ) where - F: FnMut(&CaseSetter, T), - { - for (dir, (len, offset)) in zip(dirs, zip(lens, offsets)) { - Self::one(dir, len, offset, &mut set_ctx); - } - } +/// `$UP_TO` is the maximum length that will be optimized, with powers of two up +/// to 64 supported. If the buffer length is not a power of two or greater than +/// `$UP_TO`, this macro will still fill the buffer with a slower fallback. +/// +/// See [`case_set!`] for examples and more documentation. +macro_rules! case_set_with_default { + (up_to=$UP_TO:literal, $($tt:tt)*) => { + $crate::src::ctx::case_set!(up_to=$UP_TO, @DEFAULT=true, $($tt)*); + }; } +pub(crate) use case_set_with_default; diff --git a/src/decode.rs b/src/decode.rs index b99fbc9a5..81c2289e3 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -26,7 +26,7 @@ use crate::src::cdf::rav1d_cdf_thread_init_static; use crate::src::cdf::rav1d_cdf_thread_update; use crate::src::cdf::CdfMvComponent; use crate::src::cdf::CdfThreadContext; -use crate::src::ctx::CaseSet; +use crate::src::ctx::case_set; use crate::src::dequant_tables::dav1d_dq_tbl; use crate::src::disjoint_mut::DisjointMut; use crate::src::disjoint_mut::DisjointMutSlice; @@ -368,19 +368,21 @@ fn read_tx_tree( } t.b.y -= txsh; } else { - CaseSet::<16, false>::many( - [(&t.l, txh), (&f.a[t.a], txw)], - [t_dim.h as usize, t_dim.w as usize], - [by4 as usize, bx4 as usize], - |case, (dir, val)| { + case_set!( + up_to = 16, + ctx = [(&t.l, txh), (&f.a[t.a], txw)], + len = [t_dim.h as usize, t_dim.w as usize], + offset = [by4 as usize, bx4 as usize], + { + let (dir, val) = ctx; let tx = if is_split { TxfmSize::S4x4 } else { // TODO check unwrap is optimized out TxfmSize::from_repr(val as _).unwrap() }; - case.set_disjoint(&dir.tx, tx); - }, + set_disjoint!(&dir.tx, tx); + } ); }; } @@ -808,26 +810,29 @@ fn read_vartx_tree( uvtx = TxfmSize::S4x4; max_ytx = uvtx; if txfm_mode == 
Rav1dTxfmMode::Switchable { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, dir| { - case.set_disjoint(&dir.tx, TxfmSize::S4x4); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [bh4, bw4], + offset = [by4 as usize, bx4 as usize], + { + set_disjoint!(&dir.tx, TxfmSize::S4x4); + } ); } } else if txfm_mode != Rav1dTxfmMode::Switchable || b.skip != 0 { if txfm_mode == Rav1dTxfmMode::Switchable { - CaseSet::<32, false>::many( - [(&t.l, 1), (&f.a[t.a], 0)], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, (dir, dir_index)| { + case_set!( + up_to = 32, + ctx = [(&t.l, 1), (&f.a[t.a], 0)], + len = [bh4, bw4], + offset = [by4 as usize, bx4 as usize], + { + let (dir, dir_index) = ctx; // TODO check unwrap is optimized out let tx = TxfmSize::from_repr(b_dim[2 + dir_index] as _).unwrap(); - case.set_disjoint(&dir.tx, tx); - }, + set_disjoint!(&dir.tx, tx); + } ); } uvtx = dav1d_max_txfm_size_for_bs[bs as usize][f.cur.p.layout as usize]; @@ -1203,14 +1208,15 @@ fn decode_b( } else { y_mode }; - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, dir| { - case.set_disjoint(&dir.mode, y_mode_nofilt); - case.set_disjoint(&dir.intra, 1); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [bh4 as usize, bw4 as usize], + offset = [by4 as usize, bx4 as usize], + { + set_disjoint!(&dir.mode, y_mode_nofilt); + set_disjoint!(&dir.intra, 1); + } ); if frame_type.is_inter_or_switch() { let ri = t.rt.r[(t.b.y as usize & 31) + 5 + bh4 as usize - 1] + t.b.x as usize; @@ -1228,13 +1234,14 @@ fn decode_b( } if has_chroma { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [cbh4 as usize, cbw4 as usize], - [cby4 as usize, cbx4 as usize], - |case, dir| { - case.set_disjoint(&dir.uvmode, intra.uv_mode); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [cbh4 as usize, cbw4 as 
usize], + offset = [cby4 as usize, cbx4 as usize], + { + set_disjoint!(&dir.uvmode, intra.uv_mode); + } ); } } @@ -1278,15 +1285,16 @@ fn decode_b( (bd_fn.recon_b_inter)(f, t, None, bs, b, inter)?; let filter = &dav1d_filter_dir[inter.filter2d as usize]; - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, dir| { - case.set_disjoint(&dir.filter[0], filter[0].into()); - case.set_disjoint(&dir.filter[1], filter[1].into()); - case.set_disjoint(&dir.intra, 0); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [bh4 as usize, bw4 as usize], + offset = [by4 as usize, bx4 as usize], + { + set_disjoint!(&dir.filter[0], filter[0].into()); + set_disjoint!(&dir.filter[1], filter[1].into()); + set_disjoint!(&dir.intra, 0); + } ); if frame_type.is_inter_or_switch() { @@ -1307,13 +1315,14 @@ fn decode_b( } if has_chroma { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [cbh4 as usize, cbw4 as usize], - [cby4 as usize, cbx4 as usize], - |case, dir| { - case.set_disjoint(&dir.uvmode, DC_PRED); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [cbh4 as usize, cbw4 as usize], + offset = [cby4 as usize, cbx4 as usize], + { + set_disjoint!(&dir.uvmode, DC_PRED); + } ); } } @@ -1970,45 +1979,48 @@ fn decode_b( y_mode }; let is_inter_or_switch = f.frame_hdr().frame_type.is_inter_or_switch(); - CaseSet::<32, false>::many( - [(&t.l, t_dim.lh, 1), (&f.a[t.a], t_dim.lw, 0)], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, (dir, lw_lh, dir_index)| { - case.set_disjoint(&dir.tx_intra, lw_lh as i8); + case_set!( + up_to = 32, + ctx = [(&t.l, t_dim.lh, 1), (&f.a[t.a], t_dim.lw, 0)], + len = [bh4 as usize, bw4 as usize], + offset = [by4 as usize, bx4 as usize], + { + let (dir, lw_lh, dir_index) = ctx; + set_disjoint!(&dir.tx_intra, lw_lh as i8); // TODO check unwrap is optimized out - case.set_disjoint(&dir.tx, TxfmSize::from_repr(lw_lh as _).unwrap()); - 
case.set_disjoint(&dir.mode, y_mode_nofilt); - case.set_disjoint(&dir.pal_sz, pal_sz[0]); - case.set_disjoint(&dir.seg_pred, seg_pred.into()); - case.set_disjoint(&dir.skip_mode, 0); - case.set_disjoint(&dir.intra, 1); - case.set_disjoint(&dir.skip, b.skip); + set_disjoint!(&dir.tx, TxfmSize::from_repr(lw_lh as _).unwrap()); + set_disjoint!(&dir.mode, y_mode_nofilt); + set_disjoint!(&dir.pal_sz, pal_sz[0]); + set_disjoint!(&dir.seg_pred, seg_pred.into()); + set_disjoint!(&dir.skip_mode, 0); + set_disjoint!(&dir.intra, 1); + set_disjoint!(&dir.skip, b.skip); // see aomedia bug 2183 for why we use luma coordinates here - case.set( + set!( &mut t.pal_sz_uv[dir_index], - if has_chroma { pal_sz[1] } else { 0 }, + if has_chroma { pal_sz[1] } else { 0 } ); if is_inter_or_switch { - case.set_disjoint(&dir.comp_type, None); - case.set_disjoint(&dir.r#ref[0], -1); - case.set_disjoint(&dir.r#ref[1], -1); - case.set_disjoint(&dir.filter[0], Rav1dFilterMode::N_SWITCHABLE_FILTERS); - case.set_disjoint(&dir.filter[1], Rav1dFilterMode::N_SWITCHABLE_FILTERS); + set_disjoint!(&dir.comp_type, None); + set_disjoint!(&dir.r#ref[0], -1); + set_disjoint!(&dir.r#ref[1], -1); + set_disjoint!(&dir.filter[0], Rav1dFilterMode::N_SWITCHABLE_FILTERS); + set_disjoint!(&dir.filter[1], Rav1dFilterMode::N_SWITCHABLE_FILTERS); } - }, + } ); if pal_sz[0] != 0 { (bd_fn.copy_pal_block_y)(t, f, bx4 as usize, by4 as usize, bw4 as usize, bh4 as usize); } if has_chroma { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [cbh4 as usize, cbw4 as usize], - [cby4 as usize, cbx4 as usize], - |case, dir| { - case.set_disjoint(&dir.uvmode, uv_mode); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [cbh4 as usize, cbw4 as usize], + offset = [cby4 as usize, cbx4 as usize], + { + set_disjoint!(&dir.uvmode, uv_mode); + } ); if pal_sz[1] != 0 { (bd_fn.copy_pal_block_uv)( @@ -2179,30 +2191,33 @@ fn decode_b( splat_intrabc_mv(c, t, &f.rf, bs, r#ref, bw4 as usize, bh4 as usize); - CaseSet::<32, 
false>::many( - [(&t.l, 1), (&f.a[t.a], 0)], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, (dir, dir_index)| { - case.set_disjoint(&dir.tx_intra, b_dim[2 + dir_index] as i8); - case.set_disjoint(&dir.mode, DC_PRED); - case.set_disjoint(&dir.pal_sz, 0); + case_set!( + up_to = 32, + ctx = [(&t.l, 1), (&f.a[t.a], 0)], + len = [bh4 as usize, bw4 as usize], + offset = [by4 as usize, bx4 as usize], + { + let (dir, dir_index) = ctx; + set_disjoint!(&dir.tx_intra, b_dim[2 + dir_index] as i8); + set_disjoint!(&dir.mode, DC_PRED); + set_disjoint!(&dir.pal_sz, 0); // see aomedia bug 2183 for why this is outside `if has_chroma {}` - case.set(&mut t.pal_sz_uv[dir_index], 0); - case.set_disjoint(&dir.seg_pred, seg_pred.into()); - case.set_disjoint(&dir.skip_mode, 0); - case.set_disjoint(&dir.intra, 0); - case.set_disjoint(&dir.skip, b.skip); - }, + set!(&mut t.pal_sz_uv[dir_index], 0); + set_disjoint!(&dir.seg_pred, seg_pred.into()); + set_disjoint!(&dir.skip_mode, 0); + set_disjoint!(&dir.intra, 0); + set_disjoint!(&dir.skip, b.skip); + } ); if has_chroma { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [cbh4 as usize, cbw4 as usize], - [cby4 as usize, cbx4 as usize], - |case, dir| { - case.set_disjoint(&dir.uvmode, DC_PRED); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [cbh4 as usize, cbw4 as usize], + offset = [cby4 as usize, cbx4 as usize], + { + set_disjoint!(&dir.uvmode, DC_PRED); + } ); } } @@ -3132,36 +3147,39 @@ fn decode_b( splat_oneref_mv(c, t, &f.rf, bs, &inter, bw4 as usize, bh4 as usize); } - CaseSet::<32, false>::many( - [(&t.l, 1), (&f.a[t.a], 0)], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, (dir, dir_index)| { - case.set_disjoint(&dir.seg_pred, seg_pred.into()); - case.set_disjoint(&dir.skip_mode, b.skip_mode); - case.set_disjoint(&dir.intra, 0); - case.set_disjoint(&dir.skip, b.skip); - case.set_disjoint(&dir.pal_sz, 0); + case_set!( + up_to = 32, + ctx = [(&t.l, 1), 
(&f.a[t.a], 0)], + len = [bh4 as usize, bw4 as usize], + offset = [by4 as usize, bx4 as usize], + { + let (dir, dir_index) = ctx; + set_disjoint!(&dir.seg_pred, seg_pred.into()); + set_disjoint!(&dir.skip_mode, b.skip_mode); + set_disjoint!(&dir.intra, 0); + set_disjoint!(&dir.skip, b.skip); + set_disjoint!(&dir.pal_sz, 0); // see aomedia bug 2183 for why this is outside if (has_chroma) - case.set(&mut t.pal_sz_uv[dir_index], 0); - case.set_disjoint(&dir.tx_intra, b_dim[2 + dir_index] as i8); - case.set_disjoint(&dir.comp_type, comp_type); - case.set_disjoint(&dir.filter[0], filter[0]); - case.set_disjoint(&dir.filter[1], filter[1]); - case.set_disjoint(&dir.mode, inter_mode); - case.set_disjoint(&dir.r#ref[0], r#ref[0]); - case.set_disjoint(&dir.r#ref[1], r#ref[1]); - }, + set!(&mut t.pal_sz_uv[dir_index], 0); + set_disjoint!(&dir.tx_intra, b_dim[2 + dir_index] as i8); + set_disjoint!(&dir.comp_type, comp_type); + set_disjoint!(&dir.filter[0], filter[0]); + set_disjoint!(&dir.filter[1], filter[1]); + set_disjoint!(&dir.mode, inter_mode); + set_disjoint!(&dir.r#ref[0], r#ref[0]); + set_disjoint!(&dir.r#ref[1], r#ref[1]); + } ); if has_chroma { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [cbh4 as usize, cbw4 as usize], - [cby4 as usize, cbx4 as usize], - |case, dir| { - case.set_disjoint(&dir.uvmode, DC_PRED); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [cbh4 as usize, cbw4 as usize], + offset = [cby4 as usize, cbx4 as usize], + { + set_disjoint!(&dir.uvmode, DC_PRED); + } ); } } @@ -3174,10 +3192,11 @@ fn decode_b( let b4_stride = usize::try_from(f.b4_stride).unwrap(); let cur_segmap = &f.cur_segmap.as_ref().unwrap().inner; let offset = by * b4_stride + bx; - CaseSet::<32, false>::one((), bw4, 0, |case, ()| { + case_set!(up_to = 32, len = bw4, case_offset = 0, { for i in 0..bh4 { let i = offset + i * b4_stride; - case.set(&mut cur_segmap.index_mut((i.., ..bw4)), b.seg_id); + let buf = &mut cur_segmap.index_mut((i.., ..bw4)); + 
set!(buf, b.seg_id); } }); } @@ -3792,16 +3811,18 @@ fn decode_sb( if matches!(pass, FrameThreadPassState::First(_)) && (bp != BlockPartition::Split || bl == BlockLevel::Bl8x8) { - CaseSet::<16, false>::many( - [(&f.a[t.a], 0), (&t.l, 1)], - [hsz as usize; 2], - [bx8 as usize, by8 as usize], - |case, (dir, dir_index)| { - case.set_disjoint( + case_set!( + up_to = 16, + ctx = [(&f.a[t.a], 0), (&t.l, 1)], + len = [hsz as usize, hsz as usize], + offset = [bx8 as usize, by8 as usize], + { + let (dir, dir_index) = ctx; + set_disjoint!( &dir.partition, - dav1d_al_part_ctx[dir_index][bl as usize][bp as usize], + dav1d_al_part_ctx[dir_index][bl as usize][bp as usize] ); - }, + } ); } diff --git a/src/disjoint_mut.rs b/src/disjoint_mut.rs index d07b33cec..0808acb4b 100644 --- a/src/disjoint_mut.rs +++ b/src/disjoint_mut.rs @@ -303,6 +303,47 @@ impl DisjointMut { DisjointMutGuard::new(self, slice, bounds) } + /// Mutably borrow a slice or element without bounds checking. + /// + /// This mutable borrow may be unchecked and callers must ensure that no + /// other borrows from this collection overlap with the mutably borrowed + /// region for the lifetime of that mutable borrow. + /// + /// # Safety + /// + /// This method requires correct usage alongside other calls to [`index`] + /// and [`index_mut`]. Caller must ensure that no elements of the resulting + /// borrowed slice or element are concurrently borrowed (immutably or + /// mutably) at all during the lifetime of the returned mutable borrow. This + /// is checked in debug builds, but checks are disabled in release builds + /// for performance. We also require that the referenced data must be plain + /// data and not contain any pointers or references to avoid other potential + /// memory safety issues due to racy access. + /// + /// The index provided must be within the bounds of the collection. 
+ /// + /// [`index`]: DisjointMut::index + /// [`index_mut`]: DisjointMut::index_mut + #[inline] // Inline to see bounds checks in order to potentially elide them. + #[cfg_attr(debug_assertions, track_caller)] + pub unsafe fn index_mut_unchecked<'a, I>( + &'a self, + index: I, + ) -> DisjointMutGuard<'a, T, I::Output> + where + I: Into + Clone, + I: DisjointMutIndex<[::Target]>, + { + let bounds = index.clone().into(); + // SAFETY: The safety preconditions of `index` and `index_mut` imply + // that the indexed region we are mutably borrowing is not concurrently + // borrowed and will not be borrowed during the lifetime of the returned + // reference. The index is assumed to be within the bounds of the slice + // by the safety preconditions of this method. + let slice = unsafe { &mut *index.get_mut_unchecked(self.as_mut_slice()) }; + DisjointMutGuard::new(self, slice, bounds) + } + /// Immutably borrow a slice or element. /// /// This immutable borrow may be unchecked and callers must ensure that no @@ -503,6 +544,22 @@ pub trait DisjointMutIndex { /// `slice` must be a valid, dereferencable pointer that this function may /// dereference immutably. unsafe fn get_mut(self, slice: *mut T) -> *mut Self::Output; + + /// Returns a mutable pointer to the output at this indexed location without + /// bounds checking. The `T` pointer must be valid to dereference to obtain + /// the slice length. + /// + /// To implement, `T` should be a slice type that `Self` is a valid index + /// into. + /// + /// This is a stable equivalent to + /// [`std::slice::SliceIndex::get_unchecked_mut`]. + /// + /// # Safety + /// + /// `slice` must be a valid, dereferencable pointer that this function may + /// dereference immutably. The index must be within the bounds of the slice. 
+ unsafe fn get_mut_unchecked(self, slice: *mut T) -> *mut Self::Output; } pub trait TranslateRange { @@ -687,6 +744,15 @@ impl DisjointMutIndex<[T]> for usize { out_of_bounds(index, len); } } + + #[inline] // Inline to see bounds checks in order to potentially elide them. + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn get_mut_unchecked(self, slice: *mut [T]) -> *mut Self::Output { + let index = self; + // SAFETY: Method preconditions require an in-bounds index and that + // `slice` is a valid pointer into an allocation of sufficient length. + unsafe { (slice as *mut T).add(index) } + } } impl DisjointMutIndex<[T]> for I @@ -721,6 +787,17 @@ where out_of_bounds(start, end, len); } } + + #[inline] // Inline to see bounds checks in order to potentially elide them. + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn get_mut_unchecked(self, slice: *mut [T]) -> *mut Self::Output { + let len = slice.len(); + let Range { start, end } = self.to_range(len); + // SAFETY: Method preconditions require an in-bounds index and that + // `slice` is a valid pointer into an allocation of sufficient length. 
+ let data = unsafe { (slice as *mut T).add(start) }; + ptr::slice_from_raw_parts_mut(data, end - start) + } } #[cfg(not(debug_assertions))] diff --git a/src/lf_mask.rs b/src/lf_mask.rs index 5224871c6..5a4fd4bd4 100644 --- a/src/lf_mask.rs +++ b/src/lf_mask.rs @@ -6,7 +6,8 @@ use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::include::dav1d::headers::Rav1dRestorationType; use crate::src::align::Align16; use crate::src::align::ArrayDefault; -use crate::src::ctx::CaseSet; +use crate::src::ctx::case_set; +use crate::src::ctx::case_set_with_default; use crate::src::disjoint_mut::DisjointMut; use crate::src::internal::Bxy; use crate::src::levels::BlockSize; @@ -136,15 +137,15 @@ fn decomp_tx( let lh = cmp::min(2, t_dim.lh); debug_assert!(t_dim.w == 1 << t_dim.lw && t_dim.w <= 16); - CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| { + case_set!(up_to = 16, len = t_dim.w as usize, offset = x0, { for y in 0..t_dim.h as usize { - case.set(&mut txa[0][0][y0 + y], MaybeUninit::new(lw)); - case.set(&mut txa[1][0][y0 + y], MaybeUninit::new(lh)); + set!(&mut txa[0][0][y0 + y], MaybeUninit::new(lw)); + set!(&mut txa[1][0][y0 + y], MaybeUninit::new(lh)); txa[0][1][y0 + y][x0].write(t_dim.w); } }); - CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| { - case.set(&mut txa[1][1][y0], MaybeUninit::new(t_dim.h)); + case_set!(up_to = 16, len = t_dim.w as usize, offset = x0, { + set!(&mut txa[1][1][y0], MaybeUninit::new(t_dim.h)); }); }; } @@ -328,13 +329,15 @@ fn mask_edges_intra( } } - CaseSet::<32, true>::many( - [(a, thl4c), (l, twl4c)], - [w4 as usize, h4 as usize], - [0, 0], - |case, (dir, tl4c)| { - case.set(dir, tl4c); - }, + case_set_with_default!( + up_to = 32, + ctx = [(a, thl4c), (l, twl4c)], + len = [w4 as usize, h4 as usize], + offset = [0, 0], + { + let (ref mut dir, tl4c) = ctx; + set!(dir, tl4c); + } ); } @@ -411,13 +414,15 @@ fn mask_edges_chroma( } } - CaseSet::<32, true>::many( - [(a, thl4c), (l, twl4c)], - [cw4 as usize, ch4 
as usize], - [0, 0], - |case, (dir, tl4c)| { - case.set(dir, tl4c); - }, + case_set_with_default!( + up_to = 32, + ctx = [(a, thl4c), (l, twl4c)], + len = [cw4 as usize, ch4 as usize], + offset = [0, 0], + { + let (ref mut dir, tl4c) = ctx; + set!(dir, tl4c); + } ); } diff --git a/src/recon.rs b/src/recon.rs index f7c34e6de..3c9db7d51 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -20,7 +20,8 @@ use crate::include::dav1d::picture::Rav1dPictureDataComponent; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; use crate::src::assume::assume; use crate::src::cdef_apply::rav1d_cdef_brow; -use crate::src::ctx::CaseSet; +use crate::src::ctx::case_set; +use crate::src::ctx::case_set_with_default; use crate::src::env::get_uv_inter_txtp; use crate::src::in_range::InRange; use crate::src::internal::Bxy; @@ -1395,22 +1396,23 @@ fn read_coef_tree( ytx, txtp, eob, ts_c.msac.rng, ); } - CaseSet::<16, true>::many( - [&t.l.lcoef, &f.a[t.a].lcoef], - [ + case_set_with_default!( + up_to = 16, + dir = [&t.l, &f.a[t.a]], + len = [ cmp::min(txh as c_int, f.bh - t.b.y) as usize, cmp::min(txw as c_int, f.bw - t.b.x) as usize, ], - [by4, bx4], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + offset = [by4 as usize, bx4 as usize], + { + set_disjoint!(&dir.lcoef, cf_ctx); + } ); let txtp_map = &mut t.scratch.inter_intra_mut().ac_txtp_map.txtp_map_mut()[by4 * 32 + bx4..]; - CaseSet::<16, false>::one((), txw as usize, 0, |case, ()| { + case_set!(up_to = 16, len = txw as usize, offset = 0, { for txtp_map in txtp_map.chunks_mut(32).take(txh as usize) { - case.set(txtp_map, txtp); + set!(txtp_map, txtp); } }); if t.frame_thread.pass == 1 { @@ -1475,24 +1477,26 @@ pub(crate) fn rav1d_read_coef_blocks( && (bh4 > ss_ver || t.b.y & 1 != 0); if b.skip != 0 { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [bh4 as usize, bw4 as usize], - [by4, bx4], - |case, dir| { - case.set_disjoint(&dir.lcoef, 0x40); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = 
[bh4 as usize, bw4 as usize], + offset = [by4, bx4], + { + set_disjoint!(&dir.lcoef, 0x40); + } ); if has_chroma { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [cbh4 as usize, cbw4 as usize], - [cby4, cbx4], - |case, dir| { + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [cbh4 as usize, cbw4 as usize], + offset = [cby4, cbx4], + { for ccoef in &dir.ccoef { - case.set_disjoint(ccoef, 0x40) + set_disjoint!(ccoef, 0x40) } - }, + } ); } return; @@ -1581,16 +1585,17 @@ pub(crate) fn rav1d_read_coef_blocks( let cbi_idx = ts.frame_thread[1].cbi_idx.get_update(|i| i + 1); f.frame_thread.cbi[cbi_idx as usize] .set(CodedBlockInfo::new(eob as i16, txtp)); - CaseSet::<16, true>::many( - [&t.l.lcoef, &f.a[t.a].lcoef], - [ + case_set_with_default!( + up_to = 16, + dir = [&t.l, &f.a[t.a]], + len = [ cmp::min(t_dim.h as i32, f.bh - t.b.y) as usize, cmp::min(t_dim.w as i32, f.bw - t.b.x) as usize, ], - [by4 + y as usize, bx4 + x as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + offset = [by4 + y as usize, bx4 + x as usize], + { + set_disjoint!(&dir.lcoef, cf_ctx); + } ); } } @@ -1665,9 +1670,10 @@ pub(crate) fn rav1d_read_coef_blocks( ts.frame_thread[1] .cf .set(cf_idx + uv_t_dim.w as u32 * uv_t_dim.h as u32 * 16); - CaseSet::<16, true>::many( - [l_ccoef, a_ccoef], - [ + case_set_with_default!( + up_to = 16, + dir = [l_ccoef, a_ccoef], + len = [ cmp::min( uv_t_dim.h as i32, f.bh - t.b.y + ss_ver as c_int >> ss_ver, @@ -1677,10 +1683,10 @@ pub(crate) fn rav1d_read_coef_blocks( f.bw - t.b.x + ss_hor as c_int >> ss_hor, ) as usize, ], - [cby4 + y as usize, cbx4 as usize + x as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + offset = [cby4 + y as usize, cbx4 as usize + x as usize], + { + set_disjoint!(dir, cf_ctx); + } ); x += uv_t_dim.w; t.b.x += (uv_t_dim.w as c_int) << ss_hor; @@ -2323,16 +2329,17 @@ pub(crate) fn rav1d_recon_b_intra( ts_c.as_deref().unwrap().msac.rng, ); } - CaseSet::<16, true>::many( - [&t.l, &f.a[t.a]], - [ 
+ case_set_with_default!( + up_to = 16, + dir = [&t.l, &f.a[t.a]], + len = [ cmp::min(t_dim.h as i32, f.bh - t.b.y) as usize, cmp::min(t_dim.w as i32, f.bw - t.b.x) as usize, ], - [(by4 + y) as usize, (bx4 + x) as usize], - |case, dir| { - case.set_disjoint(&dir.lcoef, cf_ctx); - }, + offset = [(by4 + y) as usize, (bx4 + x) as usize], + { + set_disjoint!(&dir.lcoef, cf_ctx); + } ); } if eob >= 0 { @@ -2357,13 +2364,14 @@ pub(crate) fn rav1d_recon_b_intra( } } } else if t.frame_thread.pass == 0 { - CaseSet::<16, false>::many( - [&t.l, &f.a[t.a]], - [t_dim.h as usize, t_dim.w as usize], - [(by4 + y) as usize, (bx4 + x) as usize], - |case, dir| { - case.set_disjoint(&dir.lcoef, 0x40); - }, + case_set!( + up_to = 16, + dir = [&t.l, &f.a[t.a]], + len = [t_dim.h as usize, t_dim.w as usize], + offset = [(by4 + y) as usize, (bx4 + x) as usize], + { + set_disjoint!(&dir.lcoef, 0x40); + } ); } y_dst += 4 * t_dim.w as usize; @@ -2698,18 +2706,19 @@ pub(crate) fn rav1d_recon_b_intra( cbx4, ); } - CaseSet::<16, true>::many( - [l_ccoef, a_ccoef], - [ + case_set_with_default!( + up_to = 16, + dir = [l_ccoef, a_ccoef], + len = [ cmp::min(uv_t_dim.h as i32, f.bh - t.b.y + ss_ver >> ss_ver) as usize, cmp::min(uv_t_dim.w as i32, f.bw - t.b.x + ss_hor >> ss_hor) as usize, ], - [(cby4 + y) as usize, (cbx4 + x) as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + offset = [(cby4 + y) as usize, (cbx4 + x) as usize], + { + set_disjoint!(dir, cf_ctx); + } ); } if eob >= 0 { @@ -2734,13 +2743,14 @@ pub(crate) fn rav1d_recon_b_intra( } } } else if t.frame_thread.pass == 0 { - CaseSet::<16, false>::many( - [&t.l, &f.a[t.a]], - [uv_t_dim.h as usize, uv_t_dim.w as usize], - [(cby4 + y) as usize, (cbx4 + x) as usize], - |case, dir| { - case.set_disjoint(&dir.ccoef[pl], 0x40); - }, + case_set!( + up_to = 16, + dir = [&t.l, &f.a[t.a]], + len = [uv_t_dim.h as usize, uv_t_dim.w as usize], + offset = [(cby4 + y) as usize, (cbx4 + x) as usize], + { + set_disjoint!(&dir.ccoef[pl], 0x40); 
+ } ); } uv_dst += uv_t_dim.w as usize * 4; @@ -3445,24 +3455,26 @@ pub(crate) fn rav1d_recon_b_inter( if b.skip != 0 { // reset coef contexts - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [bh4 as usize, bw4 as usize], - [by4 as usize, bx4 as usize], - |case, dir| { - case.set_disjoint(&dir.lcoef, 0x40); - }, + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [bh4 as usize, bw4 as usize], + offset = [by4 as usize, bx4 as usize], + { + set_disjoint!(&dir.lcoef, 0x40); + } ); if has_chroma { - CaseSet::<32, false>::many( - [&t.l, &f.a[t.a]], - [cbh4 as usize, cbw4 as usize], - [cby4 as usize, cbx4 as usize], - |case, dir| { + case_set!( + up_to = 32, + dir = [&t.l, &f.a[t.a]], + len = [cbh4 as usize, cbw4 as usize], + offset = [cby4 as usize, cbx4 as usize], + { for ccoef in &dir.ccoef { - case.set_disjoint(ccoef, 0x40); + set_disjoint!(ccoef, 0x40); } - }, + } ); } return Ok(()); @@ -3579,18 +3591,19 @@ pub(crate) fn rav1d_recon_b_inter( ts_c.as_deref().unwrap().msac.rng, ); } - CaseSet::<16, true>::many( - [l_ccoef, a_ccoef], - [ + case_set_with_default!( + up_to = 16, + dir = [l_ccoef, a_ccoef], + len = [ cmp::min(uvtx.h as i32, f.bh - t.b.y + ss_ver >> ss_ver) as usize, cmp::min(uvtx.w as i32, f.bw - t.b.x + ss_hor >> ss_hor) as usize, ], - [(cby4 + y) as usize, (cbx4 + x) as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + offset = [(cby4 + y) as usize, (cbx4 + x) as usize], + { + set_disjoint!(dir, cf_ctx); + } ); } if eob >= 0 {