diff --git a/src/recon.rs b/src/recon.rs index 8833022c9..16fd73477 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -83,7 +83,9 @@ use crate::src::tables::dav1d_lo_ctx_offsets; use crate::src::tables::dav1d_skip_ctx; use crate::src::tables::dav1d_tx_type_class; use crate::src::tables::dav1d_tx_types_per_set; +use crate::src::tables::dav1d_txfm_dimension; use crate::src::tables::dav1d_txfm_dimensions; +use crate::src::tables::dav1d_txfm_size; use crate::src::tables::dav1d_txtp_from_uvmode; use crate::src::tables::TxfmInfo; use crate::src::wedge::dav1d_ii_masks; @@ -263,15 +265,15 @@ impl_MergeInt!(u32, u16); impl_MergeInt!(u64, u32); impl_MergeInt!(u128, u64); -#[inline] -fn get_skip_ctx( - t_dim: &TxfmInfo, +#[inline(always)] +fn get_skip_ctx( bs: BlockSize, a: &[u8], l: &[u8], chroma: bool, layout: Rav1dPixelLayout, ) -> InRange { + let t_dim = const { dav1d_txfm_dimension::() }; let b_dim = bs.dimensions(); let skip_ctx = if chroma { let ss_ver = layout == Rav1dPixelLayout::I420; @@ -341,8 +343,9 @@ fn get_skip_ctx( InRange::new(skip_ctx).unwrap() } -#[inline] -fn get_dc_sign_ctx(tx: TxfmSize, a: &[u8], l: &[u8]) -> c_uint { +#[inline(always)] +fn get_dc_sign_ctx(a: &[u8], l: &[u8]) -> c_uint { + let tx = const { dav1d_txfm_size::() }; let mask = 0xc0c0c0c0c0c0c0c0 as u64; let mul = 0x101010101010101 as u64; @@ -492,7 +495,6 @@ fn get_lo_ctx( let offset; match ctx_offsets { Some(ctx_offsets) => { - level(2, 1); // Bounds check all at once. mag = level(0, 1) + level(1, 0); debug_assert_matches!(tx_class, TxClass::TwoD); mag += level(1, 1); @@ -502,7 +504,6 @@ fn get_lo_ctx( } None => { debug_assert_matches!(tx_class, TxClass::H | TxClass::V); - level(1, 4); // Bounds check all at once. mag = level(0, 1) + level(1, 0); mag += level(0, 2); *hi_mag = mag; @@ -518,6 +519,8 @@ fn get_lo_ctx( } } +#[rustfmt::skip] +#[inline(always)] fn decode_coefs( f: &Rav1dFrameData, ts: usize, @@ -535,6 +538,51 @@ fn decode_coefs( txtp: &mut TxfmType, res_ctx: &mut u8, ) -> c_int { + // We make the `TxfmSize` a const so the optimizer sees we don't need memory reads to access the + // `TxfmInfo` dimensions. + use TxfmSize::*; + match tx { + S4x4 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + S8x8 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + S16x16 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + S32x32 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + S64x64 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R4x8 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R8x4 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R8x16 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R16x8 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R16x32 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R32x16 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R32x64 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R64x32 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R4x16 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R16x4 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R8x32 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R32x8 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R16x64 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + R64x16 => decode_coefs_inner::(f, ts, ts_c, dbg_block_info, scratch, t_cf, a, l, tx, bs, b, plane, cf, txtp, res_ctx), + } +} + +#[inline(never)] +fn decode_coefs_inner( + f: &Rav1dFrameData, + ts: usize, + ts_c: &mut Rav1dTileStateContext, + dbg_block_info: bool, + scratch: &mut TaskContextScratch, + t_cf: &mut Cf, + a: &mut [u8], + l: &mut [u8], + tx: TxfmSize, + bs: BlockSize, + b: &Av1Block, + plane: usize, + cf: CfSelect, + txtp: &mut TxfmType, + res_ctx: &mut u8, +) -> c_int { + let t_dim = const { dav1d_txfm_dimension::() }; let dc_sign_ctx; let dc_sign; let mut dc_dq; @@ -542,7 +590,6 @@ fn decode_coefs( let chroma = plane != 0; let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); let lossless = frame_hdr.segmentation.lossless[b.seg_id.get()]; - let t_dim = &dav1d_txfm_dimensions[tx as usize]; let dbg = dbg_block_info && plane != 0 && false; if dbg { @@ -550,7 +597,7 @@ fn decode_coefs( } // does this block have any non-zero coefficients - let sctx = get_skip_ctx(t_dim, bs, a, l, chroma, f.cur.p.layout); + let sctx = get_skip_ctx::(bs, a, l, chroma, f.cur.p.layout); let all_skip = rav1d_msac_decode_bool_adapt( &mut ts_c.msac, &mut ts_c.cdf.coef.skip[t_dim.ctx as usize][sctx.get() as usize], @@ -578,7 +625,7 @@ fn decode_coefs( Inter(_) if t_dim.max >= TxfmSize::S64x64 as _ => DCT_DCT, Intra(intra) if chroma => dav1d_txtp_from_uvmode[intra.uv_mode as usize], // inferred from either the luma txtp (inter) or a LUT (intra) - Inter(_) if chroma => get_uv_inter_txtp(t_dim, *txtp), + Inter(_) if chroma => get_uv_inter_txtp(&t_dim, *txtp), // In libaom, lossless is checked by a literal qidx == 0, but not all // such blocks are actually lossless. The remainder gets an implicit // transform type (for luma) @@ -760,7 +807,7 @@ fn decode_coefs( let mut rc; let mut dc_tok; - #[inline] + #[inline(always)] fn decode_coefs_class( ts_c: &mut Rav1dTileStateContext, t_dim: &TxfmInfo, @@ -1013,13 +1060,13 @@ fn decode_coefs( let cf = &mut cf; (rc, dc_tok) = match tx_class { TxClass::TwoD => decode_coefs_class::<{ TxClass::TwoD as _ }, BD>( - ts_c, t_dim, chroma, scratch, eob, tx, dbg, cf, + ts_c, &t_dim, chroma, scratch, eob, tx, dbg, cf, ), TxClass::H => decode_coefs_class::<{ TxClass::H as _ }, BD>( - ts_c, t_dim, chroma, scratch, eob, tx, dbg, cf, + ts_c, &t_dim, chroma, scratch, eob, tx, dbg, cf, ), TxClass::V => decode_coefs_class::<{ TxClass::V as _ }, BD>( - ts_c, t_dim, chroma, scratch, eob, tx, dbg, cf, + ts_c, &t_dim, chroma, scratch, eob, tx, dbg, cf, ), }; } else { @@ -1084,7 +1131,7 @@ fn decode_coefs( None => Ac::NoQm, }); } else { - dc_sign_ctx = get_dc_sign_ctx(tx, a, l) as c_int; + dc_sign_ctx = get_dc_sign_ctx::(a, l) as c_int; let dc_sign_cdf = &mut ts_c.cdf.coef.dc_sign[chroma][dc_sign_ctx as usize]; dc_sign = rav1d_msac_decode_bool_adapt(&mut ts_c.msac, dc_sign_cdf) as c_int; if dbg { diff --git a/src/tables.rs b/src/tables.rs index 9b7190e9f..f6b552e53 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -192,10 +192,17 @@ impl BlockSize { } } -pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { +pub const fn dav1d_txfm_size() -> TxfmSize { + let Some(size) = TxfmSize::from_repr(TX) else { + panic!("invalid `TxfmSize` discriminant"); + }; + size +} + +pub const fn dav1d_txfm_dimension() -> TxfmInfo { use TxfmSize::*; - [ - TxfmInfo { + match dav1d_txfm_size::() { + S4x4 => TxfmInfo { w: 1, h: 1, lw: 0, @@ -205,7 +212,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: DefaultValue::DEFAULT, ctx: 0, }, - TxfmInfo { + S8x8 => TxfmInfo { w: 2, h: 2, lw: 1, @@ -215,7 +222,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S4x4, ctx: 1, }, - TxfmInfo { + S16x16 => TxfmInfo { w: 4, h: 4, lw: 2, @@ -225,7 +232,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S8x8, ctx: 2, }, - TxfmInfo { + S32x32 => TxfmInfo { w: 8, h: 8, lw: 3, @@ -235,7 +242,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S16x16, ctx: 3, }, - TxfmInfo { + S64x64 => TxfmInfo { w: 16, h: 16, lw: 4, @@ -245,7 +252,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S32x32, ctx: 4, }, - TxfmInfo { + R4x8 => TxfmInfo { w: 1, h: 2, lw: 0, @@ -255,7 +262,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S4x4, ctx: 1, }, - TxfmInfo { + R8x4 => TxfmInfo { w: 2, h: 1, lw: 1, @@ -265,7 +272,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S4x4, ctx: 1, }, - TxfmInfo { + R8x16 => TxfmInfo { w: 2, h: 4, lw: 1, @@ -275,7 +282,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S8x8, ctx: 2, }, - TxfmInfo { + R16x8 => TxfmInfo { w: 4, h: 2, lw: 2, @@ -285,7 +292,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S8x8, ctx: 2, }, - TxfmInfo { + R16x32 => TxfmInfo { w: 4, h: 8, lw: 2, @@ -295,7 +302,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S16x16, ctx: 3, }, - TxfmInfo { + R32x16 => TxfmInfo { w: 8, h: 4, lw: 3, @@ -305,7 +312,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S16x16, ctx: 3, }, - TxfmInfo { + R32x64 => TxfmInfo { w: 8, h: 16, lw: 3, @@ -315,7 +322,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S32x32, ctx: 4, }, - TxfmInfo { + R64x32 => TxfmInfo { w: 16, h: 8, lw: 4, @@ -325,7 +332,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: S32x32, ctx: 4, }, - TxfmInfo { + R4x16 => TxfmInfo { w: 1, h: 4, lw: 0, @@ -335,7 +342,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: R4x8, ctx: 1, }, - TxfmInfo { + R16x4 => TxfmInfo { w: 4, h: 1, lw: 2, @@ -345,7 +352,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: R8x4, ctx: 1, }, - TxfmInfo { + R8x32 => TxfmInfo { w: 2, h: 8, lw: 1, @@ -355,7 +362,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: R8x16, ctx: 2, }, - TxfmInfo { + R32x8 => TxfmInfo { w: 8, h: 2, lw: 3, @@ -365,7 +372,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: R16x8, ctx: 2, }, - TxfmInfo { + R16x64 => TxfmInfo { w: 4, h: 16, lw: 2, @@ -375,7 +382,7 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: R16x32, ctx: 3, }, - TxfmInfo { + R64x16 => TxfmInfo { w: 16, h: 4, lw: 4, @@ -385,6 +392,31 @@ pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { sub: R32x16, ctx: 3, }, + } +} + +pub static dav1d_txfm_dimensions: [TxfmInfo; TxfmSize::COUNT] = { + use TxfmSize::*; + [ + dav1d_txfm_dimension::<{ S4x4 as _ }>(), + dav1d_txfm_dimension::<{ S8x8 as _ }>(), + dav1d_txfm_dimension::<{ S16x16 as _ }>(), + dav1d_txfm_dimension::<{ S32x32 as _ }>(), + dav1d_txfm_dimension::<{ S64x64 as _ }>(), + dav1d_txfm_dimension::<{ R4x8 as _ }>(), + dav1d_txfm_dimension::<{ R8x4 as _ }>(), + dav1d_txfm_dimension::<{ R8x16 as _ }>(), + dav1d_txfm_dimension::<{ R16x8 as _ }>(), + dav1d_txfm_dimension::<{ R16x32 as _ }>(), + dav1d_txfm_dimension::<{ R32x16 as _ }>(), + dav1d_txfm_dimension::<{ R32x64 as _ }>(), + dav1d_txfm_dimension::<{ R64x32 as _ }>(), + dav1d_txfm_dimension::<{ R4x16 as _ }>(), + dav1d_txfm_dimension::<{ R16x4 as _ }>(), + dav1d_txfm_dimension::<{ R8x32 as _ }>(), + dav1d_txfm_dimension::<{ R32x8 as _ }>(), + dav1d_txfm_dimension::<{ R16x64 as _ }>(), + dav1d_txfm_dimension::<{ R64x16 as _ }>(), ] };