diff --git a/src/complevel_estimator.rs b/src/complevel_estimator.rs index f84537c..6103e84 100644 --- a/src/complevel_estimator.rs +++ b/src/complevel_estimator.rs @@ -8,16 +8,16 @@ /// Getting the parameters correct means that the resulting diff between the deflate stream /// and the predicted deflate stream will be as small as possible. use crate::{ - hash_algorithm::{ - HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, RotatingHashTrait, ZlibNGHash, - ZlibRotatingHash, MINIZ_LEVEL1_HASH_SIZE_MASK, - }, - hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH}, + hash_algorithm::HashAlgorithm, + hash_chain::DictionaryAddPolicy, + hash_chain_holder::{new_hash_chain_holder, HashChainHolderTrait}, preflate_constants, preflate_input::PreflateInput, + preflate_parameter_estimator::PreflateStrategy, preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS}, preflate_token::{BlockType, PreflateToken, PreflateTokenBlock, PreflateTokenReference}, skip_length_estimator::estimate_skip_length, + token_predictor::TokenPredictorParameters, }; #[derive(Default)] @@ -30,8 +30,6 @@ pub struct CompLevelInfo { pub max_dist_3_matches: u16, pub min_len: u32, pub add_policy: DictionaryAddPolicy, - pub hash_mask: u16, - pub hash_shift: u32, pub hash_algorithm: HashAlgorithm, pub good_length: u32, pub max_lazy: u32, @@ -39,64 +37,10 @@ pub struct CompLevelInfo { pub max_chain: u32, } -/// vtable for invoking the hash chain functions on specific implementation -/// of hash algorithm -trait HashChainInvoke { - fn invoke_update_hash( - &mut self, - len: u32, - input: &PreflateInput, - add_policy: DictionaryAddPolicy, - ); - - fn invoke_match_depth( - &mut self, - token: PreflateTokenReference, - window_size: u32, - input: &PreflateInput, - ) -> u32; -} - -/// holds the hashchain for a specific hash algorithm -struct HashChainHolder { - hash_chain: HashChain, -} - -impl HashChainHolder { - fn new(hash_shift: u32, hash_mask: u16, input: 
&PreflateInput<'_>) -> Box { - Box::new(HashChainHolder:: { - hash_chain: HashChain::::new(hash_shift, hash_mask, input), - }) - } -} - -impl HashChainInvoke for HashChainHolder { - fn invoke_update_hash( - &mut self, - len: u32, - input: &PreflateInput, - add_policy: DictionaryAddPolicy, - ) { - self.hash_chain - .update_hash_with_policy::(len, input, add_policy) - } - - fn invoke_match_depth( - &mut self, - token: PreflateTokenReference, - window_size: u32, - input: &PreflateInput, - ) -> u32 { - self.hash_chain.match_depth(&token, window_size, input) - } -} - struct CandidateInfo { hash_algorithm: HashAlgorithm, - hash_mask: u16, - hash_shift: u32, add_policy: DictionaryAddPolicy, - hash_chain: Box, + hash_chain: Box, longest_dist_at_hop_0: u32, longest_dist_at_hop_1_plus: u32, @@ -105,31 +49,31 @@ struct CandidateInfo { impl CandidateInfo { fn new( - hash_mask: u16, - hash_shift: u32, add_policy: DictionaryAddPolicy, hash_algorithm: HashAlgorithm, - input: &PreflateInput, + window_bits: u32, ) -> Self { - CandidateInfo { - hash_mask, - hash_shift, + let params = TokenPredictorParameters { + hash_algorithm, + add_policy, + matches_to_start_detected: false, + very_far_matches_detected: false, + window_bits, + strategy: PreflateStrategy::Default, + nice_length: 0, + max_token_count: 0, + zlib_compatible: false, + max_dist_3_matches: 0, + good_length: 0, + max_lazy: 0, + max_chain: 0, + min_len: 0, + }; + + Self { add_policy, hash_algorithm, - hash_chain: match hash_algorithm { - HashAlgorithm::Zlib => { - HashChainHolder::::new(hash_shift, hash_mask, input) - } - HashAlgorithm::MiniZFast => { - HashChainHolder::::new(hash_shift, hash_mask, input) - } - HashAlgorithm::Libdeflate4 => { - HashChainHolder::::new(hash_shift, hash_mask, input) - } - HashAlgorithm::ZlibNG => { - HashChainHolder::::new(hash_shift, hash_mask, input) - } - }, + hash_chain: new_hash_chain_holder(¶ms), longest_dist_at_hop_0: 0, longest_dist_at_hop_1_plus: 0, max_chain_found: 0, @@ -142,9 
+86,7 @@ impl CandidateInfo { window_size: u32, input: &PreflateInput, ) -> bool { - let mdepth = self - .hash_chain - .invoke_match_depth(token, window_size, input); + let mdepth = self.hash_chain.match_depth(token, window_size, input); // remove element if the match was impossible due to matching the // the hash depth or because in fast mode we can't match partial words @@ -187,14 +129,6 @@ impl CandidateInfo { self.max_chain_found } - fn hash_mask(&self) -> u16 { - self.hash_mask - } - - fn hash_shift(&self) -> u32 { - self.hash_shift - } - fn hash_algorithm(&self) -> HashAlgorithm { self.hash_algorithm } @@ -243,39 +177,34 @@ impl<'a> CompLevelEstimatorState<'a> { let mut candidates: Vec> = Vec::new(); candidates.push(Box::new(CandidateInfo::new( - MINIZ_LEVEL1_HASH_SIZE_MASK, - 0, add_policy, HashAlgorithm::MiniZFast, - &input, + wbits, ))); for (hash_shift, hash_mask) in [(5, 32767), (4, 2047)] { candidates.push(Box::new(CandidateInfo::new( - hash_mask, - hash_shift, add_policy, - HashAlgorithm::Zlib, - &input, + HashAlgorithm::Zlib { + hash_mask, + hash_shift, + }, + wbits, ))); } // LibFlate4 candidate candidates.push(Box::new(CandidateInfo::new( - 0xffff, - 0, add_policy, HashAlgorithm::Libdeflate4, - &input, + wbits, ))); // ZlibNG candidate candidates.push(Box::new(CandidateInfo::new( - 0xffff, - 0, add_policy, HashAlgorithm::ZlibNG, - &input, + wbits, ))); CompLevelEstimatorState { @@ -291,25 +220,14 @@ impl<'a> CompLevelEstimatorState<'a> { } } - fn update_hash(&mut self, mut length: u32, override_add_policy: bool) { - while length > 0 { - let batch_len = std::cmp::min(length, MAX_UPDATE_HASH_BATCH); - - for i in &mut self.candidates { - i.hash_chain.invoke_update_hash( - batch_len, - &self.input, - if override_add_policy { - DictionaryAddPolicy::AddAll - } else { - i.add_policy - }, - ); - } - - self.input.advance(batch_len); - length -= batch_len; + fn update_hash(&mut self, length: u32, override_add_policy: bool) { + for i in &mut self.candidates { 
+ let mut inputc = self.input.clone(); + i.hash_chain + .update_hash_with_depth(length, &mut inputc, override_add_policy); } + + self.input.advance(length); } fn check_match(&mut self, token: PreflateTokenReference) { @@ -371,8 +289,6 @@ impl<'a> CompLevelEstimatorState<'a> { let mut max_lazy = 258; let mut nice_length = 258; - let hash_mask = candidate.hash_mask(); - let hash_shift = candidate.hash_shift(); let add_policy = candidate.add_policy; let max_chain = candidate.max_chain_found() + 1; let hash_algorithm = candidate.hash_algorithm(); @@ -419,8 +335,6 @@ impl<'a> CompLevelEstimatorState<'a> { matches_to_start_detected: self.match_to_start, very_far_matches_detected: very_far_matches, max_dist_3_matches: self.longest_len_3_dist as u16, - hash_mask, - hash_shift, add_policy, good_length, max_lazy, diff --git a/src/hash_algorithm.rs b/src/hash_algorithm.rs index 70b6477..c2894b6 100644 --- a/src/hash_algorithm.rs +++ b/src/hash_algorithm.rs @@ -1,149 +1,225 @@ +use crate::hash_chain::{HashChain, HashChainNormalize, HashChainNormalizeLibflate4}; + #[derive(Debug, Copy, Clone, Eq, PartialEq, Default)] pub enum HashAlgorithm { + Zlib { + hash_mask: u16, + hash_shift: u32, + }, #[default] - Zlib, MiniZFast, Libdeflate4, ZlibNG, + RandomVector, } -pub trait RotatingHashTrait: Default + Copy + Clone { - fn hash(&self, mask: u16) -> usize; - fn append(&self, c: u8, hash_shift: u32) -> Self; - fn hash_algorithm() -> HashAlgorithm; +pub trait HashImplementation: Default + Copy + Clone { + type HashChainType: HashChain; + + fn get_hash(&self, b: &[u8]) -> usize; fn num_hash_bytes() -> u16; + fn new_hash_chain(self) -> Self::HashChainType; } -#[derive(Default, Debug, Copy, Clone)] +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] pub struct ZlibRotatingHash { - hash: u16, + pub hash_mask: u16, + pub hash_shift: u32, } -impl RotatingHashTrait for ZlibRotatingHash { - fn hash(&self, mask: u16) -> usize { - usize::from(self.hash & mask) - } - - fn append(&self, c: u8, 
hash_shift: u32) -> ZlibRotatingHash { - ZlibRotatingHash { - hash: (self.hash << hash_shift) ^ u16::from(c), - } - } +impl HashImplementation for ZlibRotatingHash { + type HashChainType = HashChainNormalize; - fn hash_algorithm() -> HashAlgorithm { - HashAlgorithm::Zlib + fn get_hash(&self, b: &[u8]) -> usize { + let c = u16::from(b[0]); + let c = (c << self.hash_shift) ^ u16::from(b[1]); + let c = (c << self.hash_shift) ^ u16::from(b[2]); + usize::from(c & self.hash_mask) } fn num_hash_bytes() -> u16 { 3 } + + fn new_hash_chain(self) -> Self::HashChainType { + HashChainNormalize::::new(self) + } } #[derive(Default, Copy, Clone)] -pub struct MiniZHash { - hash: u32, -} +pub struct MiniZHash {} /// Size of hash chain for fast compression mode. pub const MINIZ_LEVEL1_HASH_SIZE_MASK: u16 = 4095; -impl RotatingHashTrait for MiniZHash { - fn hash(&self, mask: u16) -> usize { - debug_assert!(mask == MINIZ_LEVEL1_HASH_SIZE_MASK); - ((self.hash ^ (self.hash >> 17)) & u32::from(MINIZ_LEVEL1_HASH_SIZE_MASK)) as usize - } +impl HashImplementation for MiniZHash { + type HashChainType = HashChainNormalize; - fn append(&self, c: u8, _hash_shift: u32) -> Self { - MiniZHash { - hash: (c as u32) << 16 | (self.hash >> 8), - } - } + fn get_hash(&self, b: &[u8]) -> usize { + let hash = u32::from(b[0]) | (u32::from(b[1]) << 8) | (u32::from(b[2]) << 16); - fn hash_algorithm() -> HashAlgorithm { - HashAlgorithm::MiniZFast + ((hash ^ (hash >> 17)) & u32::from(MINIZ_LEVEL1_HASH_SIZE_MASK)) as usize } fn num_hash_bytes() -> u16 { 3 } + + fn new_hash_chain(self) -> Self::HashChainType { + crate::hash_chain::HashChainNormalize::::new(self) + } } #[derive(Default, Copy, Clone)] -pub struct LibdeflateRotatingHash4 { - hash: u32, -} +pub struct LibdeflateRotatingHash4 {} -impl RotatingHashTrait for LibdeflateRotatingHash4 { - fn hash(&self, mask: u16) -> usize { - debug_assert!(mask == 0xffff); - (self.hash.wrapping_mul(0x1E35A7BD) >> 16) as usize - } +impl HashImplementation for 
LibdeflateRotatingHash4 { + type HashChainType = HashChainNormalizeLibflate4; - fn append(&self, c: u8, _hash_shift: u32) -> Self { - Self { - hash: ((c as u32) << 24) | (self.hash >> 8), - } - } + fn get_hash(&self, b: &[u8]) -> usize { + let hash = u32::from_le_bytes([b[0], b[1], b[2], b[3]]); - fn hash_algorithm() -> HashAlgorithm { - HashAlgorithm::Libdeflate4 + (hash.wrapping_mul(0x1E35A7BD) >> 16) as usize } fn num_hash_bytes() -> u16 { 4 } + + fn new_hash_chain(self) -> Self::HashChainType { + crate::hash_chain::HashChainNormalizeLibflate4::new() + } } /// This is the 3 byte version of the libdeflate hash algorithm, which is used /// as a secondary hash value to find 3 byte matches within the first 4096K if /// we fail to find any four byte matches with the primary hash. #[derive(Default, Copy, Clone)] -pub struct LibdeflateRotatingHash3 { - hash: u32, -} +pub struct LibdeflateRotatingHash3 {} -impl RotatingHashTrait for LibdeflateRotatingHash3 { - fn hash(&self, mask: u16) -> usize { - debug_assert!(mask == 0x7fff); - (self.hash.wrapping_mul(0x1E35A7BD) >> 17) as usize - } +impl HashImplementation for LibdeflateRotatingHash3 { + type HashChainType = HashChainNormalize; - fn append(&self, c: u8, _hash_shift: u32) -> Self { - Self { - hash: (c as u32) << 16 | (self.hash >> 8), - } - } + fn get_hash(&self, b: &[u8]) -> usize { + let hash = u32::from(b[0]) | (u32::from(b[1]) << 8) | (u32::from(b[2]) << 16); - fn hash_algorithm() -> HashAlgorithm { - unimplemented!(); + (hash.wrapping_mul(0x1E35A7BD) >> 17) as usize } fn num_hash_bytes() -> u16 { 3 } + + fn new_hash_chain(self) -> Self::HashChainType { + unimplemented!(); + } } #[derive(Default, Copy, Clone)] -pub struct ZlibNGHash { - hash: u32, -} +pub struct ZlibNGHash {} + +impl HashImplementation for ZlibNGHash { + type HashChainType = HashChainNormalize; + + fn get_hash(&self, b: &[u8]) -> usize { + let hash = u32::from_le_bytes([b[0], b[1], b[2], b[3]]); -impl RotatingHashTrait for ZlibNGHash { - fn 
hash(&self, mask: u16) -> usize { - debug_assert!(mask == 0xffff); - (self.hash.wrapping_mul(2654435761) >> 16) as usize + (hash.wrapping_mul(2654435761) >> 16) as usize } - fn append(&self, c: u8, _hash_shift: u32) -> Self { - Self { - hash: ((c as u32) << 24) | (self.hash >> 8), - } + fn num_hash_bytes() -> u16 { + 4 } - fn hash_algorithm() -> HashAlgorithm { - HashAlgorithm::ZlibNG + fn new_hash_chain(self) -> Self::HashChainType { + crate::hash_chain::HashChainNormalize::::new(self) + } +} + +/// This vector uses a lookup into a table for random values +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] +pub struct RandomVectorHash {} + +const RANDOM_VECTOR: [u16; 768] = [ + 0x499d, 0x3dc2, 0x2d07, 0x705b, 0x7a76, 0x3469, 0x59db, 0x0c58, 0x2b72, 0x412d, 0x1246, 0x2095, + 0x1c1c, 0x4726, 0x5f45, 0x2c4e, 0x7b1b, 0x1e70, 0x2743, 0x554f, 0x1334, 0x5328, 0x78c1, 0x41cc, + 0x4b2c, 0x62a5, 0x1d93, 0x4aa4, 0x64c8, 0x65f0, 0x194d, 0x1ac0, 0x3f96, 0x41df, 0x4389, 0x065b, + 0x4b74, 0x15e2, 0x0389, 0x0b7e, 0x5778, 0x5d95, 0x7ffc, 0x1e6f, 0x5465, 0x23d3, 0x01ab, 0x567e, + 0x0b3b, 0x6c2f, 0x5e4d, 0x2641, 0x03a4, 0x1214, 0x4b01, 0x48f3, 0x7ba9, 0x7009, 0x1270, 0x0e67, + 0x40e8, 0x710d, 0x6b7f, 0x1418, 0x45f6, 0x2785, 0x4725, 0x7904, 0x14a2, 0x71b8, 0x3189, 0x6ccc, + 0x4d66, 0x701e, 0x4148, 0x6c05, 0x01a8, 0x5ff1, 0x4fbb, 0x0a2a, 0x541d, 0x4378, 0x3f15, 0x3677, + 0x0d82, 0x578b, 0x345d, 0x6052, 0x0beb, 0x553d, 0x4d89, 0x1315, 0x311c, 0x3f33, 0x226d, 0x3223, + 0x478b, 0x487b, 0x5326, 0x160e, 0x05b3, 0x486d, 0x0f2f, 0x1ecc, 0x04b7, 0x01a0, 0x6f70, 0x425c, + 0x3d3f, 0x1610, 0x4211, 0x68d3, 0x3041, 0x7ddf, 0x5967, 0x36f3, 0x31a5, 0x2137, 0x4692, 0x56de, + 0x53d8, 0x4466, 0x5720, 0x6d64, 0x3421, 0x6979, 0x3151, 0x5ee6, 0x0e2f, 0x35d8, 0x30ff, 0x3070, + 0x19b1, 0x4651, 0x6b4f, 0x4cea, 0x7991, 0x4e0b, 0x2d3f, 0x3d1e, 0x09a0, 0x4bac, 0x0571, 0x079a, + 0x4380, 0x411a, 0x4012, 0x57f5, 0x0f7a, 0x5ae9, 0x1b6d, 0x6f3c, 0x3b37, 0x0b66, 0x60af, 0x17b9, + 0x77df, 0x286f, 0x14c9, 0x2274, 
0x1d96, 0x67dc, 0x7801, 0x68d9, 0x0942, 0x1c06, 0x4922, 0x7a4b, + 0x1732, 0x6c5d, 0x4928, 0x3c70, 0x64fa, 0x6ce8, 0x2979, 0x163b, 0x4379, 0x64ee, 0x37d3, 0x5bf2, + 0x1725, 0x5749, 0x26aa, 0x13e7, 0x1e82, 0x2226, 0x723c, 0x4677, 0x4a6f, 0x0e39, 0x6431, 0x50f7, + 0x7ff9, 0x7b82, 0x2307, 0x7254, 0x1c17, 0x1d2c, 0x580d, 0x3b5f, 0x3e99, 0x46ee, 0x3105, 0x5d19, + 0x38bb, 0x4134, 0x21bc, 0x068a, 0x0e6b, 0x5aa7, 0x68ef, 0x2bd2, 0x71b5, 0x0db8, 0x28c5, 0x5a48, + 0x14ad, 0x1ec0, 0x2c71, 0x690c, 0x1559, 0x5638, 0x73b2, 0x26c6, 0x301b, 0x2aad, 0x256f, 0x15fd, + 0x7e60, 0x5a5a, 0x70a8, 0x70a2, 0x3c76, 0x5a00, 0x49b3, 0x0f1d, 0x7a43, 0x18d8, 0x56e1, 0x6101, + 0x3f86, 0x4ad9, 0x26b4, 0x0305, 0x388c, 0x13e2, 0x36e9, 0x35e4, 0x587c, 0x2e31, 0x5ecb, 0x2ed3, + 0x4493, 0x40a6, 0x0d5c, 0x57de, 0x5b6b, 0x656c, 0x1ca2, 0x167c, 0x65a5, 0x7597, 0x1f4f, 0x47dd, + 0x602c, 0x2169, 0x7ccb, 0x7719, 0x07a3, 0x735b, 0x1afd, 0x6315, 0x1fba, 0x36fe, 0x5961, 0x4c63, + 0x79af, 0x1126, 0x269a, 0x312f, 0x3d20, 0x1783, 0x334b, 0x44a8, 0x6580, 0x2f6b, 0x5174, 0x5daf, + 0x01b4, 0x15b8, 0x33c1, 0x5c4b, 0x302f, 0x73bf, 0x59ce, 0x0b13, 0x1c9b, 0x2e1b, 0x27f7, 0x00a7, + 0x7c7e, 0x6763, 0x202e, 0x7a6d, 0x4a1c, 0x20dd, 0x591d, 0x7edb, 0x7c3b, 0x7532, 0x1909, 0x1dd6, + 0x466a, 0x72d0, 0x2c9a, 0x79d7, 0x0fda, 0x6dc0, 0x4907, 0x0a6c, 0x3f75, 0x34cc, 0x6e42, 0x35e4, + 0x6dbb, 0x51f0, 0x2af5, 0x441f, 0x6907, 0x27d9, 0x540b, 0x7095, 0x6723, 0x66b3, 0x1f85, 0x6213, + 0x405b, 0x06ed, 0x1d8b, 0x6550, 0x2585, 0x002e, 0x3c07, 0x5208, 0x7933, 0x3897, 0x777d, 0x03db, + 0x4d9f, 0x50cc, 0x31f1, 0x3213, 0x4a70, 0x6e2f, 0x78c4, 0x5c1e, 0x391e, 0x0e49, 0x007b, 0x7c8f, + 0x55d8, 0x51b7, 0x4477, 0x61ac, 0x7eb2, 0x330e, 0x1882, 0x4d04, 0x4b59, 0x3188, 0x74f5, 0x3ebe, + 0x2a7f, 0x6b8e, 0x705b, 0x6688, 0x1cfc, 0x084d, 0x60ed, 0x1cd9, 0x5799, 0x1f59, 0x0beb, 0x6732, + 0x6640, 0x782b, 0x455f, 0x5910, 0x7066, 0x26b0, 0x26d2, 0x7e26, 0x22bd, 0x15b3, 0x634e, 0x24f0, + 0x4649, 0x282b, 0x5631, 0x4539, 0x1b49, 0x4023, 0x48b1, 0x115b, 0x6ca6, 
0x5bde, 0x4f40, 0x288f, + 0x4106, 0x6f41, 0x62fe, 0x09b1, 0x7929, 0x71e0, 0x2a80, 0x2164, 0x66be, 0x3fa8, 0x094b, 0x4a09, + 0x1177, 0x355f, 0x645a, 0x2940, 0x5a2a, 0x5369, 0x7ade, 0x0a66, 0x74e8, 0x6502, 0x6cbb, 0x1971, + 0x2ba3, 0x0ab5, 0x2f4f, 0x4539, 0x150e, 0x1dc4, 0x3262, 0x04ed, 0x5df0, 0x35af, 0x5c4a, 0x4fb4, + 0x5fcd, 0x0dc7, 0x6fef, 0x266e, 0x0be6, 0x69d9, 0x5e02, 0x4650, 0x561f, 0x03e8, 0x26e5, 0x4778, + 0x6be3, 0x4375, 0x1559, 0x7786, 0x0653, 0x2a4a, 0x4825, 0x70f0, 0x56f2, 0x596f, 0x4f6b, 0x0937, + 0x4e89, 0x5390, 0x5bf9, 0x03ea, 0x1eb7, 0x1296, 0x1966, 0x77bc, 0x6d2a, 0x3cf1, 0x43a7, 0x01a3, + 0x2e0f, 0x696e, 0x5654, 0x4ba6, 0x66be, 0x6b16, 0x2c6c, 0x3db4, 0x7b52, 0x2d5f, 0x0b3c, 0x7391, + 0x25f7, 0x45bf, 0x44c7, 0x7052, 0x3da7, 0x117c, 0x0797, 0x20b9, 0x6b35, 0x61bc, 0x511a, 0x2168, + 0x7693, 0x6de2, 0x4c7c, 0x04e1, 0x234a, 0x1e36, 0x16c7, 0x2b67, 0x5c40, 0x1dd8, 0x7164, 0x77cc, + 0x0c10, 0x6789, 0x1a4b, 0x42dd, 0x5ea5, 0x545a, 0x2c55, 0x0eb7, 0x6126, 0x48b6, 0x1a5b, 0x093d, + 0x77ee, 0x75d6, 0x5e4c, 0x0153, 0x2b53, 0x5587, 0x4e6d, 0x4cff, 0x2afb, 0x37e1, 0x4f61, 0x6ff2, + 0x1758, 0x74b2, 0x0b70, 0x4146, 0x51b8, 0x51fe, 0x6fae, 0x696b, 0x0a58, 0x43d0, 0x623e, 0x57c4, + 0x07f8, 0x712c, 0x1221, 0x7378, 0x7c69, 0x7bd0, 0x00f4, 0x35de, 0x6cd7, 0x4947, 0x6344, 0x1575, + 0x67ed, 0x1bd0, 0x45f3, 0x3d2d, 0x0bd1, 0x66c8, 0x7c11, 0x47b0, 0x19bb, 0x6695, 0x6509, 0x5eed, + 0x4e6a, 0x19ac, 0x3234, 0x5dab, 0x3a2b, 0x7a79, 0x5c58, 0x2347, 0x434b, 0x32a7, 0x3eb5, 0x1a2a, + 0x02ec, 0x1f61, 0x62a7, 0x70c0, 0x228e, 0x445d, 0x5ab6, 0x401c, 0x5404, 0x41cd, 0x46a9, 0x3358, + 0x1cb1, 0x67d6, 0x3106, 0x7ae3, 0x1ea6, 0x2ad7, 0x07d5, 0x7aa5, 0x750a, 0x6601, 0x595b, 0x4867, + 0x7b8c, 0x0c0c, 0x3f99, 0x7843, 0x27ac, 0x7a3c, 0x7928, 0x20d9, 0x024f, 0x6c8f, 0x1b90, 0x1142, + 0x75c0, 0x0227, 0x1cb7, 0x4863, 0x7705, 0x553f, 0x7d44, 0x6dff, 0x5f8c, 0x3dae, 0x1984, 0x2410, + 0x757d, 0x6403, 0x567c, 0x4bda, 0x49de, 0x10e9, 0x6a0a, 0x2054, 0x5cb1, 0x534e, 0x0206, 0x7a42, + 0x66b3, 
0x18f0, 0x604f, 0x1b4f, 0x2b97, 0x1a34, 0x0284, 0x5d71, 0x0642, 0x6390, 0x6d85, 0x2e2a, + 0x17d9, 0x3d3f, 0x35d6, 0x4118, 0x5700, 0x3e89, 0x6ddb, 0x0dc2, 0x6750, 0x232e, 0x566b, 0x77b6, + 0x607f, 0x31cc, 0x0c29, 0x602b, 0x50f6, 0x6ac0, 0x305c, 0x181a, 0x4c16, 0x701b, 0x7b3d, 0x20c5, + 0x3359, 0x7034, 0x1837, 0x090a, 0x5f2d, 0x5837, 0x53dd, 0x6827, 0x0afb, 0x2968, 0x5983, 0x3a36, + 0x6a3b, 0x0b8e, 0x04e4, 0x3bf7, 0x3bba, 0x2c2b, 0x084e, 0x5ad4, 0x0da4, 0x6828, 0x7332, 0x15f4, + 0x034d, 0x1c30, 0x6907, 0x6c5f, 0x07c3, 0x0154, 0x69d0, 0x6779, 0x30bc, 0x7bf6, 0x702e, 0x614c, + 0x2696, 0x76ff, 0x0463, 0x56f7, 0x5cfa, 0x6bf7, 0x6cbc, 0x57d9, 0x4d25, 0x10fb, 0x4e57, 0x3668, + 0x091c, 0x63a8, 0x1a6d, 0x60b1, 0x5675, 0x62ca, 0x5a16, 0x550e, 0x3b66, 0x1479, 0x6827, 0x1511, + 0x64e9, 0x7ee7, 0x7b8d, 0x4137, 0x1c46, 0x44e9, 0x6d7c, 0x1709, 0x646e, 0x620a, 0x497a, 0x2971, + 0x23df, 0x1451, 0x558d, 0x693c, 0x52d6, 0x27e1, 0x487d, 0x404e, 0x092b, 0x1f57, 0x33b7, 0x3748, +]; + +impl HashImplementation for RandomVectorHash { + type HashChainType = HashChainNormalize; + + fn get_hash(&self, b: &[u8]) -> usize { + (RANDOM_VECTOR[b[0] as usize] + ^ RANDOM_VECTOR[b[1] as usize + 256] + ^ RANDOM_VECTOR[b[2] as usize + 512]) as usize } fn num_hash_bytes() -> u16 { - 4 + 3 + } + + fn new_hash_chain(self) -> Self::HashChainType { + crate::hash_chain::HashChainNormalize::::new(self) } } diff --git a/src/hash_chain.rs b/src/hash_chain.rs index 3dcc7d1..53986c6 100644 --- a/src/hash_chain.rs +++ b/src/hash_chain.rs @@ -10,7 +10,7 @@ use default_boxed::DefaultBoxed; use crate::{ bit_helper::DebugHash, - hash_algorithm::{HashAlgorithm, LibdeflateRotatingHash3, RotatingHashTrait}, + hash_algorithm::{HashImplementation, LibdeflateRotatingHash3, LibdeflateRotatingHash4}, preflate_input::PreflateInput, preflate_token::PreflateTokenReference, }; @@ -31,7 +31,6 @@ pub enum DictionaryAddPolicy { /// Add only the first and last substring of a match to the dictionary that are larger than the limit 
AddFirstAndLast(u16), } - #[derive(Default, Copy, Clone, Eq, PartialEq, Debug)] struct InternalPosition { pos: u16, @@ -68,7 +67,7 @@ impl InternalPosition { } #[derive(DefaultBoxed)] -struct HashTable { +struct HashTable { /// Represents the head of the hash chain for a given hash value. In order /// to find additional matches, you follow the prev chain from the head. head: [InternalPosition; 65536], @@ -93,56 +92,19 @@ struct HashTable { /// with an offset of 8 to avoid confusion with the end of the chain) prev: [InternalPosition; 65536], - hash_shift: u32, - running_hash: H, - hash_mask: u16, + /// hash function used to calculate the hash + hash: H, } -impl HashTable { - fn get_head(&self, hash: H) -> InternalPosition { - let h = hash.hash(self.hash_mask); +impl HashTable { + fn get_head(&self, h: usize) -> InternalPosition { self.head[h] } - fn get_running_hash(&self) -> usize { - self.running_hash.hash(self.hash_mask) - } - - /// calculate the hash for the current byte in the input stream, which - /// consists of the running hash plus the current character - fn calculate_hash(&self, input: &PreflateInput) -> H { - self.running_hash.append( - input.cur_char(H::num_hash_bytes() as i32 - 1), - self.hash_shift, - ) - } - - /// calculate the hash for the next byte in the input stream which - /// consists of the running hash plus the next 2 characters - fn calculate_hash_next(&self, input: &PreflateInput) -> H { - self.calculate_hash(input) - .append(input.cur_char(H::num_hash_bytes() as i32), self.hash_shift) - } - - fn hash_equal(&self, a: H, b: H) -> bool { - a.hash(self.hash_mask) == b.hash(self.hash_mask) - } - fn get_node_depth(&self, node: InternalPosition) -> i32 { self.chain_depth[node.to_index()] } - fn init_running_hash(&mut self, input: &PreflateInput) { - self.running_hash = H::default(); - for i in 0..H::num_hash_bytes() - 1 { - self.update_running_hash(input.cur_char(i as i32)); - } - } - - fn update_running_hash(&mut self, b: u8) { - 
self.running_hash = self.running_hash.append(b, self.hash_shift); - } - fn update_chain( &mut self, chars: &[u8], @@ -158,13 +120,11 @@ impl HashTable { let last = cmp::min(length as usize, chars.len() - offset); for i in 0..last { - self.update_running_hash(chars[i + offset]); - if UPDATE_MODE == UPDATE_MODE_ALL || (UPDATE_MODE == UPDATE_MODE_FIRST && i == 0) || (UPDATE_MODE == UPDATE_MODE_FIRST_AND_LAST && (i == 0 || i == last - 1)) { - let h = self.get_running_hash(); + let h = self.hash.get_hash(&chars[i..]); if MAINTAIN_DEPTH { self.chain_depth[pos.to_index()] = @@ -183,7 +143,7 @@ impl HashTable { } fn reshift(&mut self) { - for i in 0..=usize::from(self.hash_mask) { + for i in 0..=65535 { self.head[i] = self.head[i].saturating_sub(DELTA as u16); } @@ -198,8 +158,8 @@ impl HashTable { } pub fn match_depth(&self, end_pos: InternalPosition, input: &PreflateInput) -> u32 { - let hash = self.calculate_hash(input); - let head = self.get_head(hash); + let h = self.hash.get_hash(input.cur_chars(0)); + let head = self.get_head(h); let start_depth = self.get_node_depth(head); @@ -218,47 +178,146 @@ impl HashTable { } } -pub struct HashChain { +pub trait HashChain { + fn iterate<'a>(&'a self, input: &PreflateInput, offset: u32) -> impl Iterator + 'a; + + fn update_hash( + &mut self, + length: u32, + input: &PreflateInput, + ); + + fn match_depth( + &self, + target_reference: &PreflateTokenReference, + window_size: u32, + input: &PreflateInput, + ) -> u32; + + fn checksum(&self, checksum: &mut DebugHash); + + fn update_hash_with_policy( + &mut self, + length: u32, + input: &PreflateInput, + add_policy: DictionaryAddPolicy, + ) { + match add_policy { + DictionaryAddPolicy::AddAll => { + self.update_hash::(length, input); + } + DictionaryAddPolicy::AddFirst(limit) => { + if length > limit.into() { + self.update_hash::(length, input); + } else { + self.update_hash::(length, input); + } + } + DictionaryAddPolicy::AddFirstAndLast(limit) => { + if length > limit.into() { + 
self.update_hash::(length, input); + } else { + self.update_hash::(length, input); + } + } + } + } +} + +/// This hash chain algorithm periodically normalizes the hash table +pub struct HashChainNormalize { hash_table: Box>, - hash_table_3_len: Option>>, total_shift: i32, } -impl HashChain { - pub fn new(hash_shift: u32, hash_mask: u16, input: &PreflateInput) -> Self { +impl HashChainNormalize { + pub fn new(hash: H) -> Self { // Important: total_shift starts at -8 since 0 indicates the end of the hash chain // so this means that all valid values will be >= 8, otherwise the very first hash // offset would be zero and so it would get missed - let mut c = HashChain { + let mut c = HashChainNormalize { total_shift: -8, hash_table: HashTable::default_boxed(), - hash_table_3_len: None, }; - c.hash_table.hash_shift = hash_shift; - c.hash_table.hash_mask = hash_mask; + c.hash_table.hash = hash; - // initialize running hash so that it has the first bytes in it to start working - c.hash_table.init_running_hash(input); + c + } +} + +impl HashChain for HashChainNormalize { + fn iterate<'a>(&'a self, input: &PreflateInput, offset: u32) -> impl Iterator + 'a { + let ref_pos = InternalPosition::from_absolute(input.pos() + offset, self.total_shift); - // Libflate4 uses a 4 byte hash to find 4 byte matches, and if it doesn't - // find anything, it uses a 3 byte hash to find 3 byte matches within the - // first 4096 bytes. 
- if H::hash_algorithm() == HashAlgorithm::Libdeflate4 { - let mut libdeflate3 = HashTable::::default_boxed(); + // if we have a match that needs to be inserted at the head first before + // we start walking the chain + let mut first_match = None; + + let h1 = self.hash_table.hash.get_hash(input.cur_chars(0)); + + let curr_hash; + + if offset == 0 { + curr_hash = h1; + } else { + assert_eq!(offset, 1); - libdeflate3.hash_shift = 0; // shift is hardcoded for this hash - libdeflate3.hash_mask = 0x7fff; - libdeflate3.init_running_hash(input); + // current hash is the next hash since we are starting at offset 1 + curr_hash = self.hash_table.hash.get_hash(input.cur_chars(1)); - c.hash_table_3_len = Some(libdeflate3); + // we are a lazy match, then we haven't added the last byte to the hash yet + // which is a problem if that hash should have been part of this hash chain + // (ie the same hash chain) and we have a limited number of enumerations + // throught the hash chain. + // + // In order to fix this, we see if the hashes are the same, and then add + // a distance 1 item to the iterator that we return. 
+ if h1 == curr_hash { + first_match = Some(1); + } } - c + let mut cur_pos = self.hash_table.get_head(curr_hash); + + std::iter::from_fn(move || { + if let Some(d) = first_match { + first_match = None; + Some(d) + } else { + if cur_pos.is_valid() { + let d = ref_pos.dist(cur_pos); + cur_pos = self.hash_table.prev[cur_pos.to_index()]; + Some(d) + } else { + None + } + } + }) + } + + fn match_depth( + &self, + target_reference: &PreflateTokenReference, + window_size: u32, + input: &PreflateInput, + ) -> u32 { + let cur_pos = input.pos(); + let cur_max_dist = std::cmp::min(cur_pos, window_size); + + if target_reference.dist() > cur_max_dist { + //println!("dtl {:?} > {}", target_reference, cur_max_dist); + return 0xffff; + } + + let end_pos = + InternalPosition::from_absolute(cur_pos - target_reference.dist(), self.total_shift); + + self.hash_table.match_depth(end_pos, input) } #[allow(dead_code)] - pub fn checksum(&self, checksum: &mut DebugHash) { + fn checksum(&self, checksum: &mut DebugHash) { checksum.update_slice(&self.hash_table.chain_depth); //checksum.update_slice(&self.hash_table.head); //checksum.update_slice(&self.hash_table.prev); @@ -267,149 +326,77 @@ impl HashChain { //checksum.update(self.total_shift); } - fn reshift_if_necessary(&mut self, input: &PreflateInput) { + fn update_hash( + &mut self, + length: u32, + input: &PreflateInput, + ) { + assert!(length <= MAX_UPDATE_HASH_BATCH); + if input.pos() as i32 - self.total_shift >= 0xfe08 { const DELTA: usize = 0x7e00; self.hash_table.reshift::(); - if let Some(x) = self.hash_table_3_len.as_mut() { - x.reshift::(); - } self.total_shift += DELTA as i32; } - } - - #[cfg(bad)] - pub fn validate_hash_chains(&self, input: &PreflateInput) { - /* - let window_start = cmp::min(32768, input.pos()); - - let hash_calc = H::default(); - for i in (1..window_start).rev() - { - hash_calc.append(input.cur_char(-(i as i32)), self.hash_shift); - - if i >= window_start - H::num_hash_bytes() as u32 { - continue; - } - - 
let pos = InternalPosition::from_absolute(input.pos() - i, self.total_shift); - let h = hash_calc.hash(self.hash_mask); - assert_eq!(self.hash_table.chain_depth_v[pos.to_index()], h); - }*/ + let pos = InternalPosition::from_absolute(input.pos(), self.total_shift); + let chars = input.cur_chars(0); - for i in 0..=self.hash_mask as usize { - let mut h = self.hash_table.head[i]; - while h.is_valid() { - assert_eq!(self.hash_table.chain_depth_v[h.to_index()], i); - h = self.hash_table.prev[h.to_index()]; - } - } + self.hash_table + .update_chain::(chars, pos, length); } +} - /// construct a hash chain from scratch and verify that we match the existing hash chain - /// used for debugging only - #[allow(dead_code)] - #[cfg(bad)] - pub fn verify_hash(&self, dist: Option, input: &PreflateInput) { - let mut hash = H::default(); - let mut start_pos = self.total_shift; - - let mut chains: Vec> = Vec::new(); - chains.resize(self.hash_mask as usize + 1, Vec::new()); - - let mut start_delay = H::num_hash_bytes() - 1; - - let window_size = cmp::min(input.pos(), 0x8000); - - while start_pos - 1 <= input.pos() as i32 { - hash = hash.append( - input.cur_char(start_pos - input.pos() as i32), - self.hash_shift, - ); - - if start_delay > 0 { - start_delay -= 1; - } else { - chains[hash.hash(self.hash_mask) as usize].push( - InternalPosition::from_absolute(start_pos, self.total_shift).sub_offset(2), - ); - } - - start_pos += 1; - } - - let distance = dist.map_or(0, |d| d.dist() as i32); - - println!( - "MATCH t={:?} a={:?} b={:?} d={}", - dist, - &input.cur_chars(-distance)[0..10], - &input.cur_chars(0)[0..10], - input.pos() - self.total_shift as u32 - distance as u32 - ); - - //println!("MATCH pos = {}, total_shift = {}", self.input.pos(), self.total_shift); - let mut mismatch = false; - for i in 0..=self.hash_mask { - let current_chain = &chains[i as usize]; - - let mut hash_table_chain = Vec::new(); - hash_table_chain.reserve(current_chain.len()); - - let mut curr_pos = 
self.hash_table.head[i as usize]; - while curr_pos.is_valid() { - hash_table_chain.push(curr_pos); - curr_pos = self.hash_table.prev[curr_pos.to_index()]; - } - hash_table_chain.reverse(); - - if hash_table_chain[..] != current_chain[..] { - mismatch = true; - println!( - "HASH {i} MISMATCH a={:?} b={:?}", - hash_table_chain, current_chain - ); - } +/// implementation of the hash chain that uses the libdeflate rotating hash. +/// This consists of two hash tables, one for length 3 and one for length 4. +pub struct HashChainNormalizeLibflate4 { + hash_table: Box>, + hash_table_3: Box>, + total_shift: i32, +} - //assert_eq!(0, chains[i as usize].len()); +impl HashChainNormalizeLibflate4 { + pub fn new() -> Self { + // Important: total_shift starts at -8 since 0 indicates the end of the hash chain + // so this means that all valid values will be >= 8, otherwise the very first hash + // offset would be zero and so it would get missed + HashChainNormalizeLibflate4 { + total_shift: -8, + hash_table: HashTable::default_boxed(), + hash_table_3: HashTable::default_boxed(), } - assert!(!mismatch); } +} - pub fn iterate<'a>( - &'a self, - input: &PreflateInput, - offset: u32, - ) -> impl Iterator + 'a { +impl HashChain for HashChainNormalizeLibflate4 { + fn iterate<'a>(&'a self, input: &PreflateInput, offset: u32) -> impl Iterator + 'a { let ref_pos = InternalPosition::from_absolute(input.pos() + offset, self.total_shift); // if we have a match that needs to be inserted at the head first before // we start walking the chain let mut first_match = None; - let curr_hash; + let mut cur_pos; if offset == 0 { // for libflate, we look once at the 3 length hash table for a match // and then walk the length 4 hash table - if let Some(x) = &self.hash_table_3_len { - let curr_hash = x.calculate_hash(input); - let start_pos = x.get_head(curr_hash); + let curr_hash = self.hash_table_3.hash.get_hash(input.cur_chars(0)); + let start_pos = self.hash_table_3.get_head(curr_hash); - if 
start_pos.is_valid() { - first_match = Some(ref_pos.dist(start_pos)); - } + if start_pos.is_valid() { + first_match = Some(ref_pos.dist(start_pos)); } - curr_hash = self.hash_table.calculate_hash(input); + let curr_hash = self.hash_table.hash.get_hash(input.cur_chars(0)); + cur_pos = self.hash_table.get_head(curr_hash); } else { assert_eq!(offset, 1); // current hash is the next hash since we are starting at offset 1 - curr_hash = self.hash_table.calculate_hash_next(input); + let curr_hash = self.hash_table.hash.get_hash(input.cur_chars(1)); // we are a lazy match, then we haven't added the last byte to the hash yet // which is a problem if that hash should have been part of this hash chain @@ -418,13 +405,13 @@ impl HashChain { // // In order to fix this, we see if the hashes are the same, and then add // a distance 1 item to the iterator that we return. - let prev_hash = self.hash_table.calculate_hash(input); - if self.hash_table.hash_equal(prev_hash, curr_hash) { + let prev_hash = self.hash_table.hash.get_hash(input.cur_chars(0)); + if prev_hash == curr_hash { first_match = Some(1); } - } - let mut cur_pos = self.hash_table.get_head(curr_hash); + cur_pos = self.hash_table.get_head(curr_hash); + } std::iter::from_fn(move || { if let Some(d) = first_match { @@ -442,33 +429,55 @@ impl HashChain { }) } - pub fn update_hash_with_policy( - &mut self, - length: u32, + fn match_depth( + &self, + target_reference: &PreflateTokenReference, + window_size: u32, input: &PreflateInput, - add_policy: DictionaryAddPolicy, - ) { - match add_policy { - DictionaryAddPolicy::AddAll => { - self.update_hash::(length, input); + ) -> u32 { + let cur_pos = input.pos(); + let cur_max_dist = std::cmp::min(cur_pos, window_size); + + if target_reference.dist() > cur_max_dist { + //println!("dtl {:?} > {}", target_reference, cur_max_dist); + return 0xffff; + } + + let end_pos = + InternalPosition::from_absolute(cur_pos - target_reference.dist(), self.total_shift); + + if 
target_reference.len() == 3 { + // libdeflate uses the 3 byte hash table only for a single match attempt + // only legal location for the 3 byte hash is at the beginning of the chain, otherwise + // we wouldn't find it using the libdeflate algorithm + if self.hash_table_3.match_depth(end_pos, input) == 0 { + return 0; + } else { + return 0xffff; } - DictionaryAddPolicy::AddFirst(limit) => { - if length > limit.into() { - self.update_hash::(length, input); - } else { - self.update_hash::(length, input); - } + } else { + let mut d = self.hash_table.match_depth(end_pos, input); + if d == 0xffff { + return d; } - DictionaryAddPolicy::AddFirstAndLast(limit) => { - if length > limit.into() { - self.update_hash::(length, input); - } else { - self.update_hash::(length, input); - } + + // if there was a valid 3 byte match, then the hash chain will be one larger + // than the 4 byte hash chain + if self.hash_table_3.head[self.hash_table_3.hash.get_hash(input.cur_chars(0))] + .is_valid() + { + d += 1; } + + return d; } } + #[allow(dead_code)] + fn checksum(&self, checksum: &mut DebugHash) { + checksum.update_slice(&self.hash_table.chain_depth); + } + fn update_hash( &mut self, length: u32, @@ -476,7 +485,14 @@ impl HashChain { ) { assert!(length <= MAX_UPDATE_HASH_BATCH); - self.reshift_if_necessary::(input); + if input.pos() as i32 - self.total_shift >= 0xfe08 { + const DELTA: usize = 0x7e00; + + self.hash_table.reshift::(); + self.hash_table_3.reshift::(); + + self.total_shift += DELTA as i32; + } let pos = InternalPosition::from_absolute(input.pos(), self.total_shift); let chars = input.cur_chars(0); @@ -484,16 +500,84 @@ impl HashChain { self.hash_table .update_chain::(chars, pos, length); - // maintain the extra 3 length chain if we have it - if let Some(x) = self.hash_table_3_len.as_mut() { - x.update_chain::(chars, pos, length); + self.hash_table_3 + .update_chain::(chars, pos, length); + } +} + +/* +/// This hash chain algorithm periodically normalizes the hash table 
+pub struct HashChainAbs { + + head : [u32; 32768], + + prev : [u32; 32768], + + running_hash : H, +} + +impl HashChain for HashChainAbs { + fn new(_hash_shift: u32, _hash_mask: u16, input: &PreflateInput) -> Self { + + let mut c = HashChainAbs { + head: [0; 32768], + prev: [0; 32768], + running_hash: H::default(), + }; + + // initialize running hash so that it has the first bytes in it to start working + c.running_hash = H::init(input, 0); + + c + } + + fn iterate<'a>(&'a self, input: &PreflateInput, offset: u32) -> impl Iterator + 'a { + // if we have a match that needs to be inserted at the head first before + // we start walking the chain + let mut first_match = None; + + let curr_hash; + + if offset == 0 { + curr_hash = self.hash_table.calculate_hash(input); + } else { + assert_eq!(offset, 1); + + // current hash is the next hash since we are starting at offset 1 + curr_hash = self.hash_table.calculate_hash_next(input); + + // we are a lazy match, then we haven't added the last byte to the hash yet + // which is a problem if that hash should have been part of this hash chain + // (ie the same hash chain) and we have a limited number of enumerations + // throught the hash chain. + // + // In order to fix this, we see if the hashes are the same, and then add + // a distance 1 item to the iterator that we return. 
+ let prev_hash = self.hash_table.calculate_hash(input); + if self.hash_table.hash_equal(prev_hash, curr_hash) { + first_match = Some(1); + } } - //let c = self.checksum_whole_struct(); - //println!("u {} = {}", length, c); + let mut cur_pos = self.hash_table.get_head(curr_hash); + + std::iter::from_fn(move || { + if let Some(d) = first_match { + first_match = None; + Some(d) + } else { + if cur_pos.is_valid() { + let d = ref_pos.dist(cur_pos); + cur_pos = self.hash_table.prev[cur_pos.to_index()]; + Some(d) + } else { + None + } + } + }) } - pub fn match_depth( + fn match_depth( &self, target_reference: &PreflateTokenReference, window_size: u32, @@ -510,32 +594,39 @@ impl HashChain { let end_pos = InternalPosition::from_absolute(cur_pos - target_reference.dist(), self.total_shift); - if let Some(x) = &self.hash_table_3_len { - if target_reference.len() == 3 { - // libdeflate uses the 3 byte hash table only for a single match attempt - // only legal location for the 3 byte hash is at the beginning of the chain, otherwise - // we wouldn't find it using the libdeflate algorithm - if x.match_depth(end_pos, input) == 0 { - return 0; - } else { - return 0xffff; - } - } else { - let mut d = self.hash_table.match_depth(end_pos, input); - if d == 0xffff { - return d; - } + self.hash_table.match_depth(end_pos, input) + } - // if there was a valid 3 byte match, then the hash chain will be one larger - // than the 4 byte hash chain - if x.head[x.calculate_hash(input).hash(x.hash_mask)].is_valid() { - d += 1; - } + #[allow(dead_code)] + fn checksum(&self, checksum: &mut DebugHash) { + checksum.update_slice(&self.hash_table.chain_depth); + //checksum.update_slice(&self.hash_table.head); + //checksum.update_slice(&self.hash_table.prev); + //checksum.update(self.hash_shift); + //checksum.update(self.running_hash.hash(self.hash_mask)); + //checksum.update(self.total_shift); + } - return d; - } + fn update_hash( + &mut self, + length: u32, + input: &PreflateInput, + ) { + 
assert!(length <= MAX_UPDATE_HASH_BATCH); + + if input.pos() as i32 - self.total_shift >= 0xfe08 { + const DELTA: usize = 0x7e00; + + self.hash_table.reshift::(); + + self.total_shift += DELTA as i32; } - self.hash_table.match_depth(end_pos, input) + let pos = InternalPosition::from_absolute(input.pos(), self.total_shift); + let chars = input.cur_chars(0); + + self.hash_table + .update_chain::(chars, pos, length); } } +*/ diff --git a/src/predictor_state.rs b/src/hash_chain_holder.rs similarity index 50% rename from src/predictor_state.rs rename to src/hash_chain_holder.rs index b2ef2f6..70c81e0 100644 --- a/src/predictor_state.rs +++ b/src/hash_chain_holder.rs @@ -5,14 +5,17 @@ *--------------------------------------------------------------------------------------------*/ use crate::bit_helper::DebugHash; -use crate::hash_algorithm::RotatingHashTrait; +use crate::hash_algorithm::{ + HashAlgorithm, HashImplementation, LibdeflateRotatingHash4, MiniZHash, RandomVectorHash, + ZlibNGHash, ZlibRotatingHash, +}; use crate::hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH}; use crate::preflate_constants::{MAX_MATCH, MIN_LOOKAHEAD, MIN_MATCH}; use crate::preflate_input::PreflateInput; -use crate::preflate_parameter_estimator::{PreflateParameters, PreflateStrategy}; +use crate::preflate_parameter_estimator::PreflateStrategy; use crate::preflate_token::PreflateTokenReference; +use crate::token_predictor::TokenPredictorParameters; use std::cmp; -use std::sync::atomic; #[derive(Debug, Copy, Clone)] pub enum MatchResult { @@ -23,106 +26,158 @@ pub enum MatchResult { MaxChainExceeded(u32), } -#[derive(Default)] -pub struct PreflateRematchInfo { - pub requested_match_depth: u32, - pub condensed_hops: u32, -} - -pub struct PredictorState<'a, H: RotatingHashTrait> { - hash: HashChain, - input: PreflateInput<'a>, - params: PreflateParameters, +struct HashChainHolder { + hash: H::HashChainType, + params: TokenPredictorParameters, window_bytes: u32, - last_chain: 
atomic::AtomicU32, } -impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { - pub fn new(uncompressed: &'a [u8], params: &PreflateParameters) -> Self { - let input = PreflateInput::new(uncompressed); - - Self { - hash: HashChain::new(params.hash_shift, params.hash_mask, &input), - window_bytes: 1 << params.window_bits, - params: *params, - input, - last_chain: atomic::AtomicU32::new(0), +/// Factory function to create a new HashChainHolder based on the parameters and returns +/// a boxed trait object. The reason for this is that this lets the compiler optimize the +pub fn new_hash_chain_holder(params: &TokenPredictorParameters) -> Box { + let predictor_state: Box; + match params.hash_algorithm { + HashAlgorithm::Zlib { + hash_mask, + hash_shift, + } => { + predictor_state = Box::new(HashChainHolder::new( + params, + ZlibRotatingHash { + hash_mask, + hash_shift, + }, + )) + } + HashAlgorithm::MiniZFast => { + predictor_state = Box::new(HashChainHolder::new(params, MiniZHash {})) + } + HashAlgorithm::Libdeflate4 => { + predictor_state = Box::new(HashChainHolder::new(params, LibdeflateRotatingHash4 {})) + } + HashAlgorithm::ZlibNG => { + predictor_state = Box::new(HashChainHolder::new(params, ZlibNGHash {})) + } + HashAlgorithm::RandomVector => { + predictor_state = Box::new(HashChainHolder::new(params, RandomVectorHash {})) } } + predictor_state +} - #[allow(dead_code)] - pub fn checksum(&self, checksum: &mut DebugHash) { - self.hash.checksum(checksum); - } +/// trait that is not dependent on the HashImplementation so it can +/// be used in a boxed type by the TokenPredictor +pub trait HashChainHolderTrait { + /// updates the hash dictionary for a given length of matches. + /// + /// If this is a literal, then the update policy is to add all the bytes to the dictionary. 
+ fn update_hash(&mut self, length: u32, input: &mut PreflateInput, is_literal: bool); + + /// updates the hash dictionary for a given length of matches, and also updates the depth + /// map of the hash chain. + /// + /// If this is a literal, then the update policy is to add all the bytes to the dictionary. + fn update_hash_with_depth(&mut self, length: u32, input: &mut PreflateInput, is_literal: bool); + + /// searches the hash chain for a given match, returns the longest result found if any + /// + /// prev_len is the length of the previous match. We won't match anything shorter than that. + /// offset is the offset from the current position in the input (can be 0 for current or 1 for lazy matches) + /// max_depth is the maximum number of hops we will take in the hash chain + fn match_token( + &self, + prev_len: u32, + offset: u32, + max_depth: u32, + input: &PreflateInput, + ) -> MatchResult; - pub fn update_hash_with_policy(&mut self, length: u32, add_policy: DictionaryAddPolicy) { - self.hash - .update_hash_with_policy::(length, &self.input, add_policy); - self.input.advance(length); - } + /// Tries to find the match by continuing on the hash chain, returns how many hops we went + /// or none if it wasn't found + fn calculate_hops( + &self, + target_reference: &PreflateTokenReference, + input: &PreflateInput, + ) -> anyhow::Result; + + /// Does the inverse of calculate_hops, where we start from the predicted token and + /// get the new distance based on the number of hops + fn hop_match(&self, len: u32, hops: u32, input: &PreflateInput) -> anyhow::Result; + + /// Returns the depth of the match, which refers to the number of hops in the hashtable + fn match_depth( + &self, + token: PreflateTokenReference, + window_size: u32, + input: &PreflateInput, + ) -> u32; - pub fn update_hash_batch(&mut self, mut length: u32) { + /// debugging function to verify that the hash chain is correct + fn verify_hash(&self, _dist: Option); + + fn checksum(&self, checksum: &mut 
DebugHash); +} + +impl HashChainHolderTrait for HashChainHolder { + fn update_hash(&mut self, mut length: u32, input: &mut PreflateInput, is_literal: bool) { while length > 0 { let batch_len = cmp::min(length, MAX_UPDATE_HASH_BATCH); self.hash.update_hash_with_policy::( batch_len, - &self.input, - DictionaryAddPolicy::AddAll, + input, + if is_literal { + DictionaryAddPolicy::AddAll + } else { + self.params.add_policy + }, ); - self.input.advance(batch_len); + input.advance(batch_len); length -= batch_len; } } - pub fn current_input_pos(&self) -> u32 { - self.input.pos() - } - - pub fn input_cursor(&self) -> &[u8] { - self.input.cur_chars(0) - } - - pub fn input_cursor_offset(&self, offset: i32) -> &[u8] { - self.input.cur_chars(offset) - } - - pub fn window_size(&self) -> u32 { - self.window_bytes - } - - fn total_input_size(&self) -> u32 { - self.input.size() - } - - pub fn available_input_size(&self) -> u32 { - self.input.remaining() - } - - fn prefix_compare(s1: &[u8], s2: &[u8], best_len: u32, max_len: u32) -> u32 { - assert!(max_len >= 3 && s1.len() >= max_len as usize && s2.len() >= max_len as usize); - - if s1[best_len as usize] != s2[best_len as usize] { - return 0; - } - if s1[0] != s2[0] || s1[1] != s2[1] || s1[2] != s2[2] { - return 0; - } + fn update_hash_with_depth( + &mut self, + mut length: u32, + input: &mut PreflateInput, + is_literal: bool, + ) { + while length > 0 { + let batch_len = cmp::min(length, MAX_UPDATE_HASH_BATCH); - let mut match_len = 3; // Initialize with the length of the fixed prefix - for i in 3..max_len { - if s1[i as usize] != s2[i as usize] { - break; - } - match_len = i + 1; + self.hash.update_hash_with_policy::( + batch_len, + input, + if is_literal { + DictionaryAddPolicy::AddAll + } else { + self.params.add_policy + }, + ); + input.advance(batch_len); + length -= batch_len; } + } - match_len + fn match_depth( + &self, + token: PreflateTokenReference, + window_size: u32, + input: &PreflateInput, + ) -> u32 { + 
self.hash.match_depth(&token, window_size, input) } - pub fn match_token(&self, prev_len: u32, offset: u32, max_depth: u32) -> MatchResult { - let start_pos = self.current_input_pos() + offset; - let max_len = std::cmp::min(self.total_input_size() - start_pos, MAX_MATCH); + fn match_token( + &self, + prev_len: u32, + offset: u32, + max_depth: u32, + input: &PreflateInput, + ) -> MatchResult { + let start_pos = input.pos() + offset; + let max_len = std::cmp::min(input.size() - start_pos, MAX_MATCH); if max_len < std::cmp::max( prev_len + 1, @@ -142,7 +197,7 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { let cur_max_dist_hop0; let cur_max_dist_hop1_plus; if self.params.very_far_matches_detected { - cur_max_dist_hop0 = cmp::min(max_dist_to_start, self.window_size()); + cur_max_dist_hop0 = cmp::min(max_dist_to_start, self.window_bytes); cur_max_dist_hop1_plus = cur_max_dist_hop0; } else { match self.params.strategy { @@ -154,7 +209,7 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { cur_max_dist_hop1_plus = 1; } _ => { - let max_dist: u32 = self.window_size() - MIN_LOOKAHEAD + 1; + let max_dist: u32 = self.window_bytes - MIN_LOOKAHEAD + 1; cur_max_dist_hop0 = cmp::min(max_dist_to_start, max_dist); cur_max_dist_hop1_plus = cmp::min(max_dist_to_start, max_dist - 1); } @@ -164,13 +219,13 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { let nice_len = std::cmp::min(self.params.nice_length, max_len); let mut max_chain = max_depth; - let input = self.input.cur_chars(offset as i32); + let input_chars = input.cur_chars(offset as i32); let mut best_len = prev_len; let mut best_match: Option = None; let mut num_chain_matches = 0; let mut first = true; - for dist in self.hash.iterate(&self.input, offset) { + for dist in self.hash.iterate(input, offset) { // first entry gets a special treatment to make sure it doesn't exceed // the limits we calculated for the first hop if first { @@ -184,9 +239,9 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> 
{ } } - let match_start = self.input.cur_chars(offset as i32 - dist as i32); + let match_start = input.cur_chars(offset as i32 - dist as i32); - let match_length = Self::prefix_compare(match_start, input, best_len, max_len); + let match_length = Self::prefix_compare(match_start, input_chars, best_len, max_len); if match_length > best_len { let r = PreflateTokenReference::new(match_length, dist, false); @@ -203,8 +258,6 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { if max_chain == 0 { if let Some(r) = best_match { - self.last_chain - .store(num_chain_matches, atomic::Ordering::Relaxed); return MatchResult::Success(r); } else { return MatchResult::MaxChainExceeded(max_depth); @@ -221,8 +274,12 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { /// Tries to find the match by continuing on the hash chain, returns how many hops we went /// or none if it wasn't found - pub fn calculate_hops(&self, target_reference: &PreflateTokenReference) -> anyhow::Result { - let max_len = std::cmp::min(self.available_input_size(), MAX_MATCH); + fn calculate_hops( + &self, + target_reference: &PreflateTokenReference, + input: &PreflateInput, + ) -> anyhow::Result { + let max_len = std::cmp::min(input.remaining(), MAX_MATCH); if max_len < target_reference.len() { return Err(anyhow::anyhow!("max_len < target_reference.len()")); @@ -233,16 +290,16 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { let best_len = target_reference.len(); let mut hops = 0; - let cur_max_dist = std::cmp::min(self.current_input_pos(), self.window_size()); + let cur_max_dist = std::cmp::min(input.pos(), self.window_bytes); - for dist in self.hash.iterate(&self.input, 0) { + for dist in self.hash.iterate(input, 0) { if dist > cur_max_dist { break; } - let match_pos = self.input_cursor_offset(-(dist as i32)); + let match_pos = input.cur_chars(-(dist as i32)); let match_length = - Self::prefix_compare(match_pos, self.input_cursor(), best_len - 1, best_len); + 
Self::prefix_compare(match_pos, input.cur_chars(0), best_len - 1, best_len); if match_length >= best_len { hops += 1; @@ -268,23 +325,23 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { /// Does the inverse of calculate_hops, where we start from the predicted token and /// get the new distance based on the number of hops - pub fn hop_match(&self, len: u32, hops: u32) -> anyhow::Result { - let max_len = std::cmp::min(self.available_input_size(), MAX_MATCH); + fn hop_match(&self, len: u32, hops: u32, input: &PreflateInput) -> anyhow::Result { + let max_len = std::cmp::min(input.remaining(), MAX_MATCH); if max_len < len { return Err(anyhow::anyhow!("not enough data left to match")); } - let cur_max_dist = std::cmp::min(self.current_input_pos(), self.window_size()); + let cur_max_dist = std::cmp::min(input.pos(), self.window_bytes); let mut current_hop = 0; - for dist in self.hash.iterate(&self.input, 0) { + for dist in self.hash.iterate(input, 0) { if dist > cur_max_dist { break; } let match_length = Self::prefix_compare( - self.input_cursor_offset(-(dist as i32)), - self.input_cursor(), + input.cur_chars(-(dist as i32)), + input.cur_chars(0), len - 1, len, ); @@ -302,7 +359,43 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> { /// debugging function to verify that the hash chain is correct #[allow(dead_code)] - pub fn verify_hash(&self, _dist: Option) { + fn verify_hash(&self, _dist: Option) { //self.hash.verify_hash(dist, &self.input); } + + #[allow(dead_code)] + fn checksum(&self, checksum: &mut DebugHash) { + self.hash.checksum(checksum); + } +} + +impl HashChainHolder { + pub fn new(params: &TokenPredictorParameters, hash: H) -> Self { + Self { + hash: hash.new_hash_chain(), + window_bytes: 1 << params.window_bits, + params: *params, + } + } + + fn prefix_compare(s1: &[u8], s2: &[u8], best_len: u32, max_len: u32) -> u32 { + assert!(max_len >= 3 && s1.len() >= max_len as usize && s2.len() >= max_len as usize); + + if s1[best_len as usize] != 
s2[best_len as usize] { + return 0; + } + if s1[0] != s2[0] || s1[1] != s2[1] || s1[2] != s2[2] { + return 0; + } + + let mut match_len = 3; // Initialize with the length of the fixed prefix + for i in 3..max_len { + if s1[i as usize] != s2[i as usize] { + break; + } + match_len = i + 1; + } + + match_len + } } diff --git a/src/lib.rs b/src/lib.rs index b37b822..adb412d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,10 +13,10 @@ mod deflate_reader; mod deflate_writer; mod hash_algorithm; mod hash_chain; +mod hash_chain_holder; mod huffman_calc; mod huffman_encoding; mod huffman_helper; -mod predictor_state; mod preflate_constants; pub mod preflate_error; mod preflate_input; diff --git a/src/preflate_input.rs b/src/preflate_input.rs index 9284819..604bd29 100644 --- a/src/preflate_input.rs +++ b/src/preflate_input.rs @@ -3,6 +3,8 @@ * Licensed under the Apache License, Version 2.0. See LICENSE.txt in the project root for license information. * This software incorporates material from third parties. See NOTICE.txt for details. 
*--------------------------------------------------------------------------------------------*/ + +#[derive(Clone)] pub struct PreflateInput<'a> { data: &'a [u8], pos: i32, diff --git a/src/preflate_parameter_estimator.rs b/src/preflate_parameter_estimator.rs index f59dc9d..14520b5 100644 --- a/src/preflate_parameter_estimator.rs +++ b/src/preflate_parameter_estimator.rs @@ -15,6 +15,7 @@ use crate::{ preflate_stream_info::{extract_preflate_info, PreflateStreamInfo}, preflate_token::PreflateTokenBlock, statistical_codec::{PredictionDecoder, PredictionEncoder}, + token_predictor::TokenPredictorParameters, }; #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -34,45 +35,38 @@ pub enum PreflateHuffStrategy { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct PreflateParameters { - pub strategy: PreflateStrategy, pub huff_strategy: PreflateHuffStrategy, - pub zlib_compatible: bool, - pub window_bits: u32, - pub hash_shift: u32, - pub hash_mask: u16, - pub max_token_count: u16, - pub max_dist_3_matches: u16, - - /// if there are matches that have a distance larger than window_size - MAX_MATCH. - /// Zlib does not allow these. 
- pub very_far_matches_detected: bool, - - /// Zlib does not match to first byte of a file in order to reserve 0 for the end of chain - pub matches_to_start_detected: bool, - - pub good_length: u32, - pub max_lazy: u32, - pub nice_length: u32, - pub max_chain: u32, - pub hash_algorithm: HashAlgorithm, - pub min_len: u32, - - /// if something, then we use the "fast" compressor, which only adds smaller substrings - /// to the dictionary - pub add_policy: DictionaryAddPolicy, + + pub predictor: TokenPredictorParameters, } const FILE_VERSION: u16 = 1; +const HASH_ALGORITHM_ZLIB: u16 = 0; +const HASH_ALGORITHM_MINIZ_FAST: u16 = 1; +const HASH_ALGORITHM_LIBDEFLATE4: u16 = 2; +const HASH_ALGORITHM_ZLIBNG: u16 = 3; +const HASH_ALGORITHM_RANDOMVECTOR: u16 = 4; + impl PreflateParameters { - pub fn read(decoder: &mut D) -> Result { + pub fn read(decoder: &mut impl PredictionDecoder) -> Result { assert_eq!(FILE_VERSION, decoder.decode_value(8)); let strategy = decoder.decode_value(4); let huff_strategy = decoder.decode_value(4); let zlib_compatible = decoder.decode_value(1) != 0; let window_bits = decoder.decode_value(8); - let hash_shift = decoder.decode_value(8); - let hash_mask = decoder.decode_value(16); + let hash_algorithm = decoder.decode_value(4); + + let hash_shift; + let hash_mask; + if hash_algorithm == HASH_ALGORITHM_ZLIB { + hash_shift = decoder.decode_value(8); + hash_mask = decoder.decode_value(16); + } else { + hash_shift = 0; + hash_mask = 0; + } + let max_token_count = decoder.decode_value(16); let max_dist_3_matches = decoder.decode_value(16); let very_far_matches_detected = decoder.decode_value(1) != 0; @@ -81,7 +75,6 @@ impl PreflateParameters { let max_lazy = decoder.decode_value(16); let nice_length = decoder.decode_value(16); let max_chain = decoder.decode_value(16); - let hash_algorithm = decoder.decode_value(4); let min_len = decoder.decode_value(16); let add_policy = match decoder.decode_value(2) { @@ -100,18 +93,37 @@ impl PreflateParameters { const 
HUFF_STRATEGY_MIXED: u16 = PreflateHuffStrategy::Mixed as u16; const HUFF_STRATEGY_STATIC: u16 = PreflateHuffStrategy::Static as u16; - const HASH_ALGORITHM_ZLIB: u16 = HashAlgorithm::Zlib as u16; - const HASH_ALGORITHM_MINIZ_FAST: u16 = HashAlgorithm::MiniZFast as u16; - const HASH_ALGORITHM_LIBDEFLATE4: u16 = HashAlgorithm::Libdeflate4 as u16; - const HASH_ALGORITHM_ZLIBNG: u16 = HashAlgorithm::ZlibNG as u16; - Ok(PreflateParameters { - strategy: match strategy { - STRATEGY_DEFAULT => PreflateStrategy::Default, - STRATEGY_RLE_ONLY => PreflateStrategy::RleOnly, - STRATEGY_HUFF_ONLY => PreflateStrategy::HuffOnly, - STRATEGY_STORE => PreflateStrategy::Store, - _ => panic!("invalid strategy"), + predictor: TokenPredictorParameters { + strategy: match strategy { + STRATEGY_DEFAULT => PreflateStrategy::Default, + STRATEGY_RLE_ONLY => PreflateStrategy::RleOnly, + STRATEGY_HUFF_ONLY => PreflateStrategy::HuffOnly, + STRATEGY_STORE => PreflateStrategy::Store, + _ => panic!("invalid strategy"), + }, + window_bits: window_bits.into(), + very_far_matches_detected, + matches_to_start_detected, + nice_length: nice_length.into(), + add_policy, + max_token_count, + zlib_compatible, + max_dist_3_matches, + good_length: good_length.into(), + max_lazy: max_lazy.into(), + max_chain: max_chain.into(), + min_len: min_len.into(), + hash_algorithm: match hash_algorithm { + HASH_ALGORITHM_ZLIB => HashAlgorithm::Zlib { + hash_shift: hash_shift.into(), + hash_mask, + }, + HASH_ALGORITHM_MINIZ_FAST => HashAlgorithm::MiniZFast, + HASH_ALGORITHM_LIBDEFLATE4 => HashAlgorithm::Libdeflate4, + HASH_ALGORITHM_ZLIBNG => HashAlgorithm::ZlibNG, + _ => panic!("invalid hash algorithm"), + }, }, huff_strategy: match huff_strategy { HUFF_STRATEGY_DYNAMIC => PreflateHuffStrategy::Dynamic, @@ -119,50 +131,56 @@ impl PreflateParameters { HUFF_STRATEGY_STATIC => PreflateHuffStrategy::Static, _ => panic!("invalid huff strategy"), }, - zlib_compatible, - window_bits: window_bits.into(), - hash_shift: 
hash_shift.into(), - hash_mask, - max_token_count, - max_dist_3_matches, - very_far_matches_detected, - matches_to_start_detected, - good_length: good_length.into(), - max_lazy: max_lazy.into(), - nice_length: nice_length.into(), - max_chain: max_chain.into(), - min_len: min_len.into(), - add_policy, - hash_algorithm: match hash_algorithm { - HASH_ALGORITHM_ZLIB => HashAlgorithm::Zlib, - HASH_ALGORITHM_MINIZ_FAST => HashAlgorithm::MiniZFast, - HASH_ALGORITHM_LIBDEFLATE4 => HashAlgorithm::Libdeflate4, - HASH_ALGORITHM_ZLIBNG => HashAlgorithm::ZlibNG, - _ => panic!("invalid hash algorithm"), - }, }) } pub fn write(&self, encoder: &mut E) { encoder.encode_value(FILE_VERSION, 8); - encoder.encode_value(self.strategy as u16, 4); + encoder.encode_value(self.predictor.strategy as u16, 4); encoder.encode_value(self.huff_strategy as u16, 4); - encoder.encode_value(u16::try_from(self.zlib_compatible).unwrap(), 1); - encoder.encode_value(u16::try_from(self.window_bits).unwrap(), 8); - encoder.encode_value(u16::try_from(self.hash_shift).unwrap(), 8); - encoder.encode_value(self.hash_mask, 16); - encoder.encode_value(self.max_token_count, 16); - encoder.encode_value(self.max_dist_3_matches, 16); - encoder.encode_value(u16::try_from(self.very_far_matches_detected).unwrap(), 1); - encoder.encode_value(u16::try_from(self.matches_to_start_detected).unwrap(), 1); - encoder.encode_value(u16::try_from(self.good_length).unwrap(), 16); - encoder.encode_value(u16::try_from(self.max_lazy).unwrap(), 16); - encoder.encode_value(u16::try_from(self.nice_length).unwrap(), 16); - encoder.encode_value(u16::try_from(self.max_chain).unwrap(), 16); - encoder.encode_value(self.hash_algorithm as u16, 4); - encoder.encode_value(u16::try_from(self.min_len).unwrap(), 16); - - match self.add_policy { + encoder.encode_value(u16::try_from(self.predictor.zlib_compatible).unwrap(), 1); + encoder.encode_value(u16::try_from(self.predictor.window_bits).unwrap(), 8); + + match self.predictor.hash_algorithm { + 
HashAlgorithm::Zlib { + hash_shift, + hash_mask, + } => { + encoder.encode_value(HASH_ALGORITHM_ZLIB, 4); + encoder.encode_value(u16::try_from(hash_shift).unwrap(), 8); + encoder.encode_value(hash_mask, 16); + } + HashAlgorithm::MiniZFast => { + encoder.encode_value(HASH_ALGORITHM_MINIZ_FAST, 4); + } + HashAlgorithm::Libdeflate4 => { + encoder.encode_value(HASH_ALGORITHM_LIBDEFLATE4, 4); + } + HashAlgorithm::ZlibNG => { + encoder.encode_value(HASH_ALGORITHM_ZLIBNG, 4); + } + HashAlgorithm::RandomVector => { + encoder.encode_value(HASH_ALGORITHM_RANDOMVECTOR, 4); + } + } + + encoder.encode_value(self.predictor.max_token_count, 16); + encoder.encode_value(self.predictor.max_dist_3_matches, 16); + encoder.encode_value( + u16::try_from(self.predictor.very_far_matches_detected).unwrap(), + 1, + ); + encoder.encode_value( + u16::try_from(self.predictor.matches_to_start_detected).unwrap(), + 1, + ); + encoder.encode_value(u16::try_from(self.predictor.good_length).unwrap(), 16); + encoder.encode_value(u16::try_from(self.predictor.max_lazy).unwrap(), 16); + encoder.encode_value(u16::try_from(self.predictor.nice_length).unwrap(), 16); + encoder.encode_value(u16::try_from(self.predictor.max_chain).unwrap(), 16); + encoder.encode_value(u16::try_from(self.predictor.min_len).unwrap(), 16); + + match self.predictor.add_policy { DictionaryAddPolicy::AddAll => encoder.encode_value(0, 2), DictionaryAddPolicy::AddFirst(v) => { encoder.encode_value(1, 2); @@ -233,27 +251,24 @@ pub fn estimate_preflate_parameters( let cl = estimate_preflate_comp_level(window_bits, mem_level, unpacked_output, blocks)?; - let hash_shift = cl.hash_shift; - let hash_mask = cl.hash_mask; - Ok(PreflateParameters { - window_bits, - hash_shift, - hash_mask, - max_token_count, - strategy: estimate_preflate_strategy(&info), + predictor: TokenPredictorParameters { + window_bits, + very_far_matches_detected: cl.very_far_matches_detected, + matches_to_start_detected: cl.matches_to_start_detected, + strategy: 
estimate_preflate_strategy(&info), + nice_length: cl.nice_length, + add_policy: cl.add_policy, + max_token_count, + zlib_compatible: cl.zlib_compatible, + max_dist_3_matches: cl.max_dist_3_matches, + good_length: cl.good_length, + max_lazy: cl.max_lazy, + max_chain: cl.max_chain, + min_len: cl.min_len, + hash_algorithm: cl.hash_algorithm, + }, huff_strategy: estimate_preflate_huff_strategy(&info), - zlib_compatible: cl.zlib_compatible, - max_dist_3_matches: cl.max_dist_3_matches, - very_far_matches_detected: cl.very_far_matches_detected, - matches_to_start_detected: cl.matches_to_start_detected, - good_length: cl.good_length, - max_lazy: cl.max_lazy, - nice_length: cl.nice_length, - max_chain: cl.max_chain, - hash_algorithm: cl.hash_algorithm, - min_len: cl.min_len, - add_policy: cl.add_policy, }) } @@ -270,37 +285,37 @@ fn verify_zlib_recognition() { let params = estimate_preflate_parameters(&contents.plain_text, &contents.blocks).unwrap(); - assert_eq!(params.zlib_compatible, true); + assert_eq!(params.predictor.zlib_compatible, true); if i == 0 { - assert_eq!(params.strategy, PreflateStrategy::Store); + assert_eq!(params.predictor.strategy, PreflateStrategy::Store); } else if i >= 1 && i < 4 { let config = &FAST_PREFLATE_PARSER_SETTINGS[i as usize - 1]; assert!( - params.max_chain <= config.max_chain, + params.predictor.max_chain <= config.max_chain, "max_chain mismatch {} should be <= {}", - params.max_chain, + params.predictor.max_chain, config.max_chain ); - assert_eq!(params.good_length, config.good_length); + assert_eq!(params.predictor.good_length, config.good_length); assert_eq!( - params.add_policy, + params.predictor.add_policy, DictionaryAddPolicy::AddFirst(config.max_lazy as u16) ); - assert_eq!(params.nice_length, config.nice_length); - assert_eq!(params.strategy, PreflateStrategy::Default); + assert_eq!(params.predictor.nice_length, config.nice_length); + assert_eq!(params.predictor.strategy, PreflateStrategy::Default); } else if i >= 4 { let config 
= &SLOW_PREFLATE_PARSER_SETTINGS[i as usize - 4]; assert!( - params.max_chain <= config.max_chain, + params.predictor.max_chain <= config.max_chain, "max_chain mismatch {} should be <= {}", - params.max_chain, + params.predictor.max_chain, config.max_chain ); - assert_eq!(params.good_length, config.good_length); - assert_eq!(params.max_lazy, config.max_lazy); - assert_eq!(params.nice_length, config.nice_length); - assert_eq!(params.add_policy, DictionaryAddPolicy::AddAll); - assert_eq!(params.strategy, PreflateStrategy::Default); + assert_eq!(params.predictor.good_length, config.good_length); + assert_eq!(params.predictor.max_lazy, config.max_lazy); + assert_eq!(params.predictor.nice_length, config.nice_length); + assert_eq!(params.predictor.add_policy, DictionaryAddPolicy::AddAll); + assert_eq!(params.predictor.strategy, PreflateStrategy::Default); } } } @@ -316,7 +331,7 @@ fn verify_miniz_recognition() { let params = estimate_preflate_parameters(&contents.plain_text, &contents.blocks).unwrap(); if i == 0 { - assert_eq!(params.strategy, PreflateStrategy::Store); + assert_eq!(params.predictor.strategy, PreflateStrategy::Store); } else if i == 1 { println!("{:?}", params); } else { diff --git a/src/process.rs b/src/process.rs index 68eccdb..e77ad99 100644 --- a/src/process.rs +++ b/src/process.rs @@ -9,10 +9,6 @@ use std::io::Cursor; use crate::{ deflate_reader::DeflateReader, deflate_writer::DeflateWriter, - hash_algorithm::{ - HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, RotatingHashTrait, ZlibNGHash, - ZlibRotatingHash, - }, huffman_calc::HufftreeBitCalc, preflate_error::PreflateError, preflate_parameter_estimator::PreflateParameters, @@ -31,28 +27,11 @@ pub fn encode_mispredictions( params: &PreflateParameters, encoder: &mut impl PredictionEncoder, ) -> Result<(), PreflateError> { - match params.hash_algorithm { - HashAlgorithm::MiniZFast => predict_blocks( - &deflate.blocks, - TokenPredictor::::new(&deflate.plain_text, params), - encoder, - )?, - 
HashAlgorithm::Zlib => predict_blocks( - &deflate.blocks, - TokenPredictor::::new(&deflate.plain_text, params), - encoder, - )?, - HashAlgorithm::Libdeflate4 => predict_blocks( - &deflate.blocks, - TokenPredictor::::new(&deflate.plain_text, params), - encoder, - )?, - HashAlgorithm::ZlibNG => predict_blocks( - &deflate.blocks, - TokenPredictor::::new(&deflate.plain_text, params), - encoder, - )?, - } + predict_blocks( + &deflate.blocks, + TokenPredictor::new(&deflate.plain_text, &params.predictor), + encoder, + )?; encoder.encode_misprediction(CodecMisprediction::EOFMisprediction, false); @@ -99,9 +78,9 @@ pub fn parse_deflate( }) } -fn predict_blocks( +fn predict_blocks( blocks: &[PreflateTokenBlock], - mut token_predictor_in: TokenPredictor, + mut token_predictor_in: TokenPredictor, encoder: &mut impl PredictionEncoder, ) -> Result<(), PreflateError> { for i in 0..blocks.len() { @@ -134,28 +113,11 @@ pub fn decode_mispredictions( ) -> Result<(Vec, Vec), PreflateError> { let mut deflate_writer: DeflateWriter<'_> = DeflateWriter::new(plain_text); - let output_blocks = match params.hash_algorithm { - HashAlgorithm::MiniZFast => recreate_blocks( - TokenPredictor::::new(plain_text, params), - decoder, - &mut deflate_writer, - )?, - HashAlgorithm::Zlib => recreate_blocks( - TokenPredictor::::new(plain_text, params), - decoder, - &mut deflate_writer, - )?, - HashAlgorithm::Libdeflate4 => recreate_blocks( - TokenPredictor::::new(plain_text, params), - decoder, - &mut deflate_writer, - )?, - HashAlgorithm::ZlibNG => recreate_blocks( - TokenPredictor::::new(plain_text, params), - decoder, - &mut deflate_writer, - )?, - }; + let output_blocks = recreate_blocks( + TokenPredictor::new(plain_text, &params.predictor), + decoder, + &mut deflate_writer, + )?; // flush the last byte, which may be incomplete and normally // padded with zeros, but maybe not @@ -165,8 +127,8 @@ pub fn decode_mispredictions( Ok((deflate_writer.detach_output(), output_blocks)) } -fn recreate_blocks( - mut
token_predictor: TokenPredictor, +fn recreate_blocks( + mut token_predictor: TokenPredictor, decoder: &mut D, deflate_writer: &mut DeflateWriter, ) -> Result, PreflateError> { @@ -404,7 +366,65 @@ fn verify_longmatch() { #[test] #[ignore = "doesn't work yet due to excessive hash chain length"] fn test_treepngdeflate() { - do_analyze(None, &read_file("treepng.deflate"), true); + use crate::hash_algorithm::{HashImplementation, RandomVectorHash}; + use crate::hash_chain::HashChain; + + let compressed_data: &[u8] = &read_file("treepng.deflate"); + + let contents = parse_deflate(compressed_data, 1).unwrap(); + + let mut input = crate::preflate_input::PreflateInput::new(&contents.plain_text); + let mut chain = RandomVectorHash::new_hash_chain(RandomVectorHash {}); + + let r = RandomVectorHash::default(); + + let h = r.get_hash(&contents.plain_text); + + println!("hashx: {:?}", h); + + let mut maxdepth = 0; + + for b in &contents.blocks { + for i in 0..b.tokens.len() { + let t = &b.tokens[i]; + match t { + crate::preflate_token::PreflateToken::Literal => { + chain.update_hash_with_policy::( + 1, + &input, + crate::hash_chain::DictionaryAddPolicy::AddAll, + ); + input.advance(1); + } + crate::preflate_token::PreflateToken::Reference(r) => { + let depth = chain.match_depth(&r, 32768, &input); + if depth > 5 { + println!("reference: {:?}", r); + + println!("back: {:?}", &input.cur_chars(-82)[0..82]); + + println!( + "depth: {}, {}, {:?}", + depth, + input.pos(), + &input.cur_chars(0)[0..16] + ); + chain.match_depth(&r, 32768, &input); + return; + } + + chain.update_hash_with_policy::( + r.len(), + &input, + crate::hash_chain::DictionaryAddPolicy::AddAll, + ); + input.advance(r.len()); + } + } + } + } + + //do_analyze(None, &read_file("treepng.deflate"), true); } #[test] @@ -453,6 +473,7 @@ fn verify_zlib_compressed() { #[test] fn verify_zlib_compressed_perfect() { use crate::{ + hash_algorithm::HashAlgorithm, preflate_parameter_estimator::PreflateHuffStrategy, 
preflate_parameter_estimator::PreflateStrategy, preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS}, @@ -482,23 +503,26 @@ fn verify_zlib_compressed_perfect() { } let params = PreflateParameters { - strategy: PreflateStrategy::Default, huff_strategy: PreflateHuffStrategy::Dynamic, - zlib_compatible: true, - window_bits: 15, - hash_shift: 5, - hash_mask: 0x7fff, - max_token_count: 16383, - max_dist_3_matches, - very_far_matches_detected: false, - matches_to_start_detected: false, - good_length: config.good_length, - max_lazy: max_lazy, - nice_length: config.nice_length, - max_chain: config.max_chain, - hash_algorithm: HashAlgorithm::Zlib, - min_len: 3, - add_policy, + predictor: crate::token_predictor::TokenPredictorParameters { + strategy: PreflateStrategy::Default, + window_bits: 15, + very_far_matches_detected: false, + matches_to_start_detected: false, + nice_length: config.nice_length, + add_policy, + max_token_count: 16383, + zlib_compatible: true, + max_dist_3_matches, + good_length: config.good_length, + max_lazy: max_lazy, + max_chain: config.max_chain, + min_len: 3, + hash_algorithm: HashAlgorithm::Zlib { + hash_shift: 5, + hash_mask: 0x7fff, + }, + }, }; let contents = parse_deflate(&v, 1).unwrap(); @@ -516,6 +540,7 @@ fn verify_zlib_compressed_perfect() { fn verify_miniz1_compressed_perfect() { use crate::{ cabac_codec::{PredictionDecoderCabac, PredictionEncoderCabac}, + hash_algorithm::HashAlgorithm, preflate_parameter_estimator::{PreflateHuffStrategy, PreflateStrategy}, }; use cabac::vp8::{VP8Reader, VP8Writer}; @@ -528,23 +553,23 @@ fn verify_miniz1_compressed_perfect() { let mut cabac_encoder = PredictionEncoderCabac::new(VP8Writer::new(&mut buffer).unwrap()); let params = PreflateParameters { - strategy: PreflateStrategy::Default, + predictor: crate::token_predictor::TokenPredictorParameters { + strategy: PreflateStrategy::Default, + window_bits: 15, + very_far_matches_detected: false, + matches_to_start_detected: 
false, + nice_length: 258, + add_policy: crate::hash_chain::DictionaryAddPolicy::AddFirst(0), + max_token_count: 16383, + zlib_compatible: true, + max_dist_3_matches: 8192, + good_length: 258, + max_lazy: 0, + max_chain: 2, + min_len: 3, + hash_algorithm: HashAlgorithm::MiniZFast, + }, huff_strategy: PreflateHuffStrategy::Dynamic, - zlib_compatible: true, - window_bits: 15, - hash_shift: 0, - hash_mask: crate::hash_algorithm::MINIZ_LEVEL1_HASH_SIZE_MASK, - max_token_count: 16383, - max_dist_3_matches: 8192, - very_far_matches_detected: false, - matches_to_start_detected: false, - good_length: 258, - max_lazy: 0, - nice_length: 258, - max_chain: 2, - hash_algorithm: HashAlgorithm::MiniZFast, - min_len: 3, - add_policy: crate::hash_chain::DictionaryAddPolicy::AddFirst(0), }; encode_mispredictions(&contents, &params, &mut cabac_encoder).unwrap(); diff --git a/src/skip_length_estimator.rs b/src/skip_length_estimator.rs index d553d6f..1fe1bb0 100644 --- a/src/skip_length_estimator.rs +++ b/src/skip_length_estimator.rs @@ -21,7 +21,8 @@ pub fn estimate_skip_length(token_blocks: &[PreflateTokenBlock]) -> DictionaryAd let mut counters = [0u32; 259]; let mut counters_b = [0u32; 259]; - for token_block in token_blocks { + for i in 0..token_blocks.len() { + let token_block = &token_blocks[i]; for token in token_block.tokens.iter() { match token { PreflateToken::Literal => { diff --git a/src/token_predictor.rs b/src/token_predictor.rs index ca873ce..baba4b4 100644 --- a/src/token_predictor.rs +++ b/src/token_predictor.rs @@ -9,10 +9,12 @@ use anyhow::Context; use crate::{ bit_helper::DebugHash, cabac_codec::{decode_difference, encode_difference}, - hash_algorithm::RotatingHashTrait, - predictor_state::{MatchResult, PredictorState}, + hash_algorithm::HashAlgorithm, + hash_chain::DictionaryAddPolicy, + hash_chain_holder::{new_hash_chain_holder, HashChainHolderTrait, MatchResult}, preflate_constants::MIN_MATCH, - preflate_parameter_estimator::PreflateParameters, +
preflate_input::PreflateInput, + preflate_parameter_estimator::PreflateStrategy, preflate_token::{BlockType, PreflateToken, PreflateTokenBlock, PreflateTokenReference}, statistical_codec::{ CodecCorrection, CodecMisprediction, PredictionDecoder, PredictionEncoder, @@ -21,27 +23,60 @@ use crate::{ const VERIFY: bool = false; -pub struct TokenPredictor<'a, H: RotatingHashTrait> { - state: PredictorState<'a, H>, - params: PreflateParameters, +pub struct TokenPredictor<'a> { + state: Box, + params: TokenPredictorParameters, pending_reference: Option, current_token_count: u32, max_token_count: u32, + input: PreflateInput<'a>, } -impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { - pub fn new(uncompressed: &'a [u8], params: &PreflateParameters) -> Self { +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct TokenPredictorParameters { + /// Zlib does not match to first byte of a file in order to reserve 0 for the end of chain + pub matches_to_start_detected: bool, + + /// if there are matches that have a distance larger than window_size - MAX_MATCH. + /// Zlib does not allow these. 
+ pub very_far_matches_detected: bool, + pub window_bits: u32, + + pub strategy: PreflateStrategy, + pub nice_length: u32, + + /// if something, then we use the "fast" compressor, which only adds smaller substrings + /// to the dictionary + pub add_policy: DictionaryAddPolicy, + + pub max_token_count: u16, + + pub zlib_compatible: bool, + pub max_dist_3_matches: u16, + pub good_length: u32, + pub max_lazy: u32, + pub max_chain: u32, + pub min_len: u32, + + pub hash_algorithm: HashAlgorithm, +} + +impl<'a> TokenPredictor<'a> { + pub fn new(uncompressed: &'a [u8], params: &TokenPredictorParameters) -> Self { // Implement constructor logic for PreflateTokenPredictor // Initialize fields as necessary // Create and initialize PreflatePredictorState, PreflateHashChainExt, and PreflateSeqChain instances // Construct the analysisResults vector + let predictor_state = new_hash_chain_holder(params); + Self { - state: PredictorState::<'a>::new(uncompressed, params), + state: predictor_state, params: *params, pending_reference: None, current_token_count: 0, max_token_count: params.max_token_count.into(), + input: PreflateInput::new(uncompressed), } } @@ -71,7 +106,8 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { codec.encode_value(block.uncompressed_len as u16, 16); codec.encode_correction(CodecCorrection::NonZeroPadding, block.padding_bits.into()); - self.state.update_hash_batch(block.uncompressed_len); + self.state + .update_hash(block.uncompressed_len, &mut self.input, true); return Ok(()); } @@ -180,14 +216,20 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { ); if predicted_ref.len() != target_ref.len() { - let rematch = self.state.calculate_hops(target_ref).with_context(|| { - format!("calculate_hops p={:?}, t={:?}", predicted_ref, target_ref) - })?; + let rematch = self + .state + .calculate_hops(target_ref, &self.input) + .with_context(|| { + format!("calculate_hops p={:?}, t={:?}", predicted_ref, target_ref) + })?; 
codec.encode_correction(CodecCorrection::DistAfterLenCorrection, rematch); } else if target_ref.dist() != predicted_ref.dist() { - let rematch = self.state.calculate_hops(target_ref).with_context(|| { - format!("calculate_hops p={:?}, t={:?}", predicted_ref, target_ref) - })?; + let rematch = self + .state + .calculate_hops(target_ref, &self.input) + .with_context(|| { + format!("calculate_hops p={:?}, t={:?}", predicted_ref, target_ref) + })?; codec.encode_correction(CodecCorrection::DistOnlyCorrection, rematch); } else { codec.encode_correction(CodecCorrection::DistOnlyCorrection, 0); @@ -234,7 +276,8 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { block.uncompressed_len = codec.decode_value(16).into(); block.padding_bits = codec.decode_correction(CodecCorrection::NonZeroPadding) as u8; - self.state.update_hash_batch(block.uncompressed_len); + self.state + .update_hash(block.uncompressed_len, &mut self.input, true); return Ok(block); } BT_STATICHUFF => { @@ -308,7 +351,7 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { predicted_ref = PreflateTokenReference::new( new_len, self.state - .hop_match(new_len, hops) + .hop_match(new_len, hops, &self.input) .with_context(|| format!("hop_match l={} {:?}", new_len, predicted_ref))?, false, ); @@ -317,7 +360,7 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { if hops != 0 { let new_dist = self .state - .hop_match(predicted_ref.len(), hops) + .hop_match(predicted_ref.len(), hops, &self.input) .with_context(|| { format!("recalculate_distance token {}", self.current_token_count) })?; @@ -341,18 +384,19 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { pub fn input_eof(&self) -> bool { // Return a boolean indicating whether input has reached EOF - self.state.available_input_size() == 0 + self.input.remaining() == 0 } fn predict_token(&mut self) -> PreflateToken { - if self.state.current_input_pos() == 0 || self.state.available_input_size() < MIN_MATCH { + if self.input.pos() == 0 || 
self.input.remaining() < MIN_MATCH { return PreflateToken::Literal; } let m = if let Some(pending) = self.pending_reference { MatchResult::Success(pending) } else { - self.state.match_token(0, 0, self.params.max_chain) + self.state + .match_token(0, 0, self.params.max_chain, &self.input) }; self.pending_reference = None; @@ -371,15 +415,18 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { // Check for a longer match that starts at the next byte, in which case we should // just emit a literal instead of a distance/length pair. if match_token.len() < self.params.max_lazy - && self.state.available_input_size() >= match_token.len() + 2 + && self.input.remaining() >= match_token.len() + 2 { let mut max_depth = self.params.max_chain; if self.params.zlib_compatible && match_token.len() >= self.params.good_length { + // zlib shortens the amount we search by half if the match is "good" enough max_depth >>= 2; } - let match_next = self.state.match_token(match_token.len(), 1, max_depth); + let match_next = + self.state + .match_token(match_token.len(), 1, max_depth, &self.input); if let MatchResult::Success(m) = match_next { if m.len() > match_token.len() { @@ -405,7 +452,7 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { &mut self, _dist_match: Option, ) -> anyhow::Result { - if self.state.current_input_pos() == 0 || self.state.available_input_size() < MIN_MATCH { + if self.input.pos() == 0 || self.input.remaining() < MIN_MATCH { return Err(anyhow::Error::msg( "Not enough space left to find a reference", )); @@ -419,7 +466,9 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { } */ - let match_token = self.state.match_token(0, 0, self.params.max_chain); + let match_token = self + .state + .match_token(0, 0, self.params.max_chain, &self.input); self.pending_reference = None; @@ -439,18 +488,17 @@ impl<'a, H: RotatingHashTrait> TokenPredictor<'a, H> { match token { PreflateToken::Literal => { if let Some(block) = block { - 
block.add_literal(self.state.input_cursor()[0]); + block.add_literal(self.input.cur_char(0)); } - self.state.update_hash_batch(1); + self.state.update_hash(1, &mut self.input, true); } PreflateToken::Reference(t) => { if let Some(block) = block { block.add_reference(t.len(), t.dist(), t.get_irregular258()); } - self.state - .update_hash_with_policy(t.len(), self.params.add_policy); + self.state.update_hash(t.len(), &mut self.input, false); } }