diff --git a/src/complevel_estimator.rs b/src/complevel_estimator.rs index 77bb5be..f84537c 100644 --- a/src/complevel_estimator.rs +++ b/src/complevel_estimator.rs @@ -7,16 +7,18 @@ /// This module is design to detect the appropriate overall parameters for the preflate compressor. /// Getting the parameters correct means that the resulting diff between the deflate stream /// and the predicted deflate stream will be as small as possible. -use crate::hash_algorithm::{ - HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, ZlibNGHash, ZlibRotatingHash, - MINIZ_LEVEL1_HASH_SIZE_MASK, +use crate::{ + hash_algorithm::{ + HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, RotatingHashTrait, ZlibNGHash, + ZlibRotatingHash, MINIZ_LEVEL1_HASH_SIZE_MASK, + }, + hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH}, + preflate_constants, + preflate_input::PreflateInput, + preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS}, + preflate_token::{BlockType, PreflateToken, PreflateTokenBlock, PreflateTokenReference}, + skip_length_estimator::estimate_skip_length, }; -use crate::hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH}; -use crate::preflate_constants; -use crate::preflate_input::PreflateInput; -use crate::preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS}; -use crate::preflate_token::{BlockType, PreflateToken, PreflateTokenBlock, PreflateTokenReference}; -use crate::skip_length_estimator::estimate_skip_length; #[derive(Default)] pub struct CompLevelInfo { @@ -37,18 +39,64 @@ pub struct CompLevelInfo { pub max_chain: u32, } -enum HashChainType { - Zlib(HashChain), - MiniZ(HashChain), - LibFlate4(HashChain), - ZlibNG(HashChain), +/// vtable for invoking the hash chain functions on specific implementation +/// of hash algorithm +trait HashChainInvoke { + fn invoke_update_hash( + &mut self, + len: u32, + input: &PreflateInput, + add_policy: DictionaryAddPolicy, + ); + + fn invoke_match_depth( + &mut self, + token: PreflateTokenReference, + window_size: u32, + input: &PreflateInput, + ) -> u32; +} + +/// holds the hashchain for a specific hash algorithm +struct HashChainHolder { + hash_chain: HashChain, +} + +impl HashChainHolder { + fn new(hash_shift: u32, hash_mask: u16, input: &PreflateInput<'_>) -> Box { + Box::new(HashChainHolder:: { + hash_chain: HashChain::::new(hash_shift, hash_mask, input), + }) + } +} + +impl HashChainInvoke for HashChainHolder { + fn invoke_update_hash( + &mut self, + len: u32, + input: &PreflateInput, + add_policy: DictionaryAddPolicy, + ) { + self.hash_chain + .update_hash_with_policy::(len, input, add_policy) + } + + fn invoke_match_depth( + &mut self, + token: PreflateTokenReference, + window_size: u32, + input: &PreflateInput, + ) -> u32 { + self.hash_chain.match_depth(&token, window_size, input) + } } struct CandidateInfo { + hash_algorithm: HashAlgorithm, hash_mask: u16, hash_shift: u32, add_policy: DictionaryAddPolicy, - hash_chain: HashChainType, + hash_chain: Box, longest_dist_at_hop_0: u32, longest_dist_at_hop_1_plus: u32, @@ -67,21 +115,20 @@ impl CandidateInfo { hash_mask, hash_shift, add_policy, + hash_algorithm, hash_chain: match hash_algorithm { - HashAlgorithm::Zlib => HashChainType::Zlib(HashChain::::new( - hash_shift, hash_mask, &input, - )), + HashAlgorithm::Zlib => { + HashChainHolder::::new(hash_shift, hash_mask, input) + } HashAlgorithm::MiniZFast => { - HashChainType::MiniZ(HashChain::::new(hash_shift, hash_mask, &input)) + HashChainHolder::::new(hash_shift, hash_mask, input) } HashAlgorithm::Libdeflate4 => { - HashChainType::LibFlate4(HashChain::::new( - hash_shift, hash_mask, &input, - )) + HashChainHolder::::new(hash_shift, hash_mask, input) + } + HashAlgorithm::ZlibNG => { + HashChainHolder::::new(hash_shift, hash_mask, input) } - HashAlgorithm::ZlibNG => HashChainType::ZlibNG(HashChain::::new( - hash_shift, hash_mask, &input, - )), }, longest_dist_at_hop_0: 0, longest_dist_at_hop_1_plus: 0, @@ -89,49 +136,15 @@ impl CandidateInfo { } } - fn invoke_update_hash( - &mut self, - len: u32, - input: &PreflateInput, - add_policy: DictionaryAddPolicy, - ) { - match self.hash_chain { - HashChainType::Zlib(ref mut h) => { - h.update_hash_with_policy::(len, input, add_policy) - } - HashChainType::MiniZ(ref mut h) => { - h.update_hash_with_policy::(len, input, add_policy) - } - HashChainType::LibFlate4(ref mut h) => { - h.update_hash_with_policy::(len, input, add_policy) - } - HashChainType::ZlibNG(ref mut h) => { - h.update_hash_with_policy::(len, input, add_policy) - } - } - } - - fn invoke_match_depth( - &mut self, - token: &PreflateTokenReference, - window_size: u32, - input: &PreflateInput, - ) -> u32 { - match self.hash_chain { - HashChainType::Zlib(ref mut h) => h.match_depth(token, window_size, input), - HashChainType::MiniZ(ref mut h) => h.match_depth(token, window_size, input), - HashChainType::LibFlate4(ref mut h) => h.match_depth(token, window_size, input), - HashChainType::ZlibNG(ref mut h) => h.match_depth(token, window_size, input), - } - } - fn match_depth( &mut self, - token: &PreflateTokenReference, + token: PreflateTokenReference, window_size: u32, input: &PreflateInput, ) -> bool { - let mdepth = self.invoke_match_depth(token, window_size, input); + let mdepth = self + .hash_chain + .invoke_match_depth(token, window_size, input); // remove element if the match was impossible due to matching the // the hash depth or because in fast mode we can't match partial words @@ -183,22 +196,14 @@ impl CandidateInfo { } fn hash_algorithm(&self) -> HashAlgorithm { - match self.hash_chain { - HashChainType::Zlib(_) => HashAlgorithm::Zlib, - HashChainType::MiniZ(_) => HashAlgorithm::MiniZFast, - HashChainType::LibFlate4(_) => HashAlgorithm::Libdeflate4, - HashChainType::ZlibNG(_) => HashAlgorithm::ZlibNG, - } + self.hash_algorithm } } struct CompLevelEstimatorState<'a> { input: PreflateInput<'a>, - // fast compressor candidates, depending on the hash shift and mask - // and what length of matches we should skip adding to the hash table. - // As we look at the data, we remove candidates that have impossible - // matches, and at the end we pick the best candidate. + /// candidates for checking for which hash algorithm to use candidates: Vec>, blocks: &'a Vec, @@ -291,7 +296,7 @@ impl<'a> CompLevelEstimatorState<'a> { let batch_len = std::cmp::min(length, MAX_UPDATE_HASH_BATCH); for i in &mut self.candidates { - i.invoke_update_hash( + i.hash_chain.invoke_update_hash( batch_len, &self.input, if override_add_policy { @@ -307,7 +312,7 @@ impl<'a> CompLevelEstimatorState<'a> { } } - fn check_match(&mut self, token: &PreflateTokenReference) { + fn check_match(&mut self, token: PreflateTokenReference) { self.reference_count += 1; if self.input.pos() < token.dist() || self.candidates.is_empty() { @@ -342,7 +347,7 @@ impl<'a> CompLevelEstimatorState<'a> { PreflateToken::Literal => { self.update_hash(1, true); } - PreflateToken::Reference(r) => { + &PreflateToken::Reference(r) => { self.check_match(r); self.update_hash(r.len(), false); } diff --git a/src/hash_chain.rs b/src/hash_chain.rs index 0a660fe..3dcc7d1 100644 --- a/src/hash_chain.rs +++ b/src/hash_chain.rs @@ -32,8 +32,6 @@ pub enum DictionaryAddPolicy { AddFirstAndLast(u16), } -pub trait HashChainTrait: Default {} - #[derive(Default, Copy, Clone, Eq, PartialEq, Debug)] struct InternalPosition { pos: u16, diff --git a/src/process.rs b/src/process.rs index 829974c..68eccdb 100644 --- a/src/process.rs +++ b/src/process.rs @@ -233,6 +233,8 @@ fn analyze_compressed_data_fast( let params = estimate_preflate_parameters(&contents.plain_text, &contents.blocks).unwrap(); + println!("params: {:?}", params); + params.write(&mut cabac_encoder); encode_mispredictions(&contents, ¶ms, &mut cabac_encoder).unwrap(); @@ -420,7 +422,7 @@ fn verify_docx() { // test binary deflate generated by starcontrol #[test] fn verify_savegame() { - do_analyze(None, &read_file("savegame.deflate"), true); + do_analyze(None, &read_file("savegame.deflate"), false); } #[test] @@ -564,7 +566,7 @@ fn verify_miniz_compressed_1() { //let minusheader = &v[2..v.len() - 4]; //let crc = Some(u32::from_le_bytes([v[v.len() - 4], v[v.len() - 3], v[v.len() - 2], v[v.len() - 1]])); - do_analyze(None, &v, true); + do_analyze(None, &v, false); } #[test]