From 1c368a2134da08e255a9f7ea5115252bafaf3e67 Mon Sep 17 00:00:00 2001 From: mcrumpface Date: Thu, 4 Jan 2024 09:04:13 +0100 Subject: [PATCH] fixed chain estimation --- src/complevel_estimator.rs | 18 +++++++++++++----- src/preflate_parameter_estimator.rs | 14 ++++++++++++-- src/skip_length_estimator.rs | 3 +++ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/complevel_estimator.rs b/src/complevel_estimator.rs index 8f8a4c8..77bb5be 100644 --- a/src/complevel_estimator.rs +++ b/src/complevel_estimator.rs @@ -286,12 +286,20 @@ impl<'a> CompLevelEstimatorState<'a> { } } - fn update_hash(&mut self, mut length: u32) { + fn update_hash(&mut self, mut length: u32, override_add_policy: bool) { while length > 0 { let batch_len = std::cmp::min(length, MAX_UPDATE_HASH_BATCH); for i in &mut self.candidates { - i.invoke_update_hash(batch_len, &self.input, DictionaryAddPolicy::AddAll); + i.invoke_update_hash( + batch_len, + &self.input, + if override_add_policy { + DictionaryAddPolicy::AddAll + } else { + i.add_policy + }, + ); } self.input.advance(batch_len); @@ -326,17 +334,17 @@ impl<'a> CompLevelEstimatorState<'a> { fn check_dump(&mut self) { for (_i, b) in self.blocks.iter().enumerate() { if b.block_type == BlockType::Stored { - self.update_hash(b.uncompressed_len); + self.update_hash(b.uncompressed_len, true); continue; } for (_j, t) in b.tokens.iter().enumerate() { match t { PreflateToken::Literal => { - self.update_hash(1); + self.update_hash(1, true); } PreflateToken::Reference(r) => { self.check_match(r); - self.update_hash(r.len()); + self.update_hash(r.len(), false); } } } diff --git a/src/preflate_parameter_estimator.rs b/src/preflate_parameter_estimator.rs index 0190237..f59dc9d 100644 --- a/src/preflate_parameter_estimator.rs +++ b/src/preflate_parameter_estimator.rs @@ -275,21 +275,31 @@ fn verify_zlib_recognition() { assert_eq!(params.strategy, PreflateStrategy::Store); } else if i >= 1 && i < 4 { let config = &FAST_PREFLATE_PARSER_SETTINGS[i as usize - 1]; + assert!( + params.max_chain <= config.max_chain, + "max_chain mismatch {} should be <= {}", + params.max_chain, + config.max_chain + ); assert_eq!(params.good_length, config.good_length); assert_eq!( params.add_policy, DictionaryAddPolicy::AddFirst(config.max_lazy as u16) ); assert_eq!(params.nice_length, config.nice_length); - assert!(params.max_chain <= config.max_chain); assert_eq!(params.strategy, PreflateStrategy::Default); } else if i >= 4 { let config = &SLOW_PREFLATE_PARSER_SETTINGS[i as usize - 4]; + assert!( + params.max_chain <= config.max_chain, + "max_chain mismatch {} should be <= {}", + params.max_chain, + config.max_chain + ); assert_eq!(params.good_length, config.good_length); assert_eq!(params.max_lazy, config.max_lazy); assert_eq!(params.nice_length, config.nice_length); assert_eq!(params.add_policy, DictionaryAddPolicy::AddAll); - assert!(params.max_chain <= config.max_chain); assert_eq!(params.strategy, PreflateStrategy::Default); } } diff --git a/src/skip_length_estimator.rs b/src/skip_length_estimator.rs index 2416361..d553d6f 100644 --- a/src/skip_length_estimator.rs +++ b/src/skip_length_estimator.rs @@ -19,6 +19,7 @@ pub fn estimate_skip_length(token_blocks: &[PreflateTokenBlock]) -> DictionaryAd let mut max_distance_last_add = 0; let mut current_offset: u32 = 0; let mut counters = [0u32; 259]; + let mut counters_b = [0u32; 259]; for token_block in token_blocks { for token in token_block.tokens.iter() { @@ -35,6 +36,8 @@ pub fn estimate_skip_length(token_blocks: &[PreflateTokenBlock]) -> DictionaryAd max_distance = std::cmp::max(max_distance, match_length & 0x7fff); if (match_length & 0x8000) == 0 { + counters_b[(match_length & 0x7fff) as usize] += 1; + max_distance_last_add = std::cmp::max(max_distance_last_add, match_length & 0x7fff); }