Skip to content

Commit

Permalink
work
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Jan 3, 2024
1 parent dffb664 commit b93f8e7
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 150 deletions.
40 changes: 22 additions & 18 deletions src/complevel_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ use crate::hash_algorithm::{
HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, ZlibNGHash, ZlibRotatingHash,
MINIZ_LEVEL1_HASH_SIZE_MASK,
};
use crate::hash_chain::{HashChain, MAX_UPDATE_HASH_BATCH};
use crate::hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH};
use crate::preflate_constants;
use crate::preflate_input::PreflateInput;
use crate::preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS};
use crate::preflate_token::{BlockType, PreflateToken, PreflateTokenBlock, PreflateTokenReference};
use crate::skip_length_estimator::{estimate_skip_length, DictionaryAddPolicy};
use crate::skip_length_estimator::estimate_skip_length;

#[derive(Default)]
pub struct CompLevelInfo {
Expand Down Expand Up @@ -89,21 +89,25 @@ impl CandidateInfo {
}
}

fn invoke_update_hash(&mut self, len: u32, input: &PreflateInput, add_policy : DictionaryAddPolicy) {
match self.hash_chain {
HashChainType::Zlib(ref mut h) => h.update_hash::<true>(len, input),
HashChainType::MiniZ(ref mut h) => h.update_hash::<true>(len, input),
HashChainType::LibFlate4(ref mut h) => h.update_hash::<true>(len, input),
HashChainType::ZlibNG(ref mut h) => h.update_hash::<true>(len, input),
}
}

fn invoke_skip_hash(&mut self, len: u32, input: &PreflateInput) {
fn invoke_update_hash(
&mut self,
len: u32,
input: &PreflateInput,
add_policy: DictionaryAddPolicy,
) {
match self.hash_chain {
HashChainType::Zlib(ref mut h) => h.skip_hash::<true>(len, input),
HashChainType::MiniZ(ref mut h) => h.skip_hash::<true>(len, input),
HashChainType::LibFlate4(ref mut h) => h.skip_hash::<true>(len, input),
HashChainType::ZlibNG(ref mut h) => h.skip_hash::<true>(len, input),
HashChainType::Zlib(ref mut h) => {
h.update_hash_with_policy::<true>(len, input, add_policy)
}
HashChainType::MiniZ(ref mut h) => {
h.update_hash_with_policy::<true>(len, input, add_policy)
}
HashChainType::LibFlate4(ref mut h) => {
h.update_hash_with_policy::<true>(len, input, add_policy)
}
HashChainType::ZlibNG(ref mut h) => {
h.update_hash_with_policy::<true>(len, input, add_policy)
}
}
}

Expand Down Expand Up @@ -287,7 +291,7 @@ impl<'a> CompLevelEstimatorState<'a> {
let batch_len = std::cmp::min(length, MAX_UPDATE_HASH_BATCH);

for i in &mut self.candidates {
i.invoke_update_hash(batch_len, &self.input);
i.invoke_update_hash(batch_len, &self.input, DictionaryAddPolicy::AddAll);
}

self.input.advance(batch_len);
Expand Down Expand Up @@ -332,7 +336,7 @@ impl<'a> CompLevelEstimatorState<'a> {
}
PreflateToken::Reference(r) => {
self.check_match(r);
self.skip_or_update_hash(r.len());
self.update_hash(r.len());
}
}
}
Expand Down
80 changes: 55 additions & 25 deletions src/hash_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@ use crate::{

pub const MAX_UPDATE_HASH_BATCH: u32 = 0x180;

/// Value for the `UPDATE_MODE` const parameter of `update_chain`: every byte
/// position of the span passes the update condition.
pub const UPDATE_MODE_ALL: u32 = 0;
/// Value for the `UPDATE_MODE` const parameter of `update_chain`: only the
/// first position of the span (`i == 0`) passes the update condition.
pub const UPDATE_MODE_FIRST: u32 = 1;
/// Value for the `UPDATE_MODE` const parameter of `update_chain`: only the
/// first (`i == 0`) and last (`i == last - 1`) positions of the span pass the
/// update condition.
pub const UPDATE_MODE_FIRST_AND_LAST: u32 = 2;

/// Policy that selects which substrings of a match are added to the dictionary.
///
/// The `u16` payload of the sparse variants is a length limit. In
/// `HashChain::update_hash_with_policy` the sparse behaviour is used only when
/// the length is strictly greater than that limit; at or below the limit the
/// variant behaves exactly like `AddAll`.
#[derive(Default, Eq, PartialEq, Debug, Clone, Copy)]
pub enum DictionaryAddPolicy {
/// Add all substrings of a match to the dictionary
#[default]
AddAll,
/// For matches longer than the limit, add only the first substring of the
/// match to the dictionary; matches up to the limit are added in full
AddFirst(u16),
/// For matches longer than the limit, add only the first and last substring
/// of the match to the dictionary; matches up to the limit are added in full
AddFirstAndLast(u16),
}

pub trait HashChainTrait: Default {}

#[derive(Default, Copy, Clone, Eq, PartialEq, Debug)]
Expand Down Expand Up @@ -130,9 +145,7 @@ impl<H: RotatingHashTrait> HashTable<H> {
self.running_hash = self.running_hash.append(b, self.hash_shift);
}

fn update_chain<const MAINTAIN_DEPTH: bool,
const ONLY_FIRST : bool,
const INCLUDE_LAST : bool>(
fn update_chain<const MAINTAIN_DEPTH: bool, const UPDATE_MODE: u32>(
&mut self,
chars: &[u8],
mut pos: InternalPosition,
Expand All @@ -146,10 +159,13 @@ impl<H: RotatingHashTrait> HashTable<H> {
}

let last = cmp::min(length as usize, chars.len() - offset);
for i in 0.. last {
for i in 0..last {
self.update_running_hash(chars[i + offset]);

if !ONLY_FIRST || i == 0 || (INCLUDE_LAST && i == last - 1) {
if UPDATE_MODE == UPDATE_MODE_ALL
|| (UPDATE_MODE == UPDATE_MODE_FIRST && i == 0)
|| (UPDATE_MODE == UPDATE_MODE_FIRST_AND_LAST && (i == 0 || i == last - 1))
{
let h = self.get_running_hash();

if MAINTAIN_DEPTH {
Expand Down Expand Up @@ -428,27 +444,38 @@ impl<H: RotatingHashTrait> HashChain<H> {
})
}

pub fn update_hash<const MAINTAIN_DEPTH: bool>(&mut self, length: u32, input: &PreflateInput) {
assert!(length <= MAX_UPDATE_HASH_BATCH);

self.reshift_if_necessary::<MAINTAIN_DEPTH>(input);

let pos = InternalPosition::from_absolute(input.pos(), self.total_shift);
let chars = input.cur_chars(0);

self.hash_table
.update_chain::<MAINTAIN_DEPTH, false>(chars, pos, length);

// maintain the extra 3 length chain if we have it
if let Some(x) = self.hash_table_3_len.as_mut() {
x.update_chain::<MAINTAIN_DEPTH, false>(chars, pos, length);
/// Updates the hash tables for `length` bytes at the current position of
/// `input`, choosing the update mode from `add_policy`.
///
/// * `AddAll` always uses `UPDATE_MODE_ALL`.
/// * `AddFirst(limit)` uses `UPDATE_MODE_FIRST` when `length > limit`.
/// * `AddFirstAndLast(limit)` uses `UPDATE_MODE_FIRST_AND_LAST` when
///   `length > limit`.
///
/// A sparse policy whose limit is not exceeded falls back to
/// `UPDATE_MODE_ALL`. The private `update_hash` this delegates to asserts
/// that `length <= MAX_UPDATE_HASH_BATCH`.
pub fn update_hash_with_policy<const MAINTAIN_DEPTH: bool>(
    &mut self,
    length: u32,
    input: &PreflateInput,
    add_policy: DictionaryAddPolicy,
) {
    match add_policy {
        // Sparse modes apply only to spans strictly longer than the limit.
        DictionaryAddPolicy::AddFirst(limit) if length > u32::from(limit) => {
            self.update_hash::<MAINTAIN_DEPTH, UPDATE_MODE_FIRST>(length, input)
        }
        DictionaryAddPolicy::AddFirstAndLast(limit) if length > u32::from(limit) => {
            self.update_hash::<MAINTAIN_DEPTH, UPDATE_MODE_FIRST_AND_LAST>(length, input)
        }
        // Everything else, including sparse policies at or below their limit,
        // is a full update. The variants are spelled out so that adding a new
        // variant forces this match to be revisited.
        DictionaryAddPolicy::AddAll
        | DictionaryAddPolicy::AddFirst(_)
        | DictionaryAddPolicy::AddFirstAndLast(_) => {
            self.update_hash::<MAINTAIN_DEPTH, UPDATE_MODE_ALL>(length, input)
        }
    }
}

pub fn skip_hash<const MAINTAIN_DEPTH: bool>(&mut self, length: u32, input: &PreflateInput) {
fn update_hash<const MAINTAIN_DEPTH: bool, const UPDATE_MODE: u32>(
&mut self,
length: u32,
input: &PreflateInput,
) {
assert!(length <= MAX_UPDATE_HASH_BATCH);

self.reshift_if_necessary::<MAINTAIN_DEPTH>(input);
Expand All @@ -457,12 +484,15 @@ impl<H: RotatingHashTrait> HashChain<H> {
let chars = input.cur_chars(0);

self.hash_table
.update_chain::<MAINTAIN_DEPTH, true>(chars, pos, length);
.update_chain::<MAINTAIN_DEPTH, UPDATE_MODE>(chars, pos, length);

// maintain the extra 3 length chain if we have it
if let Some(x) = self.hash_table_3_len.as_mut() {
x.update_chain::<MAINTAIN_DEPTH, true>(chars, pos, length);
x.update_chain::<MAINTAIN_DEPTH, UPDATE_MODE>(chars, pos, length);
}

//let c = self.checksum_whole_struct();
//println!("u {} = {}", length, c);
}

pub fn match_depth(
Expand Down
64 changes: 11 additions & 53 deletions src/predictor_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@

use crate::bit_helper::DebugHash;
use crate::hash_algorithm::RotatingHashTrait;
use crate::hash_chain::{HashChain, MAX_UPDATE_HASH_BATCH};
use crate::hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH};
use crate::preflate_constants::{MAX_MATCH, MIN_LOOKAHEAD, MIN_MATCH};
use crate::preflate_input::PreflateInput;
use crate::preflate_parameter_estimator::{PreflateParameters, PreflateStrategy};
use crate::preflate_token::PreflateTokenReference;
use crate::skip_length_estimator::DictionaryAddPolicy;
use std::cmp;
use std::sync::atomic;

Expand Down Expand Up @@ -56,62 +55,21 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> {
self.hash.checksum(checksum);
}

pub fn update_hash_with_policy( &mut self, mut length : u32, add_policy : DictionaryAddPolicy)
{
match add_policy {
DictionaryAddPolicy::AddAll => {
self.update_hash(length);
}
DictionaryAddPolicy::AddFirst(limit) => {
if length > limit.into()
{
self.update_hash(length);
} else {
while length > 0 {
let batch_len = cmp::min(length, MAX_UPDATE_HASH_BATCH);
self.hash.skip_hash::<true>(batch_len, &self.input);

self.input.advance(batch_len);
length -= batch_len;
}
}
}
DictionaryAddPolicy::AddFirstAndLast(limit) => {
if length > limit.into()
{
self.update_hash(length);
} else {
length -= 1;
while length > 0 {
let batch_len = cmp::min(length, MAX_UPDATE_HASH_BATCH);
self.hash.skip_hash::<true>(batch_len, &self.input);

self.input.advance(batch_len);
length -= batch_len;
}
self.hash.update_hash::<true>(1, &self.input);
self.input.advance(1);
}
}
}
}

pub fn update_hash(&mut self, mut length: u32) {
while length > 0 {
let batch_len = cmp::min(length, MAX_UPDATE_HASH_BATCH);

self.hash.update_hash::<true>(batch_len, &self.input);

self.input.advance(batch_len);
length -= batch_len;
}
/// Updates the hash for the next `length` bytes of the input according to
/// `add_policy`, then advances the input by `length`.
///
/// Unlike `update_hash_batch`, the whole `length` is passed down in a single
/// call rather than being split into `MAX_UPDATE_HASH_BATCH`-sized pieces.
/// NOTE(review): the `HashChain` update path shown in hash_chain.rs asserts
/// `length <= MAX_UPDATE_HASH_BATCH`; presumably callers only pass single match
/// lengths here — confirm.
pub fn update_hash_with_policy(&mut self, length: u32, add_policy: DictionaryAddPolicy) {
self.hash
.update_hash_with_policy::<false>(length, &self.input, add_policy);
// Advance only after hashing, so the hash update sees the input at its pre-advance position.
self.input.advance(length);
}

pub fn skip_hash(&mut self, mut length: u32) {
pub fn update_hash_batch(&mut self, mut length: u32) {
while length > 0 {
let batch_len = cmp::min(length, MAX_UPDATE_HASH_BATCH);
self.hash.skip_hash::<true>(batch_len, &self.input);

self.hash.update_hash_with_policy::<false>(
batch_len,
&self.input,
DictionaryAddPolicy::AddAll,
);
self.input.advance(batch_len);
length -= batch_len;
}
Expand Down
2 changes: 1 addition & 1 deletion src/preflate_parameter_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ use crate::{
bit_helper::bit_length,
complevel_estimator::estimate_preflate_comp_level,
hash_algorithm::HashAlgorithm,
hash_chain::DictionaryAddPolicy,
preflate_constants::{self},
preflate_stream_info::{extract_preflate_info, PreflateStreamInfo},
preflate_token::PreflateTokenBlock,
skip_length_estimator::DictionaryAddPolicy,
statistical_codec::{PredictionDecoder, PredictionEncoder},
};

Expand Down
9 changes: 3 additions & 6 deletions src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,6 @@ fn verify_zlib_compressed_perfect() {
preflate_parameter_estimator::PreflateHuffStrategy,
preflate_parameter_estimator::PreflateStrategy,
preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS},
skip_length_estimator::DictionaryAddPolicy,
statistical_codec::{AssertDefaultOnlyDecoder, AssertDefaultOnlyEncoder},
};

Expand All @@ -470,13 +469,12 @@ fn verify_zlib_compressed_perfect() {
let max_lazy;
if i < 4 {
config = &FAST_PREFLATE_PARSER_SETTINGS[i as usize - 1];
add_policy =
crate::skip_length_estimator::DictionaryAddPolicy::AddFirst(config.max_lazy as u16);
add_policy = crate::hash_chain::DictionaryAddPolicy::AddFirst(config.max_lazy as u16);
max_dist_3_matches = 32768;
max_lazy = 0;
} else {
config = &SLOW_PREFLATE_PARSER_SETTINGS[i as usize - 4];
add_policy = DictionaryAddPolicy::AddAll;
add_policy = crate::hash_chain::DictionaryAddPolicy::AddAll;
max_dist_3_matches = 4096;
max_lazy = config.max_lazy;
}
Expand Down Expand Up @@ -517,7 +515,6 @@ fn verify_miniz1_compressed_perfect() {
use crate::{
cabac_codec::{PredictionDecoderCabac, PredictionEncoderCabac},
preflate_parameter_estimator::{PreflateHuffStrategy, PreflateStrategy},
skip_length_estimator::DictionaryAddPolicy,
};
use cabac::vp8::{VP8Reader, VP8Writer};

Expand Down Expand Up @@ -545,7 +542,7 @@ fn verify_miniz1_compressed_perfect() {
max_chain: 2,
hash_algorithm: HashAlgorithm::MiniZFast,
min_len: 3,
add_policy: DictionaryAddPolicy::AddFirst(0),
add_policy: crate::hash_chain::DictionaryAddPolicy::AddFirst(0),
};

encode_mispredictions(&contents, &params, &mut cabac_encoder).unwrap();
Expand Down
25 changes: 4 additions & 21 deletions src/skip_length_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,10 @@
///
/// This will be the limit that we use when we decide whether a match is
/// added to the dictionary in full or only partially (see `DictionaryAddPolicy`).
use default_boxed::DefaultBoxed;

use crate::preflate_token::{PreflateToken, PreflateTokenBlock};

#[derive(DefaultBoxed)]
pub struct SkipLengthEstimator {
pub current_window: [u8; 32768],
pub current_offset: u32,
pub max_distance: u32,
}

#[derive(Default, Eq, PartialEq, Debug, Clone, Copy)]
pub enum DictionaryAddPolicy {
/// Add all substrings of a match to the dictionary
#[default]
AddAll,
/// Add only the first substring of a match to the dictionary that are larger than the limit
AddFirst(u16),
/// Add only the first and last substring of a match to the dictionary that are larger than the limit
AddFirstAndLast(u16),
}
use crate::{
hash_chain::DictionaryAddPolicy,
preflate_token::{PreflateToken, PreflateTokenBlock},
};

pub fn estimate_skip_length(token_blocks: &[PreflateTokenBlock]) -> DictionaryAddPolicy {
let mut current_window = vec![0u16; 32768];
Expand Down
Loading

0 comments on commit b93f8e7

Please sign in to comment.