Skip to content

Commit

Permalink
add zlibng (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp authored Jan 2, 2024
1 parent 9b03c26 commit e561e29
Show file tree
Hide file tree
Showing 8 changed files with 309 additions and 78 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ zstd = "0.13.0"
[dev-dependencies]
crc32fast = "1.3"
libz-sys = "1.1"
flate2 = "1.0"
libdeflate-sys = "1.19"
libz-ng-sys="1.1.12"
miniz_oxide="0.7.1"
116 changes: 76 additions & 40 deletions src/complevel_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
/// This module is designed to detect the appropriate overall parameters for the preflate compressor.
/// Getting the parameters correct means that the resulting diff between the deflate stream
/// and the predicted deflate stream will be as small as possible.
use crate::hash_algorithm::{HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, ZlibRotatingHash};
use crate::hash_algorithm::{
HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, ZlibNGHash, ZlibRotatingHash,
MINIZ_LEVEL1_HASH_SIZE_MASK,
};
use crate::hash_chain::{HashChain, MAX_UPDATE_HASH_BATCH};
use crate::preflate_constants;
use crate::preflate_input::PreflateInput;
Expand Down Expand Up @@ -37,6 +40,7 @@ enum HashChainType {
Zlib(HashChain<ZlibRotatingHash>),
MiniZ(HashChain<MiniZHash>),
LibFlate4(HashChain<LibdeflateRotatingHash4>),
ZlibNG(HashChain<ZlibNGHash>),
}

struct CandidateInfo {
Expand All @@ -51,11 +55,45 @@ struct CandidateInfo {
}

impl CandidateInfo {
/// Builds a candidate whose match statistics all start at zero.
///
/// `hash_mask`, `hash_shift` and `skip_length` are stored on the candidate as
/// given. `hash_algorithm` selects which `HashChainType` variant is created;
/// the chain itself is constructed from `hash_shift`, `hash_mask` and `input`.
fn new(
    hash_mask: u16,
    hash_shift: u32,
    skip_length: Option<u32>,
    hash_algorithm: HashAlgorithm,
    input: &PreflateInput,
) -> Self {
    // Construct the chain first so the struct literal below stays flat.
    let hash_chain = match hash_algorithm {
        HashAlgorithm::Zlib => {
            let chain = HashChain::<ZlibRotatingHash>::new(hash_shift, hash_mask, &input);
            HashChainType::Zlib(chain)
        }
        HashAlgorithm::MiniZFast => {
            let chain = HashChain::<MiniZHash>::new(hash_shift, hash_mask, &input);
            HashChainType::MiniZ(chain)
        }
        HashAlgorithm::Libdeflate4 => {
            let chain = HashChain::<LibdeflateRotatingHash4>::new(hash_shift, hash_mask, &input);
            HashChainType::LibFlate4(chain)
        }
        HashAlgorithm::ZlibNG => {
            let chain = HashChain::<ZlibNGHash>::new(hash_shift, hash_mask, &input);
            HashChainType::ZlibNG(chain)
        }
    };

    CandidateInfo {
        hash_mask,
        hash_shift,
        skip_length,
        hash_chain,
        max_chain_found: 0,
        longest_dist_at_hop_0: 0,
        longest_dist_at_hop_1_plus: 0,
    }
}

/// Forwards to `update_hash::<true>(len, input)` on whichever hash chain
/// variant this candidate holds.
fn invoke_update_hash(&mut self, len: u32, input: &PreflateInput) {
    match &mut self.hash_chain {
        HashChainType::Zlib(chain) => chain.update_hash::<true>(len, input),
        HashChainType::MiniZ(chain) => chain.update_hash::<true>(len, input),
        HashChainType::LibFlate4(chain) => chain.update_hash::<true>(len, input),
        HashChainType::ZlibNG(chain) => chain.update_hash::<true>(len, input),
    }
}

Expand All @@ -64,6 +102,7 @@ impl CandidateInfo {
HashChainType::Zlib(ref mut h) => h.skip_hash::<true>(len, input),
HashChainType::MiniZ(ref mut h) => h.skip_hash::<true>(len, input),
HashChainType::LibFlate4(ref mut h) => h.skip_hash::<true>(len, input),
HashChainType::ZlibNG(ref mut h) => h.skip_hash::<true>(len, input),
}
}

Expand All @@ -77,6 +116,7 @@ impl CandidateInfo {
HashChainType::Zlib(ref mut h) => h.match_depth(token, window_size, input),
HashChainType::MiniZ(ref mut h) => h.match_depth(token, window_size, input),
HashChainType::LibFlate4(ref mut h) => h.match_depth(token, window_size, input),
HashChainType::ZlibNG(ref mut h) => h.match_depth(token, window_size, input),
}
}

Expand Down Expand Up @@ -158,6 +198,7 @@ impl CandidateInfo {
HashChainType::Zlib(_) => HashAlgorithm::Zlib,
HashChainType::MiniZ(_) => HashAlgorithm::MiniZFast,
HashChainType::LibFlate4(_) => HashAlgorithm::Libdeflate4,
HashChainType::ZlibNG(_) => HashAlgorithm::ZlibNG,
}
}
}
Expand Down Expand Up @@ -208,57 +249,52 @@ impl<'a> CompLevelEstimatorState<'a> {
// add the ZlibRotatingHash candidates
for config in &FAST_PREFLATE_PARSER_SETTINGS {
for &(hash_shift, hash_mask) in hashparameters.iter() {
candidates.push(Box::new(CandidateInfo {
skip_length: Some(config.max_lazy),
candidates.push(Box::new(CandidateInfo::new(
hash_mask,
hash_shift,
hash_chain: HashChainType::Zlib(HashChain::<ZlibRotatingHash>::new(
hash_shift, hash_mask, &input,
)),
max_chain_found: 0,
longest_dist_at_hop_0: 0,
longest_dist_at_hop_1_plus: 0,
}));
Some(config.max_lazy),
HashAlgorithm::Zlib,
&input,
)));
}
}

candidates.push(Box::new(CandidateInfo {
skip_length: Some(2),
hash_shift: 5,
hash_mask: 32767,
hash_chain: HashChainType::MiniZ(HashChain::<MiniZHash>::new(5, 32767, &input)),
max_chain_found: 0,
longest_dist_at_hop_0: 0,
longest_dist_at_hop_1_plus: 0,
}));
candidates.push(Box::new(CandidateInfo::new(
MINIZ_LEVEL1_HASH_SIZE_MASK,
0,
Some(2),
HashAlgorithm::MiniZFast,
&input,
)));

// slow compressor candidates
for (hash_shift, hash_mask) in [(5, 32767), (4, 2047)] {
candidates.push(Box::new(CandidateInfo {
skip_length: None,
hash_shift,
candidates.push(Box::new(CandidateInfo::new(
hash_mask,
hash_chain: HashChainType::Zlib(HashChain::<ZlibRotatingHash>::new(
hash_shift, hash_mask, &input,
)),
max_chain_found: 0,
longest_dist_at_hop_0: 0,
longest_dist_at_hop_1_plus: 0,
}));
hash_shift,
None,
HashAlgorithm::Zlib,
&input,
)));
}

// LibFlate4 candidate
candidates.push(Box::new(CandidateInfo {
skip_length: None,
hash_shift: 0,
hash_mask: 0xffff,
hash_chain: HashChainType::LibFlate4(HashChain::<LibdeflateRotatingHash4>::new(
0, 0xffff, &input,
)),
max_chain_found: 0,
longest_dist_at_hop_0: 0,
longest_dist_at_hop_1_plus: 0,
}));
candidates.push(Box::new(CandidateInfo::new(
0xffff,
0,
None,
HashAlgorithm::Libdeflate4,
&input,
)));

// ZlibNG slow candidate
candidates.push(Box::new(CandidateInfo::new(
0xffff,
0,
None,
HashAlgorithm::ZlibNG,
&input,
)));

CompLevelEstimatorState {
input,
Expand Down
34 changes: 32 additions & 2 deletions src/hash_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub enum HashAlgorithm {
Zlib,
MiniZFast,
Libdeflate4,
ZlibNG,
}
pub trait RotatingHashTrait: Default + Copy + Clone {
fn hash(&self, mask: u16) -> usize;
Expand Down Expand Up @@ -42,10 +43,13 @@ pub struct MiniZHash {
hash: u32,
}

/// Hash mask for the MiniZ fast compression mode; masked hash values fall in the range 0..=4095.
pub const MINIZ_LEVEL1_HASH_SIZE_MASK: u16 = 4095;

impl RotatingHashTrait for MiniZHash {
fn hash(&self, mask: u16) -> usize {
debug_assert!(mask == 0x7fff);
((self.hash ^ (self.hash >> 17)) & 0x7fff) as usize
debug_assert!(mask == MINIZ_LEVEL1_HASH_SIZE_MASK);
((self.hash ^ (self.hash >> 17)) & u32::from(MINIZ_LEVEL1_HASH_SIZE_MASK)) as usize
}

fn append(&self, c: u8, _hash_shift: u32) -> Self {
Expand Down Expand Up @@ -117,3 +121,29 @@ impl RotatingHashTrait for LibdeflateRotatingHash3 {
3
}
}

/// Rotating hash state reported as [`HashAlgorithm::ZlibNG`]; it tracks the
/// four most recently appended bytes.
#[derive(Default, Copy, Clone)]
pub struct ZlibNGHash {
    /// Packed recent bytes: the newest byte sits in the top 8 bits and older
    /// bytes move towards the low bits on every `append`.
    hash: u32,
}

impl RotatingHashTrait for ZlibNGHash {
    /// Returns the upper 16 bits of the state multiplied (with wrap-around)
    /// by 2654435761. `mask` is not applied; debug builds only check that it
    /// equals `0xffff`.
    fn hash(&self, mask: u16) -> usize {
        const MULTIPLIER: u32 = 2654435761;

        debug_assert!(mask == 0xffff);
        let product = self.hash.wrapping_mul(MULTIPLIER);
        (product >> 16) as usize
    }

    /// Returns a new state with `c` placed in the top byte and the previous
    /// state shifted down by one byte (dropping its lowest byte).
    /// `_hash_shift` is ignored.
    fn append(&self, c: u8, _hash_shift: u32) -> Self {
        let newest = u32::from(c) << 24;
        let older = self.hash >> 8;
        Self {
            hash: newest | older,
        }
    }

    /// Identifies this implementation as `HashAlgorithm::ZlibNG`.
    fn hash_algorithm() -> HashAlgorithm {
        HashAlgorithm::ZlibNG
    }

    /// Number of bytes that contribute to the hash: always 4.
    fn num_hash_bytes() -> u16 {
        4
    }
}
5 changes: 5 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ pub struct DecompressResult {
/// the number of bytes that were processed from the compressed stream (this will be exactly the
/// data that will be recreated using the cabac_encoded data)
pub compressed_size: usize,

/// the parameters that were used to compress the stream
pub parameters: PreflateParameters,
}

impl core::fmt::Debug for DecompressResult {
Expand Down Expand Up @@ -117,6 +120,7 @@ pub fn decompress_deflate_stream(
plain_text: contents.plain_text,
prediction_corrections: cabac_encoded,
compressed_size: contents.compressed_size,
parameters: params,
})
}

Expand Down Expand Up @@ -180,6 +184,7 @@ pub fn decompress_deflate_stream_assert(
plain_text: contents.plain_text,
prediction_corrections: cabac_encoded,
compressed_size: contents.compressed_size,
parameters: params,
})
}

Expand Down
7 changes: 6 additions & 1 deletion src/predictor_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,12 @@ impl<'a, H: RotatingHashTrait> PredictorState<'a, H> {
pub fn match_token(&self, prev_len: u32, offset: u32, max_depth: u32) -> MatchResult {
let start_pos = self.current_input_pos() + offset;
let max_len = std::cmp::min(self.total_input_size() - start_pos, MAX_MATCH);
if max_len < std::cmp::max(prev_len + 1, MIN_MATCH) {
if max_len
< std::cmp::max(
prev_len + 1,
std::cmp::max(H::num_hash_bytes() as u32, MIN_MATCH),
)
{
return MatchResult::NoInput;
}

Expand Down
2 changes: 2 additions & 0 deletions src/preflate_parameter_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ impl PreflateParameters {
const HASH_ALGORITHM_ZLIB: u16 = HashAlgorithm::Zlib as u16;
const HASH_ALGORITHM_MINIZ_FAST: u16 = HashAlgorithm::MiniZFast as u16;
const HASH_ALGORITHM_LIBDEFLATE4: u16 = HashAlgorithm::Libdeflate4 as u16;
const HASH_ALGORITHM_ZLIBNG: u16 = HashAlgorithm::ZlibNG as u16;

Ok(PreflateParameters {
strategy: match strategy {
Expand Down Expand Up @@ -127,6 +128,7 @@ impl PreflateParameters {
HASH_ALGORITHM_ZLIB => HashAlgorithm::Zlib,
HASH_ALGORITHM_MINIZ_FAST => HashAlgorithm::MiniZFast,
HASH_ALGORITHM_LIBDEFLATE4 => HashAlgorithm::Libdeflate4,
HASH_ALGORITHM_ZLIBNG => HashAlgorithm::ZlibNG,
_ => panic!("invalid hash algorithm"),
},
})
Expand Down
17 changes: 14 additions & 3 deletions src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ use crate::{
deflate_reader::DeflateReader,
deflate_writer::DeflateWriter,
hash_algorithm::{
HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, RotatingHashTrait, ZlibRotatingHash,
HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, RotatingHashTrait, ZlibNGHash,
ZlibRotatingHash,
},
huffman_calc::HufftreeBitCalc,
preflate_error::PreflateError,
Expand Down Expand Up @@ -46,6 +47,11 @@ pub fn encode_mispredictions(
TokenPredictor::<LibdeflateRotatingHash4>::new(&deflate.plain_text, params),
encoder,
)?,
HashAlgorithm::ZlibNG => predict_blocks(
&deflate.blocks,
TokenPredictor::<ZlibNGHash>::new(&deflate.plain_text, params),
encoder,
)?,
}

encoder.encode_misprediction(CodecMisprediction::EOFMisprediction, false);
Expand Down Expand Up @@ -144,6 +150,11 @@ pub fn decode_mispredictions(
decoder,
&mut deflate_writer,
)?,
HashAlgorithm::ZlibNG => recreate_blocks(
TokenPredictor::<ZlibNGHash>::new(plain_text, params),
decoder,
&mut deflate_writer,
)?,
};

// flush the last byte, which may be incomplete and normally
Expand Down Expand Up @@ -516,8 +527,8 @@ fn verify_miniz1_compressed_perfect() {
huff_strategy: PreflateHuffStrategy::Dynamic,
zlib_compatible: true,
window_bits: 15,
hash_shift: 5,
hash_mask: 0x7fff,
hash_shift: 0,
hash_mask: crate::hash_algorithm::MINIZ_LEVEL1_HASH_SIZE_MASK,
max_token_count: 16383,
max_dist_3_matches: 8192,
very_far_matches_detected: false,
Expand Down
Loading

0 comments on commit e561e29

Please sign in to comment.