Skip to content

Commit

Permalink
add IDAT scanning (#14)
Browse files Browse the repository at this point in the history
* idat progress

* work
  • Loading branch information
mcroomp authored Aug 30, 2024
1 parent 3c651ae commit 8c5aed6
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ byteorder = "1.4"
cabac = "0.6.0"
default-boxed = "0.2"
zstd = "0.13.0"
crc32fast = "1.3"

[dev-dependencies]
crc32fast = "1.3"
libz-sys = "1.1"
libdeflate-sys = "1.19"
libz-ng-sys="1.1.12"
Expand Down
20 changes: 8 additions & 12 deletions package/PreflateRs.nuspec
Original file line number Diff line number Diff line change
@@ -1,24 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2011/08/nuspec.xsd">
<metadata>
<id>Lepton.Jpeg.Rust</id>
<version>0.3.4.3</version>
<title>Lepton JPEG Compression Rust version binaries and libraries</title>
<id>PreflateRs</id>
<version>0.0.0.1</version>
<title>PreflateRs Compression Rust binaries and libraries</title>
<authors>kristofr</authors>
<owners>kristofr</owners>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>Lepton Rust binaries and libraries</description>
<description>Preflate Rust binaries and libraries</description>
<tags>lepton</tags>
</metadata>
<files>
<file src="..\target\debug\lepton_jpeg_util.exe" target="exe\debug\x64" />
<file src="..\target\debug\lepton_jpeg_util.pdb" target="exe\debug\x64" />
<file src="..\target\debug\lepton_jpeg.dll" target="lib\debug\x64" />
<file src="..\target\debug\lepton_jpeg.pdb" target="lib\debug\x64" />
<file src="..\target\debug\preflate_rs.dll" target="lib\debug\x64" />
<file src="..\target\debug\preflate_rs.pdb" target="lib\debug\x64" />

<file src="..\target\release\lepton_jpeg_util.exe" target="exe\release\x64" />
<file src="..\target\release\lepton_jpeg_util.pdb" target="exe\release\x64" />
<file src="..\target\release\lepton_jpeg.dll" target="lib\release\x64" />
<file src="..\target\release\lepton_jpeg.pdb" target="lib\release\x64" />
<file src="..\target\release\preflate_rs.dll" target="lib\release\x64" />
<file src="..\target\release\preflate_rs.pdb" target="lib\release\x64" />
</files>
</package>
Binary file added samples/treegdi.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 8 additions & 2 deletions src/hash_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ impl InternalPositionRel {
}
}

#[derive(Default, Copy, Clone, Eq, PartialEq, Debug)]
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
struct InternalPositionAbs {
pos: u32,
}
Expand All @@ -97,14 +97,20 @@ impl InternalPosition for InternalPositionAbs {
}

fn is_valid(&self) -> bool {
self.pos > 0
self.pos != 0xffffffff
}

fn dist(&self, pos: Self) -> u32 {
u32::from(self.pos - pos.pos)
}
}

impl Default for InternalPositionAbs {
fn default() -> Self {
Self { pos: 0xffffffff }
}
}

impl InternalPositionAbs {
fn new(pos: u32) -> Self {
Self { pos }
Expand Down
64 changes: 56 additions & 8 deletions src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::{
huffman_calc::HufftreeBitCalc,
preflate_error::PreflateError,
preflate_parameter_estimator::PreflateParameters,
preflate_token::{BlockType, PreflateTokenBlock},
preflate_token::{BlockType, PreflateToken, PreflateTokenBlock},
statistical_codec::{
CodecCorrection, CodecMisprediction, PredictionDecoder, PredictionEncoder,
},
Expand Down Expand Up @@ -70,6 +70,12 @@ pub fn parse_deflate(
let eof_padding = block_decoder.read_eof_padding();
let plain_text = block_decoder.move_plain_text();
let compressed_size = input_stream.position() as usize;

/*// write to file
let mut f = std::fs::File::create("c:\\temp\\treegdi.deflate")
.unwrap();
std::io::Write::write_all(&mut f, &compressed_data[0..compressed_size]).unwrap();*/

Ok(DeflateContents {
compressed_size,
plain_text,
Expand Down Expand Up @@ -370,10 +376,13 @@ fn test_treepngdeflate() {
use crate::hash_chain::HashChain;
use crate::hash_chain::UPDATE_MODE_ALL;

let compressed_data: &[u8] = &read_file("treepng.deflate");
let compressed_data: &[u8] = &read_file("treegdi.deflate");

let contents = parse_deflate(compressed_data, 1).unwrap();

let decoder = miniz_oxide::inflate::decompress_to_vec(compressed_data).unwrap();
assert_eq!(&decoder[..], &contents.plain_text[..]);

let mut input = crate::preflate_input::PreflateInput::new(&contents.plain_text);
let mut chain: crate::hash_chain::HashChainAbs<RandomVectorHash> =
RandomVectorHash::new_hash_chain(RandomVectorHash {});
Expand All @@ -382,22 +391,52 @@ fn test_treepngdeflate() {

let h = r.get_hash(&contents.plain_text);

//println!("hashx: {:?}", h);
println!("hashx: {:?}", h);

let mut maxdepth = 0;
let mut mismatches = 0;
let mut prev = PreflateToken::Literal;

/*let mut o = 0;
for i in 0..20
{
let t = &contents.blocks[0].tokens[i];
println!("{} token: {}, {:?}", o, i, t);
match t {
crate::preflate_token::PreflateToken::Literal => o += 1,
crate::preflate_token::PreflateToken::Reference(r) => {
o += r.len();
}
}
}*/

for block_no in 0..contents.blocks.len() {
let b = &contents.blocks[block_no];
println!("block: {} {}", block_no, b.tokens.len());

for b in &contents.blocks {
for i in 0..b.tokens.len() {
let t = &b.tokens[i];

let pos = input.pos();
let chars = input.cur_chars(0);
let depth;
let mut chars = chars[0..chars.len().min(10)].to_vec();

match t {
crate::preflate_token::PreflateToken::Literal => {
chain.update_hash::<true, UPDATE_MODE_ALL>(1, &input);
input.advance(1);
depth = 0;
chars.resize(1, 0);
}
crate::preflate_token::PreflateToken::Reference(r) => {
let depth = chain.match_depth(&r, 32768, &input);
depth = chain.match_depth(&r, 32768, &input);
chars.resize(r.len().min(10) as usize, 0);
if depth > 5 {
println!("token: {}, depth {} reference: {:?}", i, depth, r);
mismatches += 1;
if mismatches > 20 {
return;
}

//println!("back: {:?}", &input.cur_chars(-82)[0..82]);

Expand All @@ -406,15 +445,24 @@ fn test_treepngdeflate() {
depth,
input.pos(),
&input.cur_chars(0)[0..16]
);
chain.match_depth(&r, 32768, &input);*/
);*/
chain.match_depth(&r, 32768, &input);
}

chain.update_hash::<true, UPDATE_MODE_ALL>(r.len(), &input);

input.advance(r.len());
}
}

if (block_no == 1 && i > 6900 && i < 7100) {
println!(
"offset: {} token: {}/{}, depth {} reference: {:?} chars {:?}",
pos, block_no, i, depth, t, chars
);
}

prev = t.clone();
}
}

Expand Down
113 changes: 108 additions & 5 deletions src/scan_deflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@ use crate::{decompress_deflate_stream, DecompressResult};
use byteorder::{LittleEndian, ReadBytesExt};
use std::io::{Read, Seek, SeekFrom};

#[derive(Hash, Eq, PartialEq, Copy, Clone, Debug)]
use anyhow::Result;

/// Kind of compressed-stream signature recognised while scanning a byte buffer.
#[derive(Hash, Eq, PartialEq, Clone, Debug)]
pub enum Signature {
/// Zlib stream header. The `u8` is chosen by `next_signature` from the
/// two header bytes (for example `0xDA78` maps to `8`).
/// NOTE(review): presumably a compression-level hint - confirm.
Zlib(u8),
/// ZIP local file header (16-bit prefix `0x4B50`).
ZipLocalFileHeader,
/// Gzip stream header (16-bit prefix `0x8B1F`).
Gzip,
/// PNG IDAT chunk sequence: the data of consecutive IDAT chunks, concatenated,
/// forms a single Zlib stream. The variant itself carries no data; chunk
/// positions are recovered separately when the chunks are parsed.
IDAT,
}

fn next_signature(src: &[u8], index: &mut usize) -> Option<Signature> {
Expand All @@ -27,6 +31,7 @@ fn next_signature(src: &[u8], index: &mut usize) -> Option<Signature> {
0xDA78 => Signature::Zlib(8),
0x4B50 => Signature::ZipLocalFileHeader,
0x8B1F => Signature::Gzip,
0x4449 => Signature::IDAT,
_ => continue,
};

Expand Down Expand Up @@ -58,20 +63,118 @@ pub fn search_for_deflate_streams(src: &[u8], locations_found: &mut Vec<DeflateS
start,
data: res,
});
} else {
index += 2;
continue;
}
}

Signature::ZipLocalFileHeader => {
if find_zip_stream(src, &mut index, locations_found).is_err() {
index += 2;
if find_zip_stream(src, &mut index, locations_found).is_ok() {
continue;
}
}

Signature::IDAT => {
if index >= 4 {
if let Ok(r) = parse_idat(&src[index - 4..], 0) {
if let Ok(res) = decompress_deflate_stream(&r.payload[2..], true) {
println!("success! {:?}", r.idat_boundaries);
println!(
"results {:?}, {}, {:?}",
res.compressed_size,
res.prediction_corrections.len(),
res.parameters
);

println!(
"recompressed: {}",
zstd::bulk::compress(&res.plain_text, 9).unwrap().len()
);
}
}
}
}
}

// wasn't able to match any of the known signatures, so skip the current byte
index += 1;
}
}

/// Result of parsing a run of consecutive PNG IDAT chunks.
struct IdatContents {
/// Data bytes of every parsed IDAT chunk, concatenated in file order
/// (chunk length, type and CRC fields are not included).
payload: Vec<u8>,
/// Byte offset of the start of each parsed chunk (its length field),
/// relative to the beginning of the slice that was parsed.
idat_boundaries: Vec<u32>,
}

/// Parses a run of consecutive PNG IDAT chunks and concatenates their data.
///
/// `compressed_data` must begin at the 4-byte big-endian length field of the
/// first IDAT chunk. Each chunk is laid out as
/// `length (4) | type (4) | data (length) | CRC-32 (4)`, where the CRC covers
/// the type and data bytes. Parsing stops at the first chunk that is not an
/// IDAT chunk or that does not fit completely inside `compressed_data`.
///
/// When `deflate_info_dump_level` is greater than zero, the chunk offsets are
/// printed to stdout.
///
/// # Errors
///
/// * `"No IDAT chunk found"` - the slice does not start with at least one
///   complete IDAT chunk (too short, wrong chunk type, or truncated data).
/// * `"CRC mismatch"` - an IDAT chunk's stored CRC does not match its contents.
fn parse_idat(compressed_data: &[u8], deflate_info_dump_level: u32) -> Result<IdatContents> {
    // Length field plus chunk type.
    const HEADER_LEN: usize = 8;
    // Trailing CRC-32.
    const CRC_LEN: usize = 4;

    let mut payload = Vec::new();
    let mut idat_boundaries = Vec::new();
    let mut pos = 0usize;

    // `get` returns None when fewer than 8 bytes remain, so a short tail after
    // the last chunk ends the scan instead of panicking on an index.
    while let Some(header) = compressed_data.get(pos..pos + HEADER_LEN) {
        // We only want IDAT chunks and they have to be consecutive, so stop
        // once we see anything else.
        let chunk_type = &header[4..8];
        if chunk_type != b"IDAT" {
            break;
        }

        // PNG chunks start with the big-endian length of the data field.
        let chunk_len = u32::from_be_bytes([header[0], header[1], header[2], header[3]]) as usize;

        // Checked arithmetic: on 32-bit targets a hostile length could wrap.
        let data_start = pos + HEADER_LEN;
        let chunk_end = match data_start
            .checked_add(chunk_len)
            .and_then(|end| end.checked_add(CRC_LEN))
        {
            Some(end) if end <= compressed_data.len() => end,
            // Truncated chunk: keep what has been parsed so far.
            _ => break,
        };
        let crc_start = chunk_end - CRC_LEN;
        let chunk = &compressed_data[data_start..crc_start];

        let stored_crc = u32::from_be_bytes([
            compressed_data[crc_start],
            compressed_data[crc_start + 1],
            compressed_data[crc_start + 2],
            compressed_data[crc_start + 3],
        ]);

        let mut crc = crc32fast::Hasher::new();
        crc.update(chunk_type);
        crc.update(chunk);
        if crc.finalize() != stored_crc {
            return Err(anyhow::Error::msg("CRC mismatch"));
        }

        // Only accept the data once the CRC has been verified.
        payload.extend_from_slice(chunk);
        // Offsets are stored as u32 to match `IdatContents`; inputs of 4 GiB
        // or more would truncate here.
        idat_boundaries.push(pos as u32);
        pos = chunk_end;
    }

    // Covers inputs that are too short, that do not start with IDAT, or whose
    // first chunk is truncated. Returning Ok with an empty payload would let
    // callers slice into nothing.
    if idat_boundaries.is_empty() {
        return Err(anyhow::Error::msg("No IDAT chunk found"));
    }

    if deflate_info_dump_level > 0 {
        println!("IDAT boundaries: {:?}", idat_boundaries);
    }

    Ok(IdatContents {
        payload,
        idat_boundaries,
    })
}

#[test]
fn parse_png() {
    // Smoke test: scan the sample PNG for embedded deflate streams and print
    // whatever the scanner reports. There are no assertions; the test only
    // fails if scanning panics.
    let png_bytes = crate::process::read_file("treegdi.png");
    let mut found = Vec::new();

    search_for_deflate_streams(&png_bytes, &mut found);

    println!("locations found: {:?}", found);
}

const ZIP_LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50;

#[derive(Default)]
Expand Down

0 comments on commit 8c5aed6

Please sign in to comment.