Skip to content

Commit

Permalink
Vendor the seek-bzip library with modifications to avoid Buffer usage (
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin authored Feb 13, 2025
1 parent a593f82 commit fabb3a5
Show file tree
Hide file tree
Showing 11 changed files with 930 additions and 22 deletions.
1 change: 1 addition & 0 deletions eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export default tseslint.config(
'*.mjs',
'example/*',
'src/htscodecs',
'src/seek-bzip',
'coverage',
],
},
Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
"md5": "^2.2.1",
"pako": "^1.0.4",
"quick-lru": "^4.0.1",
"seek-bzip": "^2.0.0",
"xz-decompress": "^0.2.1"
},
"devDependencies": {
Expand Down
9 changes: 2 additions & 7 deletions src/cramFile/file.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
// import bzip2 from 'bzip2'
// import BZip2 from 'bzip2-wasm'
// import { decompress } from 'bz2'


import crc32 from 'crc/calculators/crc32'
import QuickLRU from 'quick-lru'
import Bunzip from 'seek-bzip'
import { XzReadableStream } from 'xz-decompress'

import { CramMalformedError, CramUnimplementedError } from '../errors'
import * as htscodecs from '../htscodecs'
import { open } from '../io'
import ransuncompress from '../rans'
import { parseHeaderText } from '../sam'
import { decode } from '../seek-bzip'
import { unzip } from '../unzip'
import CramContainer from './container'
import CramRecord from './record'
Expand Down Expand Up @@ -293,7 +288,7 @@ export default class CramFile {
}
return ret
} else if (compressionMethod === 'bzip2') {
return Bunzip.decode(inputBuffer)
return decode(inputBuffer)
} else if (compressionMethod === 'lzma') {
const decompressedResponse = new Response(
new XzReadableStream(bufferToStream(inputBuffer)),
Expand Down
4 changes: 2 additions & 2 deletions src/htscodecs/arith_gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

import Bunzip from 'seek-bzip'
import { decode } from '../seek-bzip'

import RangeCoder from './arith_sh'
import ByteModel from './byte_model'
Expand Down Expand Up @@ -154,7 +154,7 @@ export default class RangeCoderGen {
// ----------------------------------------------------------------------
// External codec
decodeExt(stream, n_out) {
return Bunzip.decode(stream.buf.slice(stream.pos))
return decode(stream.buf.slice(stream.pos))
}

// ----------------------------------------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions src/seek-bzip/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
## seek-bzip

Vendored copy of seek-bzip, modified by Colin Diesh (c) 2025 to remove Buffer
usage.

Original license: MIT (https://github.com/cscott/seek-bzip/blob/master/LICENSE)
113 changes: 113 additions & 0 deletions src/seek-bzip/bitreader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
node-bzip - a pure-javascript Node.JS module for decoding bzip2 data
Copyright (C) 2012 Eli Skeggs
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Adapted from bzip2.js, copyright 2011 antimatter15 (antimatter15@gmail.com).
Based on micro-bunzip by Rob Landley (rob@landley.net).
Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
which also acknowledges contributions by Mike Burrows, David Wheeler,
Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
Robert Sedgewick, and Jon L. Bentley.
*/

import { toHex } from './toHex'

// BITMASK[n] keeps the low n bits of a byte (n = 0..8).
const BITMASK = [0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff]

/** Byte source that BitReader pulls from. */
interface Stream {
  /** Called each time the reader needs a fresh byte. */
  readByte(): number
  /** Called with a byte offset when the reader is repositioned. */
  seek(pos: number): void
}

/**
 * Pulls values of arbitrary bit width out of a byte stream, consuming each
 * byte from its most-significant bit downwards.
 */
export default class BitReader {
  /** Number of bits of the current byte that have already been consumed. */
  private bitOffset = 0
  /** Most recently fetched byte; only meaningful while `hasByte` is true. */
  private curByte = 0
  /** Whether `curByte` still holds unconsumed bits. */
  private hasByte = false

  constructor(private stream: Stream) {}

  /** Fetches the next byte from the stream unless one is already buffered. */
  private _ensureByte(): void {
    if (this.hasByte) {
      return
    }
    this.curByte = this.stream.readByte()
    this.hasByte = true
  }

  /**
   * Reads `bits` bits and returns them as an integer, first bit read in the
   * highest position.
   *
   * The value is assembled with 32-bit bitwise operators, so a 32-bit read
   * whose leading bit is set comes back negative.
   *
   * @param bits Number of bits to read
   * @returns The bits read, packed into a number
   */
  read(bits: number): number {
    let acc = 0
    let wanted = bits
    while (wanted > 0) {
      this._ensureByte()
      const available = 8 - this.bitOffset
      if (wanted < available) {
        // The request ends inside this byte: take the top `wanted` bits of
        // what is left and keep the byte buffered for the next call.
        const drop = available - wanted
        acc = (acc << wanted) | ((this.curByte & (BITMASK[wanted]! << drop)) >> drop)
        this.bitOffset += wanted
        wanted = 0
      } else {
        // The request swallows the rest of this byte; a new one is fetched
        // on the next pass (or the next call).
        acc = (acc << available) | (BITMASK[available]! & this.curByte)
        this.hasByte = false
        this.bitOffset = 0
        wanted -= available
      }
    }
    return acc
  }

  /**
   * Moves the reader to an absolute position given in bits.
   *
   * @param pos Position in bits
   */
  seek(pos: number): void {
    const bitPart = pos % 8
    const bytePart = (pos - bitPart) / 8
    this.bitOffset = bitPart
    this.stream.seek(bytePart)
    // Drop any buffered byte so the next read refetches from the new offset.
    this.hasByte = false
  }

  /**
   * Reads six bytes via `read(8)` and returns them as a hex string.
   */
  pi(): string {
    const sixBytes = new Uint8Array(6)
    let filled = 0
    while (filled < sixBytes.length) {
      sixBytes[filled] = this.read(8)
      filled += 1
    }
    return toHex(sixBytes)
  }
}
116 changes: 116 additions & 0 deletions src/seek-bzip/crc32.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// @ts-nocheck

/* CRC32, used in Bzip2 implementation.
* This is a port of CRC32.java from the jbzip2 implementation at
* https://code.google.com/p/jbzip2
* which is:
* Copyright (c) 2011 Matthew Francis
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
* This JavaScript implementation is:
* Copyright (c) 2013 C. Scott Ananian
* with the same licensing terms as Matthew Francis' original implementation.
*/
// NOTE(review): this is a CommonJS-style export inside a .ts file; confirm the
// importer and build targets consume it as intended.
module.exports = (() => {
  /**
   * Precomputed per-byte lookup table, indexed by
   * (top byte of the running CRC) XOR (input byte).
   */
  const TABLE = new Uint32Array([
    0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
    0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
    0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7,
    0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
    0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3,
    0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
    0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef,
    0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
    0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb,
    0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
    0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0,
    0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
    0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4,
    0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
    0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08,
    0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
    0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc,
    0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
    0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050,
    0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
    0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34,
    0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
    0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1,
    0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
    0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5,
    0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
    0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9,
    0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
    0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd,
    0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
    0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
    0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
    0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2,
    0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
    0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e,
    0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
    0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a,
    0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
    0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676,
    0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
    0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662,
    0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
    0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4,
  ])

  /**
   * Constructor for a running CRC accumulator. The running value lives in a
   * closure variable, so each instance created with `new` has its own state
   * and exposes it only through the three methods below.
   */
  function CRC32() {
    // Running register; starts with all bits set.
    let state = 0xffffffff

    // Folds one byte into the register: shift the register up by a byte and
    // XOR in the table entry selected by (old top byte XOR input byte).
    const feed = byte => {
      state = (state << 8) ^ TABLE[((state >>> 24) ^ byte) & 0xff]
    }

    /**
     * @return The current CRC: the inverted register as an unsigned value
     */
    this.getCRC = () => ~state >>> 0

    /**
     * Update the CRC with a single byte
     * @param value The byte to fold in
     */
    this.updateCRC = value => {
      feed(value)
    }

    /**
     * Update the CRC with a run of identical bytes
     * @param value The byte to fold in
     * @param count How many times to fold it in; nothing happens when the
     *   count is not greater than zero
     */
    this.updateCRCRun = (value, count) => {
      for (let left = count; left > 0; left--) {
        feed(value)
      }
    }
  }

  return CRC32
})()
Loading

0 comments on commit fabb3a5

Please sign in to comment.