diff --git a/package.json b/package.json index 99bc2cf2..a9dd3f09 100644 --- a/package.json +++ b/package.json @@ -40,13 +40,16 @@ "biojs" ], "dependencies": { + "@foxglove/wasm-bz2": "^0.1.1", "bz2": "^1.0.1", "bzip2": "^0.1.1", + "bzip2-wasm": "^1.0.1", "crc": "^4.3.2", "generic-filehandle2": "^1.0.0", "md5": "^2.2.1", "pako": "^1.0.4", "quick-lru": "^4.0.1", + "seek-bzip": "^2.0.0", "xz-decompress": "^0.2.1" }, "devDependencies": { diff --git a/src/cramFile/declare.d.ts b/src/cramFile/declare.d.ts index a42173f8..fd556c2f 100644 --- a/src/cramFile/declare.d.ts +++ b/src/cramFile/declare.d.ts @@ -1 +1 @@ -declare module 'bz2' +declare module 'bzip2' diff --git a/src/cramFile/file.ts b/src/cramFile/file.ts index 9347badb..b858a7e4 100644 --- a/src/cramFile/file.ts +++ b/src/cramFile/file.ts @@ -1,4 +1,9 @@ -import { decompress } from 'bz2' +// import bzip2 from 'bzip2' +// import BZip2 from 'bzip2-wasm' +// import { decompress } from 'bz2' + +import Bunzip from 'seek-bzip' + import crc32 from 'crc/calculators/crc32' import QuickLRU from 'quick-lru' import { XzReadableStream } from 'xz-decompress' @@ -280,10 +285,15 @@ export default class CramFile { inputBuffer: Uint8Array, uncompressedSize: number, ) { + // console.log({ compressionMethod }) if (compressionMethod === 'gzip') { - return unzip(inputBuffer) + const ret = unzip(inputBuffer) + if (ret[0] === 24) { + // console.log(ret.slice(0, 500).join(',')) + } + return ret } else if (compressionMethod === 'bzip2') { - return decompress(inputBuffer) + return Bunzip.decode(inputBuffer) } else if (compressionMethod === 'lzma') { const decompressedResponse = new Response( new XzReadableStream(bufferToStream(inputBuffer)), diff --git a/src/cramFile/slice/decodeRecord.ts b/src/cramFile/slice/decodeRecord.ts index b049afa0..546a5724 100644 --- a/src/cramFile/slice/decodeRecord.ts +++ b/src/cramFile/slice/decodeRecord.ts @@ -13,6 +13,7 @@ import { import CramSlice, { SliceHeader } from './index' import { CramFileBlock } from '../file' import { isMappedSliceHeader } from '../sectionParsers' +let kk = 0 /** * given a Buffer, read a string up to the first null character @@ -371,6 +372,7 @@ export default function decodeRecord( // mapping quality mappingQuality = decodeDataSeries('MQ')! + if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) { qualityScores = new Array(readLength) for (let i = 0; i < qualityScores.length; i++) { @@ -414,3 +416,12 @@ export default function decodeRecord( tags, } } +function quals(quals: number[]) { + if (!quals || quals.length === 0) { + return '*' + } + + return quals + .map(q => String.fromCharCode(Math.min(Math.max(q, 0), 93) + 33)) + .join('') +} diff --git a/src/htscodecs/arith_gen.ts b/src/htscodecs/arith_gen.ts index 1b7393bd..09bf2e0b 100644 --- a/src/htscodecs/arith_gen.ts +++ b/src/htscodecs/arith_gen.ts @@ -34,7 +34,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -import { decompress } from 'bz2' +import Bunzip from 'seek-bzip' + import RangeCoder from './arith_sh' import ByteModel from './byte_model' import IOStream from './iostream' @@ -153,19 +154,7 @@ export default class RangeCoderGen { // ---------------------------------------------------------------------- // External codec decodeExt(stream, n_out) { - return decompress(stream.buf.slice(stream.pos)) - // const bits = bzip2.array(stream.buf.slice(stream.pos)) - // let size = bzip2.header(bits) - // let chunk - // const chunks = [] - // do { - // chunk = bzip2.decompress(bits, size) - // if (chunk !== -1) { - // chunks.push(chunk) - // size -= chunk.length - // } - // } while (chunk !== -1) - // return concatUint8Array(chunks) + return Bunzip.decode(stream.buf.slice(stream.pos)) } // ---------------------------------------------------------------------- diff --git a/test/__snapshots__/archive.test.ts.snap b/test/__snapshots__/archive.test.ts.snap deleted file mode 100644 index ae0d86ed..00000000 --- a/test/__snapshots__/archive.test.ts.snap +++ /dev/null @@ -1,821 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`archive 1`] = ` -[ - 24, - 24, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 24, - 24, -] -`; - -exports[`archive 2`] = ` -[ - 24, - 24, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 24, - 24, -] -`; - -exports[`normal 1`] = ` -[ - 24, - 24, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 24, - 24, -] -`; - -exports[`normal 2`] = ` -[ - 24, - 24, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 40, - 24, - 24, -] -`; diff --git a/test/archive.test.ts b/test/archive.test.ts index ac7cf5c6..26d5122a 100644 --- a/test/archive.test.ts +++ b/test/archive.test.ts @@ -26,8 +26,11 @@ test('archive', async () => { }) // @ts-expect-error const feats = await cram.getRecordsForRange(nameToId.chr9, 0, 200000000) - expect(feats[0]!.qualityScores).toMatchSnapshot() - expect(feats.at(-1)!.qualityScores).toMatchSnapshot() + for (const f of feats) { + expect(quals(f.qualityScores!)).toBe( + '99IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII99', + ) + } expect(feats.length).toBe(10000) }) @@ -53,7 +56,20 @@ test('normal', async () => { }) // @ts-expect-error const feats = await cram.getRecordsForRange(nameToId.chr9, 0, 200000000) - expect(feats[0]!.qualityScores).toMatchSnapshot() - expect(feats.at(-1)!.qualityScores).toMatchSnapshot() + for (const f of feats) { + expect(quals(f.qualityScores!)).toBe( + '99IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII99', + ) + } expect(feats.length).toBe(10000) }) + +function quals(quals: number[]) { + if (!quals || quals.length === 0) { + return '*' + } + + return quals + .map(q => String.fromCharCode(Math.min(Math.max(q, 0), 93) + 33)) + .join('') +} diff --git a/yarn.lock b/yarn.lock index a475ed1f..ab276fee 100644 --- a/yarn.lock +++ b/yarn.lock @@ -348,6 +348,13 @@ "@eslint/core" "^0.10.0" levn "^0.4.1" +"@foxglove/wasm-bz2@^0.1.1": + version "0.1.1" + resolved "https://registry.yarnpkg.com/@foxglove/wasm-bz2/-/wasm-bz2-0.1.1.tgz#d3bf629b7caf747a0278ed2efb9c38644b6d3439" + integrity sha512-huMVZ//J9S1TAh689pj7U5tstbmtGhH1g9/HCj9jE3UPaLF83dTeJIOLE0+pe16ha1iH4QRvYgv35aykikHkvA== + dependencies: + tslib "^2" + "@gmod/bgzf-filehandle@^2.0.0": version "2.0.4" resolved "https://registry.yarnpkg.com/@gmod/bgzf-filehandle/-/bgzf-filehandle-2.0.4.tgz#64f88ff6ad48efaf69641e796db87f24bf5da9ab" @@ -1271,6 +1278,11 @@ bz2@^1.0.1: resolved "https://registry.yarnpkg.com/bz2/-/bz2-1.0.1.tgz#c54c9de71b5188742ca28247f88f58bc05017249" integrity sha512-TRUFsKDme+nCMKa1lMx3dkLGh4KvgmCq8/xh/MTMEXAtjPqSC/NqPTzpy5auBZxrJTJX/yjWM0pHxMbpVGc+UQ== +bzip2-wasm@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/bzip2-wasm/-/bzip2-wasm-1.0.1.tgz#f10376496a8e869f87066e7807a3e2f4d2263bc5" + integrity sha512-+5/Z07KHXa6UFi1Smwx374z6ImD2Qdvr3+mXBPSQ80xTdC/8mpE40DpNDrN3MsPodx9W56uTC/nRq2WnHJ890w== + bzip2@^0.1.1: version "0.1.1" resolved "https://registry.yarnpkg.com/bzip2/-/bzip2-0.1.1.tgz#b0d232bd0f0f750d2023306d40a886ee51b901f4" @@ -1453,6 +1465,11 @@ commander@^2.20.0: resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ== +commander@^6.0.0: + version "6.2.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-6.2.1.tgz#0792eb682dfbc325999bb2b84fddddba110ac73c" + integrity sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA== + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -4325,6 +4342,13 @@ schema-utils@^4.3.0: ajv-formats "^2.1.1" ajv-keywords "^5.1.0" +seek-bzip@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/seek-bzip/-/seek-bzip-2.0.0.tgz#f0478ab6acd0ac72345d18dc7525dd84d3c706a2" + integrity sha512-SMguiTnYrhpLdk3PwfzHeotrcwi8bNV4iemL9tx9poR/yeaMYwB9VzR1w7b57DuWpuqR8n6oZboi0hj3AxZxQg== + dependencies: + commander "^6.0.0" + "semver@2 || 3 || 4 || 5": version "5.7.2" resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.2.tgz#48d55db737c3287cd4835e17fa13feace1c41ef8" @@ -4732,6 +4756,11 @@ tsconfig-paths@^3.15.0: minimist "^1.2.6" strip-bom "^3.0.0" +tslib@^2: + version "2.8.1" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.8.1.tgz#612efe4ed235d567e8aba5f2a5fab70280ade83f" + integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w== + type-check@^0.4.0, type-check@~0.4.0: version "0.4.0" resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1"