-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
174 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
(ns cljam.io.cram | ||
{:clj-kondo/ignore [:missing-docstring]} | ||
(:require [cljam.io.util.lsb.data-io :as lsb] | ||
[cljam.io.util.lsb.io-stream :as lsb.stream]) | ||
(:import [java.io ByteArrayInputStream DataInputStream IOException] | ||
[java.util Arrays] | ||
[org.apache.commons.compress.compressors CompressorStreamFactory])) | ||
|
||
(def ^:const cram-magic "CRAM") | ||
|
||
(defn decode-itf8 ^long [in] | ||
(let [b (long (lsb/read-ubyte in))] | ||
(cond (zero? (bit-and b 0x80)) | ||
b | ||
|
||
(zero? (bit-and b 0x40)) | ||
(bit-or (bit-shift-left (bit-and b 0x7f) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x20)) | ||
(bit-or (bit-shift-left (bit-and b 0x3f) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x10)) | ||
(bit-or (bit-shift-left (bit-and b 0x1f) 24) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
:else | ||
(bit-or (bit-shift-left (bit-and b 0x0f) 28) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 20) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 12) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 4) | ||
(bit-and (long (lsb/read-ubyte in)) 0x0f))))) | ||
|
||
(defn decode-ltf8 ^long [in] | ||
(let [b (long (lsb/read-ubyte in))] | ||
(cond (zero? (bit-and b 0x80)) | ||
b | ||
|
||
(zero? (bit-and b 0x40)) | ||
(bit-or (bit-shift-left (bit-and b 0x7f) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x20)) | ||
(bit-or (bit-shift-left (bit-and b 0x3f) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x10)) | ||
(bit-or (bit-shift-left (bit-and b 0x1f) 24) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x08)) | ||
(bit-or (bit-shift-left (bit-and b 0x07) 32) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 24) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x04)) | ||
(bit-or (bit-shift-left (bit-and b 0x03) 40) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 32) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 24) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x02)) | ||
(bit-or (bit-shift-left (bit-and b 0x01) 48) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 40) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 32) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 24) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
(zero? (bit-and b 0x01)) | ||
(bit-or (bit-shift-left (long (lsb/read-ubyte in)) 48) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 40) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 32) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 24) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in))) | ||
|
||
:else | ||
(bit-or (bit-shift-left (long (lsb/read-ubyte in)) 56) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 48) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 40) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 32) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 24) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 16) | ||
(bit-shift-left (long (lsb/read-ubyte in)) 8) | ||
(long (lsb/read-ubyte in)))))) | ||
|
||
(defn decode-array [in elem-fn] | ||
(loop [i (decode-itf8 in) | ||
acc (transient [])] | ||
(if (pos? i) | ||
(recur (dec i) (conj! acc (elem-fn in))) | ||
(persistent! acc)))) | ||
|
||
(defn decode-encoding [in] | ||
(let [codec-id (decode-itf8 in) | ||
_n (decode-itf8 in)] | ||
(case codec-id | ||
0 {:codec :null} | ||
1 (let [content-id (decode-itf8 in)] | ||
{:codec :external | ||
:content-id content-id}) | ||
(throw (ex-info (str "codec " codec-id " not supported") {}))))) | ||
|
||
(defn decode-file-definition [in] | ||
(when-not (Arrays/equals ^bytes (lsb/read-bytes in 4) (.getBytes cram-magic)) | ||
(throw (IOException. "Invalid CRAM file"))) | ||
(let [major (lsb/read-ubyte in) | ||
minor (lsb/read-ubyte in) | ||
file-id (lsb/read-bytes in 20)] | ||
{:version {:major major :minor minor}, :id file-id})) | ||
|
||
(defn decode-container-header [in] | ||
(let [len (lsb/read-int in) | ||
ref-seq-id (decode-itf8 in) | ||
start-pos (decode-itf8 in) | ||
span (decode-itf8 in) | ||
n-records (decode-itf8 in) | ||
counter (decode-ltf8 in) | ||
n-bases (decode-ltf8 in) | ||
n-blocks (decode-itf8 in) | ||
landmarks (decode-array in decode-itf8) | ||
_crc (lsb/read-int in)] | ||
{:length len | ||
:ref ref-seq-id | ||
:start start-pos | ||
:span span | ||
:records n-records | ||
:counter counter | ||
:bases n-bases | ||
:blocks n-blocks | ||
:landmarks landmarks})) | ||
|
||
(def decode-block-data | ||
(let [factory (CompressorStreamFactory.)] | ||
(fn [in method size raw-size] | ||
(let [raw-data (lsb/read-bytes in size)] | ||
(if (zero? method) | ||
raw-data | ||
(let [bais (ByteArrayInputStream. raw-data)] | ||
;; TODO: create appropriate compressor input stream for method | ||
(with-open [is (DataInputStream. (.createCompressorInputStream factory bais))] | ||
(lsb.stream/read-bytes is raw-size)))))))) | ||
|
||
(defn decode-block [in] | ||
(let [method (lsb/read-byte in) | ||
content-type-id (lsb/read-byte in) | ||
content-id (decode-itf8 in) | ||
size (decode-itf8 in) | ||
raw-size (decode-itf8 in) | ||
data (decode-block-data in method size raw-size) | ||
_crc (lsb/read-bytes in 4)] | ||
{:method method | ||
:content-type content-type-id | ||
:content-id content-id | ||
:size size | ||
:raw-size raw-size | ||
:data data})) | ||
|
||
(defn decode-cram-header [in] | ||
Check warning on line 173 in src/cljam/io/cram.clj
|
||
) |