Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
athos committed Dec 12, 2023
1 parent a514b2e commit 238113d
Showing 1 changed file with 174 additions and 0 deletions.
174 changes: 174 additions & 0 deletions src/cljam/io/cram.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
(ns cljam.io.cram
{:clj-kondo/ignore [:missing-docstring]}
(:require [cljam.io.util.lsb.data-io :as lsb]
[cljam.io.util.lsb.io-stream :as lsb.stream])
(:import [java.io ByteArrayInputStream DataInputStream IOException]
[java.util Arrays]
[org.apache.commons.compress.compressors CompressorStreamFactory]))

(def ^:const cram-magic "CRAM")

(defn decode-itf8 ^long [in]
(let [b (long (lsb/read-ubyte in))]
(cond (zero? (bit-and b 0x80))
b

(zero? (bit-and b 0x40))
(bit-or (bit-shift-left (bit-and b 0x7f) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x20))
(bit-or (bit-shift-left (bit-and b 0x3f) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x10))
(bit-or (bit-shift-left (bit-and b 0x1f) 24)
(bit-shift-left (long (lsb/read-ubyte in)) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

:else
(bit-or (bit-shift-left (bit-and b 0x0f) 28)
(bit-shift-left (long (lsb/read-ubyte in)) 20)
(bit-shift-left (long (lsb/read-ubyte in)) 12)
(bit-shift-left (long (lsb/read-ubyte in)) 4)
(bit-and (long (lsb/read-ubyte in)) 0x0f)))))

(defn decode-ltf8 ^long [in]
(let [b (long (lsb/read-ubyte in))]
(cond (zero? (bit-and b 0x80))
b

(zero? (bit-and b 0x40))
(bit-or (bit-shift-left (bit-and b 0x7f) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x20))
(bit-or (bit-shift-left (bit-and b 0x3f) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x10))
(bit-or (bit-shift-left (bit-and b 0x1f) 24)
(bit-shift-left (long (lsb/read-ubyte in)) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x08))
(bit-or (bit-shift-left (bit-and b 0x07) 32)
(bit-shift-left (long (lsb/read-ubyte in)) 24)
(bit-shift-left (long (lsb/read-ubyte in)) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x04))
(bit-or (bit-shift-left (bit-and b 0x03) 40)
(bit-shift-left (long (lsb/read-ubyte in)) 32)
(bit-shift-left (long (lsb/read-ubyte in)) 24)
(bit-shift-left (long (lsb/read-ubyte in)) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x02))
(bit-or (bit-shift-left (bit-and b 0x01) 48)
(bit-shift-left (long (lsb/read-ubyte in)) 40)
(bit-shift-left (long (lsb/read-ubyte in)) 32)
(bit-shift-left (long (lsb/read-ubyte in)) 24)
(bit-shift-left (long (lsb/read-ubyte in)) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

(zero? (bit-and b 0x01))
(bit-or (bit-shift-left (long (lsb/read-ubyte in)) 48)
(bit-shift-left (long (lsb/read-ubyte in)) 40)
(bit-shift-left (long (lsb/read-ubyte in)) 32)
(bit-shift-left (long (lsb/read-ubyte in)) 24)
(bit-shift-left (long (lsb/read-ubyte in)) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in)))

:else
(bit-or (bit-shift-left (long (lsb/read-ubyte in)) 56)
(bit-shift-left (long (lsb/read-ubyte in)) 48)
(bit-shift-left (long (lsb/read-ubyte in)) 40)
(bit-shift-left (long (lsb/read-ubyte in)) 32)
(bit-shift-left (long (lsb/read-ubyte in)) 24)
(bit-shift-left (long (lsb/read-ubyte in)) 16)
(bit-shift-left (long (lsb/read-ubyte in)) 8)
(long (lsb/read-ubyte in))))))

(defn decode-array [in elem-fn]
(loop [i (decode-itf8 in)
acc (transient [])]
(if (pos? i)
(recur (dec i) (conj! acc (elem-fn in)))
(persistent! acc))))

(defn decode-encoding [in]
(let [codec-id (decode-itf8 in)
_n (decode-itf8 in)]
(case codec-id
0 {:codec :null}
1 (let [content-id (decode-itf8 in)]
{:codec :external
:content-id content-id})
(throw (ex-info (str "codec " codec-id " not supported") {})))))

(defn decode-file-definition [in]
(when-not (Arrays/equals ^bytes (lsb/read-bytes in 4) (.getBytes cram-magic))
(throw (IOException. "Invalid CRAM file")))
(let [major (lsb/read-ubyte in)
minor (lsb/read-ubyte in)
file-id (lsb/read-bytes in 20)]
{:version {:major major :minor minor}, :id file-id}))

(defn decode-container-header [in]
(let [len (lsb/read-int in)
ref-seq-id (decode-itf8 in)
start-pos (decode-itf8 in)
span (decode-itf8 in)
n-records (decode-itf8 in)
counter (decode-ltf8 in)
n-bases (decode-ltf8 in)
n-blocks (decode-itf8 in)
landmarks (decode-array in decode-itf8)
_crc (lsb/read-int in)]
{:length len
:ref ref-seq-id
:start start-pos
:span span
:records n-records
:counter counter
:bases n-bases
:blocks n-blocks
:landmarks landmarks}))

(def decode-block-data
(let [factory (CompressorStreamFactory.)]
(fn [in method size raw-size]
(let [raw-data (lsb/read-bytes in size)]
(if (zero? method)
raw-data
(let [bais (ByteArrayInputStream. raw-data)]
;; TODO: create appropriate compressor input stream for method
(with-open [is (DataInputStream. (.createCompressorInputStream factory bais))]
(lsb.stream/read-bytes is raw-size))))))))

(defn decode-block [in]
(let [method (lsb/read-byte in)
content-type-id (lsb/read-byte in)
content-id (decode-itf8 in)
size (decode-itf8 in)
raw-size (decode-itf8 in)
data (decode-block-data in method size raw-size)
_crc (lsb/read-bytes in 4)]
{:method method
:content-type content-type-id
:content-id content-id
:size size
:raw-size raw-size
:data data}))

(defn decode-cram-header [in]

Check warning on line 173 in src/cljam/io/cram.clj

View workflow job for this annotation

GitHub Actions / lint

[clj-kondo] unused binding in

Check warning on line 173 in src/cljam/io/cram.clj

View workflow job for this annotation

GitHub Actions / lint

[eastwood][unused-fn-args] Function arg in never used.
)

0 comments on commit 238113d

Please sign in to comment.