Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
athos committed Jan 30, 2024
1 parent 55fd251 commit 8617d43
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 50 deletions.
30 changes: 7 additions & 23 deletions src/cljam/io/cram.clj
Original file line number Diff line number Diff line change
@@ -1,32 +1,16 @@
(ns cljam.io.cram
{:clj-kondo/ignore [:missing-docstring]}
(:require [cljam.io.cram.reader :as reader]
(:require [cljam.io.cram.core :as cram]
[cljam.io.protocols :as protocols]
[cljam.io.sequence :as cseq]
[cljam.io.util.byte-buffer :as bb]
[cljam.util :as util]
[clojure.java.io :as cio])
(:import [cljam.io.cram.reader CRAMReader]
[java.nio.channels FileChannel]
[java.nio.file OpenOption StandardOpenOption]))
[cljam.io.util :as io-util])
(:import [cljam.io.cram.reader CRAMReader]))

(defn reader
(^CRAMReader [f] (reader f {}))
(^CRAMReader [f {:keys [reference]}]
(let [file (cio/file f)
url (util/as-url (.getAbsolutePath file))
ch (FileChannel/open (.toPath file)
(into-array OpenOption [StandardOpenOption/READ]))
bb (bb/allocate-lsb-byte-buffer 256)
seq-rdr (some-> reference cseq/reader)
header (volatile! nil)
refs (delay
(mapv (fn [{:keys [SN LN]}] {:name SN :len LN})
(:SQ @header)))
rdr (reader/->CRAMReader url ch bb header refs seq-rdr)]
(reader/read-file-definition rdr)
(vreset! header (reader/read-header rdr))
rdr)))
(^CRAMReader [f option]
(if (io-util/cram-reader? f)
(cram/clone-reader f)
(cram/reader f option))))

(defn read-header [rdr]
(protocols/read-header rdr))
Expand Down
41 changes: 41 additions & 0 deletions src/cljam/io/cram/core.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
(ns cljam.io.cram.core
{:clj-kondo/ignore [:missing-docstring]}
(:require [cljam.io.cram.seq-resolver :as resolver]
[cljam.io.cram.reader :as reader.core]
[cljam.io.util.byte-buffer :as bb]
[cljam.util :as util]
[clojure.java.io :as cio])
(:import [cljam.io.cram.reader CRAMReader]
[java.nio.channels FileChannel]
[java.nio.file OpenOption StandardOpenOption]))

(defn reader
  "Opens the file `f` and returns a CRAMReader over it.

  `f` is coerced with `clojure.java.io/file`. The second argument is an
  option map; when its `:reference` entry is non-nil, a sequence resolver
  is built from it via `resolver/seq-resolver`, otherwise the reader is
  created with a nil resolver.

  After construction the file definition is read, then the header is read
  and stored in the reader's `header` volatile. `refs` is a delay that
  derives `{:name SN :len LN}` maps from the header's `:SQ` entries on
  first deref.

  If any step after opening the channel throws, the resources opened so
  far (the sequence resolver, if any, and the file channel) are closed
  before the exception is rethrown, so a failed open does not leak them."
  ^CRAMReader [f {:keys [reference]}]
  (let [file (cio/file f)
        url (util/as-url (.getAbsolutePath file))
        ^FileChannel ch (FileChannel/open
                         (.toPath file)
                         (into-array OpenOption [StandardOpenOption/READ]))]
    (try
      (let [bb (bb/allocate-lsb-byte-buffer 256)
            seq-resolver (some-> reference resolver/seq-resolver)]
        (try
          (let [header (volatile! nil)
                ;; Deferred: the header volatile is still nil at this point
                ;; and is only filled in by the vreset! below.
                refs (delay
                       (mapv (fn [{:keys [SN LN]}] {:name SN :len LN})
                             (:SQ @header)))
                rdr (reader.core/->CRAMReader url ch bb header refs seq-resolver)]
            (reader.core/read-file-definition rdr)
            (vreset! header (reader.core/read-header rdr))
            rdr)
          (catch Throwable e
            ;; Release the resolver opened above before propagating.
            (when seq-resolver
              (.close ^java.io.Closeable seq-resolver))
            (throw e))))
      (catch Throwable e
        ;; Release the channel on any failure, including resolver creation.
        (.close ch)
        (throw e)))))

(defn clone-reader
  "Returns a new CRAMReader on the same URL as `rdr`.

  The clone gets its own file channel and byte buffer. Its header and refs
  are delays that deref the corresponding fields of `rdr`, so they are not
  decoded again. When `rdr` has a sequence resolver, it is cloned with
  `resolver/clone-seq-resolver`; otherwise the clone's resolver is nil.

  After the file definition is read, one container is skipped.
  NOTE(review): presumably this is the header container, skipped because
  the header is shared with `rdr` -- confirm against the reader layout.

  If any step after opening the channel throws, the cloned resolver (if
  any) and the new channel are closed before the exception is rethrown."
  ^CRAMReader [^CRAMReader rdr]
  (let [url (.-url rdr)
        file (cio/as-file url)
        ^FileChannel ch (FileChannel/open
                         (.toPath file)
                         (into-array OpenOption [StandardOpenOption/READ]))]
    (try
      (let [bb (bb/allocate-lsb-byte-buffer 256)
            seq-resolver (some-> (.-seq-resolver rdr) resolver/clone-seq-resolver)]
        (try
          (let [rdr' (reader.core/->CRAMReader url ch bb
                                               (delay @(.-header rdr))
                                               (delay @(.-refs rdr))
                                               seq-resolver)]
            (reader.core/read-file-definition rdr')
            (reader.core/skip-container rdr')
            rdr')
          (catch Throwable e
            ;; Release the cloned resolver before propagating.
            (when seq-resolver
              (.close ^java.io.Closeable seq-resolver))
            (throw e))))
      (catch Throwable e
        ;; Release the new channel on any failure.
        (.close ch)
        (throw e)))))
26 changes: 13 additions & 13 deletions src/cljam/io/cram/decode/record.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
(ns cljam.io.cram.decode.record
{:clj-kondo/ignore [:missing-docstring]}
(:require [cljam.io.sam.util.cigar :as sam.cigar]
[cljam.io.sequence :as cseq]
[cljam.io.util.byte-buffer :as bb]))

(defn- build-positional-data-decoder
Expand Down Expand Up @@ -62,9 +61,9 @@
(assoc record :options))))))

(defn- record-seq
[seq-reader {:keys [preservation-map]} {:keys [rname pos end ::len]} features]
[seq-resolver {:keys [preservation-map]} {:keys [rname pos end ::len]} features]
(let [region {:chr rname :start pos :end end}
ref-bases (.getBytes ^String (cseq/read-sequence seq-reader region))
ref-bases (.getBytes ^String (seq-resolver region))
bs (byte-array len (byte (int \N)))
subst (:SM preservation-map)]
(loop [[f & more :as fs] features, rpos 0, spos 1]
Expand Down Expand Up @@ -173,7 +172,7 @@
(+ (long pos))))

(defn- build-mapped-read-decoder
[seq-reader
[seq-resolver
compression-header
{:keys [FN MQ QS FC FP BA BS IN SC HC PD DL RS BB QQ] :as decoders}]
(fn [{::keys [len] :as record}]
Expand All @@ -185,7 +184,7 @@
end (record-end record features)
record' (assoc record :end end)]
(assoc record'
:seq (record-seq seq-reader compression-header record' features)
:seq (record-seq seq-resolver compression-header record' features)
:qual (record-qual record' features decoders)
:mapq (MQ)
:cigar (->> (or (features->cigar len features)
Expand Down Expand Up @@ -227,16 +226,17 @@
(assoc record' :qual (String. (.array bb))))))))

(defn build-cram-record-decoder
[seq-reader cram-header compression-header slice-header {:keys [BF CF] :as decoders} tag-decoders]
(let [pd-decoder (build-positional-data-decoder cram-header
[seq-resolver cram-header compression-header slice-header ds-decoders tag-decoders]
(let [{:keys [BF CF]} ds-decoders
pd-decoder (build-positional-data-decoder cram-header
compression-header
slice-header
decoders)
rn-decoder (build-read-name-decoder compression-header decoders)
mt-decoder (build-mate-read-decoder cram-header decoders)
at-decoder (build-auxiliary-tags-decoder compression-header decoders tag-decoders)
mr-decoder (build-mapped-read-decoder seq-reader compression-header decoders)
ur-decoder (build-unmapped-read-decoder decoders)]
ds-decoders)
rn-decoder (build-read-name-decoder compression-header ds-decoders)
mt-decoder (build-mate-read-decoder cram-header ds-decoders)
at-decoder (build-auxiliary-tags-decoder compression-header ds-decoders tag-decoders)
mr-decoder (build-mapped-read-decoder seq-resolver compression-header ds-decoders)
ur-decoder (build-unmapped-read-decoder ds-decoders)]
(fn []
(let [bf (BF)
cf (CF)
Expand Down
37 changes: 23 additions & 14 deletions src/cljam/io/cram/reader.clj
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
[java.nio Buffer ByteBuffer ByteOrder]
[java.nio.channels FileChannel FileChannel$MapMode]))

(declare read-header read-alignments)
(declare read-alignments)

(deftype CRAMReader [url channel buffer header refs seq-reader]
(deftype CRAMReader [url channel buffer header refs seq-resolver]
Closeable
(close [_]
(when seq-reader
(.close ^Closeable seq-reader))
(when seq-resolver
(.close ^Closeable seq-resolver))
(.close ^FileChannel channel))
protocols/IReader
(reader-url [_] url)
Expand Down Expand Up @@ -95,10 +95,9 @@
(let [slice-header (struct/decode-slice-header-block bb)
blocks (into [] (map (fn [_] (struct/decode-block bb)))
(range (:blocks slice-header)))

ds-decoders (ds/build-data-series-decoders compression-header blocks)
tag-decoders (ds/build-tag-decoders compression-header blocks)
record-decoder (record/build-cram-record-decoder (.-seq-reader rdr)
record-decoder (record/build-cram-record-decoder (.-seq-resolver rdr)
@(.-header rdr)
compression-header
slice-header
Expand All @@ -121,23 +120,33 @@
(.position ^Buffer bb (+ container-header-end landmark))
(read-slice-records rdr bb compression-header))))))

(defn- read-container-with [^CRAMReader rdr f]
(defn- with-next-container-header [^CRAMReader rdr f]
(let [^FileChannel ch (.-channel rdr)
pos (.position ch)
_ (read-to-buffer rdr)
^ByteBuffer bb (.-buffer rdr)
container-header (struct/decode-container-header bb)
header-size (.position bb)
container-start (+ pos (.position bb))
container-size (long (:length container-header))
bb' (-> ch
(.map FileChannel$MapMode/READ_ONLY (+ pos header-size) container-size)
(.order ByteOrder/LITTLE_ENDIAN))
ret (f container-header bb')]
(.position ch (+ pos header-size container-size))
ret (f container-header container-start)]
(.position ch (+ container-start container-size))
ret))

(defn- read-container-with
  "Reads the next container header via `with-next-container-header`, maps
  the `:length` bytes starting at the container start as a read-only,
  little-endian buffer, and returns the result of calling
  `(f container-header mapped-buffer)`."
  [^CRAMReader rdr f]
  (with-next-container-header
    rdr
    (fn [container-header container-start]
      (let [size (long (:length container-header))
            ^FileChannel channel (.-channel rdr)
            mapped (.map channel FileChannel$MapMode/READ_ONLY container-start size)]
        (f container-header (.order mapped ByteOrder/LITTLE_ENDIAN))))))

(defn skip-container
  "Runs `with-next-container-header` on `rdr` with a callback that ignores
  its arguments and yields nil, so the container's contents are not
  decoded here. Returns nil."
  [rdr]
  (with-next-container-header rdr (fn [& _] nil)))

(defn read-header [^CRAMReader rdr]
(read-container-with rdr #(struct/decode-cram-header-block %2)))
(read-container-with rdr (fn [_ bb] (struct/decode-cram-header-block bb))))

(defn read-alignments [^CRAMReader rdr]
(let [^FileChannel ch (.-channel rdr)]
Expand Down
23 changes: 23 additions & 0 deletions src/cljam/io/cram/seq_resolver.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
(ns cljam.io.cram.seq-resolver
{:clj-kondo/ignore [:missing-docstring]}
(:require [cljam.io.sequence :as cseq])
(:import [java.io Closeable]))

(declare seq-resolver)

;; Wraps a sequence reader so it can be called as a function of a region,
;; closed as a resource, and cloned into an independent resolver.
(deftype SeqResolver [seq-reader]
  clojure.lang.IFn
  ;; (resolver region) => the sequence read from the wrapped reader.
  (invoke [_this region]
    (cseq/read-sequence seq-reader region))
  Closeable
  ;; Closing the resolver closes the wrapped sequence reader.
  (close [_this]
    (.close ^Closeable seq-reader))
  Object
  ;; Builds a fresh resolver by passing the wrapped reader to `seq-resolver`.
  (clone [_this]
    (seq-resolver seq-reader)))

(defn seq-resolver
  "Opens `seq-file` with `cseq/reader` and wraps the resulting reader in a
  SeqResolver."
  [seq-file]
  (-> seq-file
      cseq/reader
      ->SeqResolver))

(defn clone-seq-resolver
  "Returns the result of calling the `clone` method of `resolver`."
  [^SeqResolver resolver]
  (. resolver (clone)))
8 changes: 8 additions & 0 deletions src/cljam/io/util.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
cljam.io.sam.writer
cljam.io.bam.reader
cljam.io.bam.writer
cljam.io.cram.reader
cljam.io.vcf.reader
cljam.io.vcf.writer
cljam.io.bcf.reader
Expand Down Expand Up @@ -50,6 +51,11 @@
[wtr]
(instance? cljam.io.bam.writer.BAMWriter wtr))

(defn cram-reader?
"Returns true if `rdr` is an instance of cljam.io.cram.reader.CRAMReader,
false otherwise."
[rdr]
(instance? cljam.io.cram.reader.CRAMReader rdr))

(defn variant-reader?
"Checks if given object implements protocol IVariantReader."
[rdr]
Expand Down Expand Up @@ -153,6 +159,7 @@
#"(?i)\.sam$" :sam
#"(?i)\.bai$" :bai
#"(?i)\.bam$" :bam
#"(?i)\.cram$" :cram
#"(?i)\.f(ast)?q" :fastq
#"(?i)\.fai$" :fai
#"(?i)\.(fa|fasta|fas|fsa|seq|fna|faa|ffn|frn|mpfa)" :fasta
Expand Down Expand Up @@ -183,6 +190,7 @@
(condp re-find s
#"^BAM\01" :bam
#"^BAI\01" :bai
#"^CRAM" :cram
#"^BCF\02" :bcf
#"^TBI\01" :tbi
#"^##fileformat=VCF" :vcf
Expand Down

0 comments on commit 8617d43

Please sign in to comment.