Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
athos committed Jan 23, 2024
1 parent 02f8646 commit dc11884
Showing 1 changed file with 75 additions and 33 deletions.
108 changes: 75 additions & 33 deletions src/cljam/io/cram.clj
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,9 @@
:rnext (get-in cram-header [:SQ (NS) :SN])
:pnext (NP)
:tlen (TS)))
(assoc record ::nf (NF)))))
(cond-> record
(pos? (bit-and (long flag) 0x04))
(assoc ::next-fragment (NF))))))

(defn build-auxiliary-tags-decoder [{:keys [preservation-map]} {:keys [TL]} tag-decoders]
(let [tag-dict (:TD preservation-map)]
Expand All @@ -388,29 +390,9 @@
[])
(assoc record :options))))))

(defn record-end [{:keys [pos ::len]} features]
(->> features
^long (reduce
(fn [^long ret f]
(case (:code f)
(:insertion :softclip)
(- ret (alength ^bytes (:bases f)))

(:deletion :ref-skip)
(+ ret (long (:len f)))

:insert-base
(dec ret)

ret))
len)
dec
(+ (long pos))))

(defn record-seq
[seq-reader {:keys [preservation-map]} {:keys [rname pos ::len] :as record} features]
(let [end (record-end record features)
region {:chr rname :start pos :end end}
[seq-reader {:keys [preservation-map]} {:keys [rname pos ::end ::len]} features]
(let [region {:chr rname :start pos :end end}
ref-bases (.getBytes ^String (cseq/read-sequence seq-reader region))
bs (byte-array len (byte (int \N)))
subst (:SM preservation-map)]
Expand Down Expand Up @@ -500,6 +482,25 @@
(conj! [(- read-len pos) \M]))]
(some->> (not-empty (persistent! acc')) sam.cigar/simplify)))))

(defn record-end [{:keys [pos ::len]} features]
(->> features
^long (reduce
(fn [^long ret f]
(case (:code f)
(:insertion :softclip)
(- ret (alength ^bytes (:bases f)))

(:deletion :ref-skip)
(+ ret (long (:len f)))

:insert-base
(dec ret)

ret))
len)
dec
(+ (long pos))))

(defn build-mapped-read-decoder
[seq-reader
compression-header
Expand All @@ -509,10 +510,12 @@
prev-pos 0
fs (transient [])]
(if (zero? i)
(let [features (persistent! fs)]
(assoc record
:seq (record-seq seq-reader compression-header record features)
:qual (record-qual record features decoders)
(let [features (persistent! fs)
end (record-end record features)
record' (assoc record ::end end)]
(assoc record'
:seq (record-seq seq-reader compression-header record' features)
:qual (record-qual record' features decoders)
:mapq (MQ)
:cigar (->> (or (features->cigar len features)
[[len \M]])
Expand Down Expand Up @@ -571,6 +574,44 @@
(mr-decoder record')
(ur-decoder record'))))))

(defn update-next-mate
[{:keys [^long flag] :as record} {^long mate-flag :flag :as mate}]
(assoc record
:flag (cond-> flag
(pos? (bit-and mate-flag 0x10))
(bit-or 0x20)

(pos? (bit-and mate-flag 0x08))
(bit-or 0x08))
:rnext (:rname mate)
:pnext (:pos mate)))

(defn update-mate-records
[{^long s1 :pos ^long e1 ::end :as r1} {^long s2 :pos ^long e2 ::end :as r2}]
(let [r1' (update-next-mate r1 r2)
r2' (update-next-mate r2 r1)]
(if (or (pos? (bit-and (long (:flag r1)) 0x04))
(pos? (bit-and (long (:flag r2)) 0x04))
(not= (:rname r1') (:rname r2')))
[(assoc r1' :tlen 0) (assoc r2' :tlen 0)]
(if (<= s1 s2)
(let [tlen (inc (- e2 s1))]
[(assoc r1' :tlen tlen)
(assoc r2' :tlen (- tlen))])
(let [tlen (inc (- e1 s2))]
[(assoc r1' :tlen (- tlen))
(assoc r2' :tlen tlen)])))))

(defn resolve-mate-records [^objects records]
(dotimes [i (alength records)]
(let [record (aget records i)]
(when-let [nf (::next-fragment record)]
(let [j (inc (+ i (long nf)))
mate (aget records j)
[record' mate'] (update-mate-records record mate)]
(aset records i record')
(aset records j mate'))))))

(defn decode-slice-records [seq-reader cram-header compression-header slice-header blocks]
(let [decoders (build-data-series-decoders (:data-series compression-header) blocks)
tag-decoders (build-tag-decoders compression-header blocks)
Expand All @@ -579,9 +620,10 @@
compression-header
slice-header
decoders
tag-decoders)]
(letfn [(step [^long i]
(when (pos? i)
(cons (record-decoder) (lazy-seq (step (dec i))))))]
(step (:records slice-header)))))

tag-decoders)
n (:records slice-header)
records (object-array n)]
(dotimes [i n]
(aset records i (record-decoder)))
(resolve-mate-records records)
(map #(dissoc % ::flag ::len ::next-fragment) records)))

0 comments on commit dc11884

Please sign in to comment.