diff --git a/src/cljam/io/cram.clj b/src/cljam/io/cram.clj index adebc32a..9889261d 100644 --- a/src/cljam/io/cram.clj +++ b/src/cljam/io/cram.clj @@ -374,7 +374,9 @@ :rnext (get-in cram-header [:SQ (NS) :SN]) :pnext (NP) :tlen (TS))) - (assoc record ::nf (NF))))) + (cond-> record + (pos? (bit-and (long flag) 0x04)) + (assoc ::next-fragment (NF)))))) (defn build-auxiliary-tags-decoder [{:keys [preservation-map]} {:keys [TL]} tag-decoders] (let [tag-dict (:TD preservation-map)] @@ -388,29 +390,9 @@ []) (assoc record :options)))))) -(defn record-end [{:keys [pos ::len]} features] - (->> features - ^long (reduce - (fn [^long ret f] - (case (:code f) - (:insertion :softclip) - (- ret (alength ^bytes (:bases f))) - - (:deletion :ref-skip) - (+ ret (long (:len f))) - - :insert-base - (dec ret) - - ret)) - len) - dec - (+ (long pos)))) - (defn record-seq - [seq-reader {:keys [preservation-map]} {:keys [rname pos ::len] :as record} features] - (let [end (record-end record features) - region {:chr rname :start pos :end end} + [seq-reader {:keys [preservation-map]} {:keys [rname pos ::end ::len]} features] + (let [region {:chr rname :start pos :end end} ref-bases (.getBytes ^String (cseq/read-sequence seq-reader region)) bs (byte-array len (byte (int \N))) subst (:SM preservation-map)] @@ -500,6 +482,25 @@ (conj! [(- read-len pos) \M]))] (some->> (not-empty (persistent! acc')) sam.cigar/simplify))))) +(defn record-end [{:keys [pos ::len]} features] + (->> features + ^long (reduce + (fn [^long ret f] + (case (:code f) + (:insertion :softclip) + (- ret (alength ^bytes (:bases f))) + + (:deletion :ref-skip) + (+ ret (long (:len f))) + + :insert-base + (dec ret) + + ret)) + len) + dec + (+ (long pos)))) + (defn build-mapped-read-decoder [seq-reader compression-header @@ -509,10 +510,12 @@ prev-pos 0 fs (transient [])] (if (zero? i) - (let [features (persistent! fs)] - (assoc record - :seq (record-seq seq-reader compression-header record features) - :qual (record-qual record features decoders) + (let [features (persistent! fs) + end (record-end record features) + record' (assoc record ::end end)] + (assoc record' + :seq (record-seq seq-reader compression-header record' features) + :qual (record-qual record' features decoders) :mapq (MQ) :cigar (->> (or (features->cigar len features) [[len \M]]) @@ -571,6 +574,44 @@ (mr-decoder record') (ur-decoder record')))))) +(defn update-next-mate + [{:keys [^long flag] :as record} {^long mate-flag :flag :as mate}] + (assoc record + :flag (cond-> flag + (pos? (bit-and mate-flag 0x10)) + (bit-or 0x20) + + (pos? (bit-and mate-flag 0x08)) + (bit-or 0x08)) + :rnext (:rname mate) + :pnext (:pos mate))) + +(defn update-mate-records + [{^long s1 :pos ^long e1 ::end :as r1} {^long s2 :pos ^long e2 ::end :as r2}] + (let [r1' (update-next-mate r1 r2) + r2' (update-next-mate r2 r1)] + (if (or (pos? (bit-and (long (:flag r1)) 0x04)) + (pos? (bit-and (long (:flag r2)) 0x04)) + (not= (:rname r1') (:rname r2'))) + [(assoc r1' :tlen 0) (assoc r2' :tlen 0)] + (if (<= s1 s2) + (let [tlen (inc (- e2 s1))] + [(assoc r1' :tlen tlen) + (assoc r2' :tlen (- tlen))]) + (let [tlen (inc (- e1 s2))] + [(assoc r1' :tlen (- tlen)) + (assoc r2' :tlen tlen)]))))) + +(defn resolve-mate-records [^objects records] + (dotimes [i (alength records)] + (let [record (aget records i)] + (when-let [nf (::next-fragment record)] + (let [j (inc (+ i (long nf))) + mate (aget records j) + [record' mate'] (update-mate-records record mate)] + (aset records i record') + (aset records j mate')))))) + (defn decode-slice-records [seq-reader cram-header compression-header slice-header blocks] (let [decoders (build-data-series-decoders (:data-series compression-header) blocks) tag-decoders (build-tag-decoders compression-header blocks) @@ -579,9 +620,10 @@ compression-header slice-header decoders - tag-decoders)] - (letfn [(step [^long i] - (when (pos? i) - (cons (record-decoder) (lazy-seq (step (dec i))))))] - (step (:records slice-header))))) - + tag-decoders) + n (:records slice-header) + records (object-array n)] + (dotimes [i n] + (aset records i (record-decoder))) + (resolve-mate-records records) + (map #(dissoc % ::flag ::len ::next-fragment) records)))