From f30563aa10af6c2171103505ffd53dd27076cad7 Mon Sep 17 00:00:00 2001 From: Shogo Ohta Date: Fri, 26 Jan 2024 10:59:35 +0900 Subject: [PATCH] wip --- src/cljam/io/cram/decode/data_series.clj | 5 +++-- src/cljam/io/cram/decode/record.clj | 14 +++++++++----- src/cljam/io/cram/reader.clj | 9 +++++---- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/cljam/io/cram/decode/data_series.clj b/src/cljam/io/cram/decode/data_series.clj index 915bd210..df569e79 100644 --- a/src/cljam/io/cram/decode/data_series.clj +++ b/src/cljam/io/cram/decode/data_series.clj @@ -115,6 +115,7 @@ (defn build-tag-decoders [{:keys [tags]} blocks] (let [content-id->block-data (into {} (map (juxt :content-id :data)) blocks)] (reduce-kv (fn [decoders tag {tag-type :type :keys [encoding]}] - (let [decoder (build-tag-decoder encoding tag-type content-id->block-data)] - (assoc decoders tag (fn [] {:type tag-type :value (decoder)})))) + (let [decoder (build-tag-decoder encoding tag-type content-id->block-data) + tag-type' (str (if (#{\c \C \s \S \i \I} tag-type) \i tag-type))] + (assoc decoders tag (fn [] {:type tag-type' :value (decoder)})))) {} tags))) diff --git a/src/cljam/io/cram/decode/record.clj b/src/cljam/io/cram/decode/record.clj index cca5bc77..96e8db5a 100644 --- a/src/cljam/io/cram/decode/record.clj +++ b/src/cljam/io/cram/decode/record.clj @@ -38,10 +38,11 @@ (let [mate-flag (long (MF)) bam-flag (cond-> (long (:flag record)) (pos? (bit-and mate-flag 0x01)) (bit-or 0x20) - (pos? (bit-and mate-flag 0x02)) (bit-or 0x08))] + (pos? (bit-and mate-flag 0x02)) (bit-or 0x08)) + rnext (get-in cram-header [:SQ (NS) :SN])] (assoc record :flag bam-flag - :rnext (get-in cram-header [:SQ (NS) :SN]) + :rnext (if (= (:rname record) rnext) "=" rnext) :pnext (NP) :tlen (TS))) (cond-> record @@ -61,7 +62,7 @@ (assoc record :options)))))) (defn- record-seq - [seq-reader {:keys [preservation-map]} {:keys [rname pos ::end ::len]} features] + [seq-reader {:keys [preservation-map]} {:keys [rname pos end ::len]} features] (let [region {:chr rname :start pos :end end} ref-bases (.getBytes ^String (cseq/read-sequence seq-reader region)) bs (byte-array len (byte (int \N))) @@ -182,7 +183,7 @@ (if (zero? i) (let [features (persistent! fs) end (record-end record features) - record' (assoc record ::end end)] + record' (assoc record :end end)] (assoc record' :seq (record-seq seq-reader compression-header record' features) :qual (record-qual record' features decoders) @@ -213,7 +214,10 @@ (let [bb (bb/allocate-lsb-byte-buffer len) _ (dotimes [_ len] (.put bb (byte (BA)))) - record' (assoc record :seq (String. (.array bb)))] + record' (assoc record + :seq (String. (.array bb)) + :mapq 0 + :cigar "")] (if (zero? (bit-and (long flag) 0x01)) record' (let [bb (bb/allocate-lsb-byte-buffer len)] diff --git a/src/cljam/io/cram/reader.clj b/src/cljam/io/cram/reader.clj index 1cac0e4a..12ee0a34 100644 --- a/src/cljam/io/cram/reader.clj +++ b/src/cljam/io/cram/reader.clj @@ -16,12 +16,13 @@ (pos? (bit-and mate-flag 0x08)) (bit-or 0x08)) - :rnext (:rname mate) + :rnext (if (= (:rname record) (:rname mate)) + "=" + (:rname mate)) :pnext (:pos mate))) (defn- update-mate-records - [{^long s1 :pos ^long e1 ::record/end :as r1} - {^long s2 :pos ^long e2 ::record/end :as r2}] + [{^long s1 :pos ^long e1 :end :as r1} {^long s2 :pos ^long e2 :end :as r2}] (let [r1' (update-next-mate r1 r2) r2' (update-next-mate r2 r1)] (if (or (pos? (bit-and (long (:flag r1)) 0x04)) @@ -63,7 +64,7 @@ (dotimes [i n] (aset records i (record-decoder))) (resolve-mate-records records) - (map #(dissoc % ::record/flag ::record/len ::record/end ::record/next-fragment) + (map #(dissoc % ::record/flag ::record/len ::record/next-fragment) records))) (defn- read-container-records [^Buffer bb seq-reader cram-header container-header]