From e21c777f2367f7e5c389754d5f8b262bb718705d Mon Sep 17 00:00:00 2001 From: Shogo Ohta Date: Fri, 19 Jul 2024 11:05:48 +0900 Subject: [PATCH] Add tests for CRAM index generation --- src/cljam/io/crai.clj | 33 +++-- test-resources/cram/test.sorted.cram | Bin 0 -> 1953 bytes .../cram/test.sorted_with_unknown_so.cram | Bin 0 -> 1955 bytes test/cljam/algo/cram_indexer_test.clj | 35 +++++ test/cljam/io/crai_test.clj | 25 +++- .../io/cram/encode/alignment_stats_test.clj | 16 +++ test/cljam/io/cram/encode/record_test.clj | 128 ++++++++++++------ test/cljam/io/cram/writer_test.clj | 70 ++++++++++ test/cljam/io/cram_test.clj | 41 ++++++ test/cljam/test_common.clj | 2 + 10 files changed, 293 insertions(+), 57 deletions(-) create mode 100644 test-resources/cram/test.sorted.cram create mode 100644 test-resources/cram/test.sorted_with_unknown_so.cram create mode 100644 test/cljam/algo/cram_indexer_test.clj create mode 100644 test/cljam/io/cram/writer_test.clj diff --git a/src/cljam/io/crai.clj b/src/cljam/io/crai.clj index 37bd8aed..07dc8046 100644 --- a/src/cljam/io/crai.clj +++ b/src/cljam/io/crai.clj @@ -5,22 +5,33 @@ [clojure.string :as str]) (:import [java.io Closeable OutputStreamWriter PrintWriter])) +(defn- read-index-entries [rdr] + (->> (line-seq rdr) + (map (fn [line] + (let [[^long seq-id start span container-offset slice-offset size] + (map #(Long/parseLong %) (str/split line #"\t")) + unmapped? (neg? seq-id)] + {:ref-seq-id seq-id + :start (if unmapped? 0 start) + :span (if unmapped? 0 span) + :container-offset container-offset + :slice-offset slice-offset + :size size}))))) + (defn read-index "Reads a CRAI file `f` and creates an index." [f refs] (let [refs (vec refs)] (with-open [rdr (io/reader (util/compressor-input-stream f))] - (->> (line-seq rdr) - (map (fn [line] - (let [[^long seq-id ^long start ^long span container-offset slice-offset size] - (map #(Long/parseLong %) (str/split line #"\t")) - unmapped? (neg? seq-id)] - {:chr (if unmapped? "*" (:name (nth refs seq-id))) - :start (if unmapped? 0 start) - :end (if unmapped? 0 (+ start span)) - :container-offset container-offset - :slice-offset slice-offset - :size size}))) + (->> (read-index-entries rdr) + (map (fn [{:keys [^long ref-seq-id ^long start ^long span] :as entry}] + (-> entry + (assoc :chr + (if (neg? ref-seq-id) + "*" + (:name (nth refs ref-seq-id))) + :end (+ start span)) + (dissoc :ref-seq-id :span)))) intervals/index-intervals)))) (defn find-overlapping-entries diff --git a/test-resources/cram/test.sorted.cram b/test-resources/cram/test.sorted.cram new file mode 100644 index 0000000000000000000000000000000000000000..bb18b93469168a055587448ec25c711671f1edda GIT binary patch literal 1953 zcmb_deQZ-z6hG(nzOLH!coCf-a(fSFk%n=}J| zfwHT-tZeDcrI(~TyBwz`_DJ{k7It*n3%?B&r50SS){F>C_jWx|Sln~>)-FeSZcLic zWL}DpUUqv)7Ddp8*WSOZ9-O2fK!RYW4TA^9l;rH7>4JNLdn}^Ijm|mf@u0WZ>i1i% zB{?FLmbxIgD#4Y9h}i^!rvOpqnT5#67DZ1!qT1sY1#N_zXiYJJ*6l^qxrvS}p^;QV z*81YPh=x2jVssu6F?rdDMwb&YelD4qTm$VG)(IZct=??X6X)b0Ce0*sn-RNd zIZRS9)FU9{+KI8&UJyXG$u^$8fvqK4QBW~a9ih?cA|<^cI>s1pGFuX?iAgr@!wm-$ zJ($`7bfvreu{1jWbnEpOUMkW%d|%yoyUe`)v$GLe?|*Qv43|HP{5tC|WvsZ-)_$vG z+?=k(!HBNpuo(Gl+=cqP+eUrA=xRs)iIdajue|-aQg8mQam6*ee_wigzOz=|I>k}@ z$Jq0k-v23%E63$$w4P6AS`T^u9@oYm+45@A>vPMirJW{j0fSo+XX zs#lB+5DN`pX#fzlG?f}8LTxO8Boaic35?MMu^2~~Kw>r#SkTOo1Rw_l{nvD7W|GVT z!BqOu$Bc>74KRsF0}050C_|mX>L8*HG^FcgxFAEH3`#w~;xpTIajnnC6?RV_G2^~1 zqQ8G{n_&5(n=$oc)6hx*rtv5W&8BdGDX#<{k$JEE=|&bYIc?~5k9VY{g;NQH=p)Go zS%)LM15!qJ==>czV~5V$p_}N2DQ_C8(nFqt&IWq`Wv%32@LsVglz0AjmWvA*86@jvu^16+RnMdvv5o#Scg zTj^8QYzL@m@7Favr!j7FzpSagvKXhNUV9Ij{$E$0bnJ95GQ)6w!-i~{G3oaxzK%+Z zzY|$>^02k~F9#|r0%d`U@`?b1ySwi`$qjex+~`0SlWH4EQ%``JSW% zcx`+(XIJAYmg-kr~Pz3sk2zrp#s|SbefPF{FvAl734$F~3?934{K|dG6+9R`XcpZrbSwh58$G!ezu#iX%M>9$ z-wDC>vCeEn%pefla}ibU1&GYC2vKzBAgbLiQP76Ch}Ps1Xk8veor~zuJeo;)WUbF# zjA+PqA%Ov*GAw0qu9b+=bTH@lZ+=m+uR(Y_l z1L&OW^heU{^7N*Auf0BBZ})zE|NR2frhOMfw4VRqoKv0t+2q$zd2Q<2`-fW|z`W`Tbsd`K%O-}+)4+h!mk-b6L z3kFZ40v;d;HW4)O860v6YE4PA&bLC)K=3_`x;DC;F=3K~W5`|NF;rwHa8Q9Df24wj zVt_*PHu$AulgLLdCEF5@GNu|Of$5oBje<}M5S@UGi{>*Jkh4GZoMem@N|^u5QmRyp zRS*ePV6FlXwUkN?5}`JdKoSYU)da?Hf=G-aj3qI}5SY=#kpv(U1pPPEGjm85fDn`a z`A)_}rv%fVWzZ{mNV0c??4>fr%ib|mrW;j0RdvxmTu(A%qfZsT zx;5c;>#hdI)Gwr>g+is0fI=x14v_L@@HtuX_Fpb!A=8qE&i4d+Qc}-E0wMfV!g1Da z@7W89quX`HikE+}U-VPF$#E zSVT_|2o-8ti#D$-oku-g_xJ>3(kuf`BhA2rLcMFOJhNY(smL>7EN`Vt9QrTuvh-Jb zl?{6UDq5cCs$SI?x4Kqq%5JW}l!j*} zKe4SIVv_8gR&t8_LtTl?INYhVt*xzG+ukO@@wNqxnpp`AwO~({^BCMJDURe$pOoGU zWh^>945yM7Mgoo+C5IZ{;i!qrkWc4Sd-LSekl~=h)J%{#(a%%tF{fpd3?$FbHZ3dONnc}cBVO5;(0hr8zrJ!>9kl_jchhf{^`ZY%6W zW{S}ay{Nw+2(bKWViCVw4tU#^AT$4SGxh!KqOFgO!ePz~ELX)HbUneDgP)#8W*N?Q zt6;}lDE%=#Q% zq=LqbGZ#2*i>iGzcngfu55=EQTv$+Gm=f?A3da=yoV$He!#{G4w2LqEy7O;;SXb7< XY?{-}C$?N#NP}8U7YhCE`EmZ=3>PKu literal 0 HcmV?d00001 diff --git a/test/cljam/algo/cram_indexer_test.clj b/test/cljam/algo/cram_indexer_test.clj new file mode 100644 index 00000000..b155f5ed --- /dev/null +++ b/test/cljam/algo/cram_indexer_test.clj @@ -0,0 +1,35 @@ +(ns cljam.algo.cram-indexer-test + (:require [cljam.algo.cram-indexer :as indexer] + [cljam.io.crai :as crai] + [cljam.io.cram :as cram] + [cljam.test-common :as common] + [cljam.util :as util] + [clojure.java.io :as io] + [clojure.test :refer [deftest is]])) + +(defn- read-index-entries [f] + (with-open [r (io/reader (util/compressor-input-stream f))] + (doall (#'crai/read-index-entries r)))) + +(deftest create-index-test + (let [f (io/file common/temp-dir "medium.cram.crai")] + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} + (is (thrown-with-msg? Exception #"Cannot create CRAM index file .*" + (indexer/create-index common/medium-cram-file f))) + (indexer/create-index common/medium-cram-file f + :skip-sort-order-check? true) + (is (= (read-index-entries common/medium-crai-file) + (read-index-entries f)))))) + +(deftest cram-without-alignments-test + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} + (let [header {:HD {:SO "coordinate"} + :SQ [{:SN "chr1", :LN 100}]} + target (io/file common/temp-dir "no_aln.cram") + target-crai (io/file common/temp-dir "no_aln.cram.crai")] + (with-open [w (cram/writer target)] + (cram/write-header w header) + (cram/write-refs w header)) + (is (common/not-throw? (indexer/create-index target target-crai)))))) diff --git a/test/cljam/io/crai_test.clj b/test/cljam/io/crai_test.clj index a8889560..197e68b3 100644 --- a/test/cljam/io/crai_test.clj +++ b/test/cljam/io/crai_test.clj @@ -1,7 +1,9 @@ (ns cljam.io.crai-test (:require [cljam.io.crai :as crai] [cljam.test-common :as common] - [clojure.test :refer [deftest are]])) + [cljam.util :as util] + [clojure.java.io :as io] + [clojure.test :refer [are deftest is]])) (def ^:private test-refs (->> (concat (range 1 23) ["X" "Y"]) @@ -116,3 +118,24 @@ :container-offset 378365 :slice-offset 190037 :size 12326}]))) + +(deftest write-index-entries-test + (let [entries [{:ref-seq-id 0, :start 546609, :span 205262429, + :container-offset 324, :slice-offset 563, :size 22007} + {:ref-seq-id 0, :start 206547069, :span 42644506, + :container-offset 324, :slice-offset 22570, :size 7349} + {:ref-seq-id 1, :start 67302, :span 231638920, + :container-offset 30272, :slice-offset 563, :size 21618} + {:ref-seq-id -1, :start 0, :span 0, + :container-offset 354657, :slice-offset 563, :size 23119} + {:ref-seq-id -1, :start 0, :span 0, + :container-offset 378365, :slice-offset 171, :size 23494} + {:ref-seq-id -1, :start 0, :span 0, + :container-offset 378365, :slice-offset 23665, :size 23213}] + f (io/file common/temp-dir "test.cram.crai")] + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} + (with-open [w (crai/writer f)] + (crai/write-index-entries w entries)) + (with-open [r (io/reader (util/compressor-input-stream f))] + (is (= entries (#'crai/read-index-entries r))))))) diff --git a/test/cljam/io/cram/encode/alignment_stats_test.clj b/test/cljam/io/cram/encode/alignment_stats_test.clj index 00551aa9..980bb632 100644 --- a/test/cljam/io/cram/encode/alignment_stats_test.clj +++ b/test/cljam/io/cram/encode/alignment_stats_test.clj @@ -49,3 +49,19 @@ (into {} (stats/merge-stats [{:ri -1, :start 0, :end 0, :nbases 10000, :nrecords 20} {:ri -1, :start 0, :end 0, :nbases 15000, :nrecords 30}])))))) + +(deftest make-alignment-spans-builder-test + (let [builder (stats/make-alignment-spans-builder) + records [{:ri 0, :start 1, :end 100} + {:ri 0, :start 51, :end 150} + {:ri 1, :start 51, :end 150} + {:ri 1, :start 151, :end 300} + {:ri -1, :start 0, :end 0} + {:ri -1, :start 0, :end 0}]] + (run! (fn [{:keys [ri start end]}] + (stats/update-span! builder ri start end)) + records) + (is (= {0 {:start 1, :span 150} + 1 {:start 51, :span 250} + -1 {:start 0, :span 1}} + (stats/build-spans builder))))) diff --git a/test/cljam/io/cram/encode/record_test.clj b/test/cljam/io/cram/encode/record_test.clj index 87664a7d..a64f7317 100644 --- a/test/cljam/io/cram/encode/record_test.clj +++ b/test/cljam/io/cram/encode/record_test.clj @@ -68,6 +68,34 @@ [[{:code :softclip, :pos 1, :bases (mapv int "TT")}] 4])) +(deftest preprocess-slice-records-test + (let [rname->idx {"ref" 0} + records (object-array + [{:rname "ref", :pos 1, :cigar "5M", :seq "AGAAT", :qual "HFHHH"} + {:rname "ref", :pos 5, :cigar "2S3M",:seq "CCTGT", :qual "##AAC"} + {:rname "ref", :pos 10, :cigar "5M", :seq "GATAA", :qual "CCCFF"} + {:rname "ref", :pos 15, :cigar "1M1I1M1D2M", :seq "GAAAG", :qual "EBBFF"} + {:rname "*", :pos 0, :cigar "*", :seq "CTGTG", :qual "AEEEE"}])] + (record/preprocess-slice-records test-seq-resolver rname->idx subst-mat records) + (is (= [{:rname "ref", :pos 1, :cigar "5M", :seq "AGAAT", :qual "HFHHH" + ::record/flag 0x03, ::record/ref-index 0, ::record/end 5 + ::record/features [{:code :subst, :pos 3 :subst 0}]} + {:rname "ref", :pos 5, :cigar "2S3M",:seq "CCTGT", :qual "##AAC" + ::record/flag 0x03, ::record/ref-index 0, ::record/end 7 + ::record/features [{:code :softclip, :pos 1, :bases [(int \C) (int \C)]}]} + {:rname "ref", :pos 10, :cigar "5M", :seq "GATAA", :qual "CCCFF" + ::record/flag 0x03, ::record/ref-index 0, ::record/end 14 + ::record/features []} + {:rname "ref", :pos 15, :cigar "1M1I1M1D2M", :seq "GAAAG", :qual "EBBFF" + ::record/flag 0x03, ::record/ref-index 0, ::record/end 19 + ::record/features [{:code :insertion, :pos 2, :bases [(int \A)]} + {:code :deletion, :pos 4, :len 1}]} + {:rname "*", :pos 0, :cigar "*", :seq "CTGTG", :qual "AEEEE" + ::record/flag 0x03, ::record/ref-index -1, ::record/end 0 + ::record/features []}] + (walk/prewalk #(if (.isArray (class %)) (vec %) %) + records))))) + (deftest encode-slice-records-test (testing "mapped reads" (let [cram-header {:SQ @@ -76,6 +104,9 @@ :RG [{:ID "rg001"} {:ID "rg002"}]} + rname->idx (into {} + (map-indexed (fn [i {:keys [SN]}] [SN i])) + (:SQ cram-header)) tag-dict [[] [{:tag :MD, :type \Z} {:tag :NM, :type \c}]] @@ -87,34 +118,36 @@ :NM {\c {:codec :byte-array-len :len-encoding {:codec :huffman, :alphabet [1], :bit-len [0]} :val-encoding {:codec :external, :content-id 5131619}}}}) - records [{:qname "q001", :flag 99, :rname "ref", :pos 1, :end 5, :mapq 0, - :cigar "5M", :rnext "=", :pnext 151, :tlen 150, :seq "AGAAT", :qual "HFHHH" - :options [{:RG {:type "Z", :value "rg001"}} - {:MD {:type "Z", :value "2C2"}} - {:NM {:type "c", :value 1}}] - ::record/tags-index 1} - {:qname "q002", :flag 99, :rname "ref", :pos 5, :end 7, :mapq 15, - :cigar "2S3M", :rnext "=", :pnext 15, :tlen 15, :seq "CCTGT", :qual "##AAC" - :options [{:RG {:type "Z", :value "rg001"}} - {:MD {:type "Z", :value "3"}} - {:NM {:type "c", :value 0}}] - ::record/tags-index 1} - {:qname "q003", :flag 177, :rname "ref", :pos 10, :end 14, :mapq 60, - :cigar "5M", :rnext "ref2", :pnext 100, :tlen 0, :seq "GATAA", :qual "CCCFF" - :options [{:RG {:type "Z", :value "rg002"}} - {:MD {:type "Z", :value "5"}} - {:NM {:type "c", :value 0}}] - ::record/tags-index 1} - {:qname "q004", :flag 147, :rname "ref", :pos 15, :end 19, :mapq 15, - :cigar "1M1I1M1D2M", :rnext "=", :pnext 5, :tlen -15, :seq "GAAAG", :qual "EBBFF" - :options [{:RG {:type "Z", :value "rg002"}} - {:MD {:type "Z", :value "3^T2"}} - {:NM {:type "c", :value 2}}] - ::record/tags-index 1} - {:qname "q005", :flag 73, :rname "ref", :pos 20, :end 24, :mapq 0, - :cigar "5M", :rnext "*", :pnext 0, :tlen 0, :seq "CTGTG", :qual "AEEEE" - :options [], ::record/tags-index 0}] - stats (record/encode-slice-records test-seq-resolver cram-header tag-dict subst-mat + records (object-array + [{:qname "q001", :flag 99, :rname "ref", :pos 1, :end 5, :mapq 0, + :cigar "5M", :rnext "=", :pnext 151, :tlen 150, :seq "AGAAT", :qual "HFHHH" + :options [{:RG {:type "Z", :value "rg001"}} + {:MD {:type "Z", :value "2C2"}} + {:NM {:type "c", :value 1}}] + ::record/tags-index 1} + {:qname "q002", :flag 99, :rname "ref", :pos 5, :end 7, :mapq 15, + :cigar "2S3M", :rnext "=", :pnext 15, :tlen 15, :seq "CCTGT", :qual "##AAC" + :options [{:RG {:type "Z", :value "rg001"}} + {:MD {:type "Z", :value "3"}} + {:NM {:type "c", :value 0}}] + ::record/tags-index 1} + {:qname "q003", :flag 177, :rname "ref", :pos 10, :end 14, :mapq 60, + :cigar "5M", :rnext "ref2", :pnext 100, :tlen 0, :seq "GATAA", :qual "CCCFF" + :options [{:RG {:type "Z", :value "rg002"}} + {:MD {:type "Z", :value "5"}} + {:NM {:type "c", :value 0}}] + ::record/tags-index 1} + {:qname "q004", :flag 147, :rname "ref", :pos 15, :end 19, :mapq 15, + :cigar "1M1I1M1D2M", :rnext "=", :pnext 5, :tlen -15, :seq "GAAAG", :qual "EBBFF" + :options [{:RG {:type "Z", :value "rg002"}} + {:MD {:type "Z", :value "3^T2"}} + {:NM {:type "c", :value 2}}] + ::record/tags-index 1} + {:qname "q005", :flag 73, :rname "ref", :pos 20, :end 24, :mapq 0, + :cigar "5M", :rnext "*", :pnext 0, :tlen 0, :seq "CTGTG", :qual "AEEEE" + :options [], ::record/tags-index 0}]) + _ (record/preprocess-slice-records test-seq-resolver rname->idx subst-mat records) + stats (record/encode-slice-records cram-header rname->idx tag-dict ds-encoders tag-encoders records) ds-res (walk/prewalk #(if (fn? %) (%) %) ds-encoders) tag-res (walk/prewalk #(if (fn? %) (%) %) tag-encoders)] @@ -269,25 +302,30 @@ (let [cram-header {:SQ [{:SN "ref"} {:SN "ref2"}]} + rname->idx (into {} + (map-indexed (fn [i {:keys [SN]}] [SN i])) + (:SQ cram-header)) tag-dict [[]] ds-encoders (ds/build-data-series-encoders ds/default-data-series-encodings) - records [{:qname "q001", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, - :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "AATCC", :qual "CCFFF" - :options [], ::record/tags-index 0} - {:qname "q001", :flag 141, :rname "*", :pos 0, :end 0, :mapq 0, - :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "ATTGT", :qual "BDFAD" - :options [], ::record/tags-index 0} - {:qname "q002", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, - :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "TGGTA", :qual "ADDHF" - :options [], ::record/tags-index 0} - {:qname "q002", :flag 141, :rname "*", :pos 0, :end 0, :mapq 0, - :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "TCTTG", :qual "DDDFD" - :options [], ::record/tags-index 0} - {:qname "q003", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, - :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "GCACA", :qual "BCCFD" - :options [], ::record/tags-index 0}] - stats (record/encode-slice-records test-seq-resolver cram-header tag-dict - subst-mat ds-encoders {} records) + records (object-array + [{:qname "q001", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, + :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "AATCC", :qual "CCFFF" + :options [], ::record/tags-index 0} + {:qname "q001", :flag 141, :rname "*", :pos 0, :end 0, :mapq 0, + :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "ATTGT", :qual "BDFAD" + :options [], ::record/tags-index 0} + {:qname "q002", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, + :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "TGGTA", :qual "ADDHF" + :options [], ::record/tags-index 0} + {:qname "q002", :flag 141, :rname "*", :pos 0, :end 0, :mapq 0, + :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "TCTTG", :qual "DDDFD" + :options [], ::record/tags-index 0} + {:qname "q003", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, + :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "GCACA", :qual "BCCFD" + :options [], ::record/tags-index 0}]) + _ (record/preprocess-slice-records test-seq-resolver rname->idx subst-mat records) + stats (record/encode-slice-records cram-header rname->idx tag-dict + ds-encoders {} records) ds-res (walk/prewalk #(if (fn? %) (%) %) ds-encoders)] (is (= {:ri -1, :start 0, :end 0, :nbases 25, :nrecords 5} (into {} stats))) diff --git a/test/cljam/io/cram/writer_test.clj b/test/cljam/io/cram/writer_test.clj new file mode 100644 index 00000000..d4145f21 --- /dev/null +++ b/test/cljam/io/cram/writer_test.clj @@ -0,0 +1,70 @@ +(ns cljam.io.cram.writer-test + (:require [cljam.io.cram.encode.record :as record] + [cljam.io.cram.writer :as writer] + [clojure.test :refer [deftest is testing]])) + +(deftest container-index-entries-test + (testing "no multiple reference slices" + (is (= [{:ref-seq-id 0, :start 123, :span 456 + :container-offset 100, :slice-offset 200, :size 1000} + {:ref-seq-id 0, :start 987, :span 654 + :container-offset 100, :slice-offset 1200, :size 900} + {:ref-seq-id 1, :start 1234, :span 5678 + :container-offset 100, :slice-offset 2100, :size 1100}] + (#'writer/container-index-entries + 100 + {:landmarks [200 1200 2100]} + [{:header {:ref-seq-id 0, :start 123, :span 456} + :size 1000} + {:header {:ref-seq-id 0, :start 987, :span 654} + :size 900} + {:header {:ref-seq-id 1, :start 1234, :span 5678} + :size 1100}] + [[{::record/ref-index 0, :pos 123 ::record/end 273} + {::record/ref-index 0, :pos 428 ::record/end 578}] + [{::record/ref-index 0, :pos 987 ::record/end 1137} + {::record/ref-index 0, :pos 1490 ::record/end 1640}] + [{::record/ref-index 1, :pos 1234 ::record/end 1384} + {::record/ref-index 1, :pos 6761 ::record/end 6911}]]))) + (is (= [{:ref-seq-id -1, :start 0, :span 0 + :container-offset 100, :slice-offset 200, :size 1000} + {:ref-seq-id -1, :start 0, :span 0 + :container-offset 100, :slice-offset 1200, :size 900}] + (#'writer/container-index-entries + 100 + {:landmarks [200 1200]} + [{:header {:ref-seq-id -1, :start 0, :span 0} + :size 1000} + {:header {:ref-seq-id -1, :start 0, :span 0} + :size 900}] + [[{::record/ref-index -1, :pos 0 ::record/end 0} + {::record/ref-index -1, :pos 0 ::record/end 0}] + [{::record/ref-index -1, :pos 0 ::record/end 0} + {::record/ref-index -1, :pos 0 ::record/end 0}]])))) + (testing "some multiple reference slices" + (is (= [{:ref-seq-id 0, :start 123, :span 456 + :container-offset 100, :slice-offset 200, :size 1000} + {:ref-seq-id 0, :start 987, :span 654 + :container-offset 100, :slice-offset 1200, :size 900} + {:ref-seq-id 1, :start 1234, :span 5678 + :container-offset 100, :slice-offset 1200, :size 900} + ;; Both of :start/:span should be zero for unmapped slices in respect of + ;; the CRAM specification, but it's guaranteed by the CRAI writer, + ;; so it doesn't matter what values these keys actually take here + {:ref-seq-id -1, :start 0, :span 1 + :container-offset 100, :slice-offset 1200, :size 900}] + (#'writer/container-index-entries + 100 + {:landmarks [200 1200]} + [{:header {:ref-seq-id 0, :start 123, :span 456} + :size 1000} + {:header {:ref-seq-id -2, :start 0, :span 0} + :size 900}] + [[{::record/ref-index 0, :pos 123 ::record/end 273} + {::record/ref-index 0, :pos 428 ::record/end 578}] + [{::record/ref-index 0, :pos 987 ::record/end 1138} + {::record/ref-index 0, :pos 1490 ::record/end 1640} + {::record/ref-index 1, :pos 1234 ::record/end 1384} + {::record/ref-index 1, :pos 6761 ::record/end 6911} + {::record/ref-index -1, :pos 0 ::record/end 0} + {::record/ref-index -1, :pos 0 ::record/end 0}]]))))) diff --git a/test/cljam/io/cram_test.clj b/test/cljam/io/cram_test.clj index 8ff2902e..7e8d6269 100644 --- a/test/cljam/io/cram_test.clj +++ b/test/cljam/io/cram_test.clj @@ -9,6 +9,8 @@ [clojure.test :refer [are deftest is testing]])) (def ^:private temp-cram-file (io/file common/temp-dir "test.cram")) +(def ^:private temp-cram-file-2 (io/file common/temp-dir "test2.cram")) +(def ^:private temp-cram-file-3 (io/file common/temp-dir "test3.cram")) (defn- fixup-bam-aln [aln] (-> (into {} aln) @@ -105,3 +107,42 @@ (cram/read-header r'))) (is (= (cram/read-alignments r) (cram/read-alignments r')))))) + +(deftest writer-index-options-test + (with-before-after {:before (prepare-cache!) + :after (clean-cache!)} + (testing "A CRAM index file won't be created by default" + (with-open [r (cram/reader common/test-sorted-cram-file + {:reference common/test-fa-file}) + w (cram/writer temp-cram-file-2 + {:reference common/test-fa-file})] + (cram/write-header w (cram/read-header r)) + (cram/write-alignments w (cram/read-alignments r) (cram/read-header r))) + (is (not (.exists (io/file (str temp-cram-file-2 ".crai")))))) + (testing "A CRAM index file will be created if `:create-index?` is set to true" + (with-open [r (cram/reader common/test-sorted-cram-file + {:reference common/test-fa-file}) + w (cram/writer temp-cram-file-2 + {:reference common/test-fa-file + :create-index? true})] + (cram/write-header w (cram/read-header r)) + (cram/write-alignments w (cram/read-alignments r) (cram/read-header r))) + (is (.exists (io/file (str temp-cram-file-2 ".crai"))))) + (testing "Error when trying to create an index file for a CRAM file not declared as `SO:coordinate`" + (with-open [r (cram/reader common/test-sorted-with-unknown-so-cram-file + {:reference common/test-fa-file}) + w (cram/writer temp-cram-file-3 + {:reference common/test-fa-file + :create-index? true})] + (is (thrown-with-msg? Exception #"Cannot create CRAM index file for CRAM file not declared as sorted by coordinate" + (cram/write-header w (cram/read-header r)))))) + (testing "`:skip-sort-order-check?` skips the header check when creating an index file" + (with-open [r (cram/reader common/test-sorted-with-unknown-so-cram-file + {:reference common/test-fa-file}) + w (cram/writer temp-cram-file-3 + {:reference common/test-fa-file + :create-index? true + :skip-sort-order-check? true})] + (cram/write-header w (cram/read-header r)) + (cram/write-alignments w (cram/read-alignments r) (cram/read-header r))) + (is (.exists (io/file (str temp-cram-file-3 ".crai"))))))) diff --git a/test/cljam/test_common.clj b/test/cljam/test_common.clj index 5eb2ac4f..91524567 100644 --- a/test/cljam/test_common.clj +++ b/test/cljam/test_common.clj @@ -166,6 +166,8 @@ ;; ### CRAM files (def test-cram-file "test-resources/cram/test.cram") +(def test-sorted-cram-file "test-resources/cram/test.sorted.cram") +(def test-sorted-with-unknown-so-cram-file "test-resources/cram/test.sorted_with_unknown_so.cram") (def medium-cram-file "test-resources/cram/medium.cram") (def medium-with-standard-tags-cram-file "test-resources/cram/medium_with_standard_tags.cram") (def medium-without-index-cram-file "test-resources/cram/medium_without_index.cram")