From bc9566c16f4d7648b1eccceb4805a565d3e2ade7 Mon Sep 17 00:00:00 2001 From: Shogo Ohta Date: Mon, 11 Nov 2024 11:16:04 +0900 Subject: [PATCH] Omit read names if :omit-read-names is specified --- src/cljam/io/cram.clj | 10 ++- src/cljam/io/cram/encode/context.clj | 3 + src/cljam/io/cram/encode/record.clj | 76 ++++++++++++------- test-resources/cram/paired.sorted.cram | Bin 0 -> 101766 bytes test/cljam/io/cram/encode/record_test.clj | 86 ++++++++++++++++++++-- test/cljam/io/cram_test.clj | 72 ++++++++++++++---- test/cljam/test_common.clj | 5 ++ 7 files changed, 203 insertions(+), 49 deletions(-) create mode 100644 test-resources/cram/paired.sorted.cram diff --git a/src/cljam/io/cram.clj b/src/cljam/io/cram.clj index 267fb2f6..47ab52dd 100644 --- a/src/cljam/io/cram.clj +++ b/src/cljam/io/cram.clj @@ -81,7 +81,15 @@ sequences into a CRAM file, the CRAM writer, by default, checks if the header specifies `SO:coordinate` and raises an error if it does not. If this option is set to true, the CRAM writer will skip this header - check and proceed regardless of the header's sort order declaration." + check and proceed regardless of the header's sort order declaration. + - omit-read-names?: If set to true, the CRAM writer omits read names (QNAME) + from records while preserving consistency across mate records. + Read name omission does not apply to single-end reads, detached mate + records or mate records that involve secondary or supplementary alignments. + Whether or not secondary/supplementary alignments are involved in a set + of mate records is determined by the tags TC (for secondary alignments) + and SA (for supplementary alignments). Make sure that these tags are + appropriately attached to the records when such alignments are present." (^CRAMWriter [f] (writer f {})) (^CRAMWriter [f option] (cram/writer f option))) diff --git a/src/cljam/io/cram/encode/context.clj b/src/cljam/io/cram/encode/context.clj index edb3b28e..7e392055 100644 --- a/src/cljam/io/cram/encode/context.clj +++ b/src/cljam/io/cram/encode/context.clj @@ -11,6 +11,9 @@ (map-indexed (fn [i {:keys [SN]}] [SN i])) (:SQ cram-header)) preservation-map (cond-> {:RN true, :AP false, :RR true} + (:omit-read-names? options) + (assoc :RN false) + (= (sam.header/sort-order cram-header) sam.header/order-coordinate) (assoc :AP true)) diff --git a/src/cljam/io/cram/encode/record.clj b/src/cljam/io/cram/encode/record.clj index e347b36b..bfffba07 100644 --- a/src/cljam/io/cram/encode/record.clj +++ b/src/cljam/io/cram/encode/record.clj @@ -37,27 +37,35 @@ (AP' (:pos record)) (RG (if rg (get rg-id->idx rg) -1)))))) -(defn- build-read-name-encoder [{:keys [RN]}] - (fn [record] - (RN (.getBytes ^String (:qname record))))) +(defn- build-read-name-encoder [{:keys [options]} {:keys [RN]}] + (if (:omit-read-names? options) + (constantly nil) + (fn [record] + (RN (.getBytes ^String (:qname record)))))) -(defn- build-mate-read-encoder [{:keys [rname->idx]} {:keys [NF MF NS NP TS]}] - (fn [{:keys [^long flag rnext] :as record}] - (if (zero? (bit-and (long (::flag record)) CF_DETACHED)) - (when-let [nf (::next-fragment record)] - (NF nf)) - (let [mate-flag (cond-> 0 - (pos? (bit-and flag (sam.flag/encoded #{:next-reversed}))) - (bit-or 0x01) +(defn- build-mate-read-encoder [{:keys [rname->idx options]} {:keys [RN NF MF NS NP TS]}] + (let [RN' (if (:omit-read-names? options) + ;; read names must be preserved for detached mate records even when + ;; :omit-read-names? is true + #(RN (.getBytes ^String %)) + (constantly nil))] + (fn [{:keys [^long flag rnext] :as record}] + (if (zero? (bit-and (long (::flag record)) CF_DETACHED)) + (when-let [nf (::next-fragment record)] + (NF nf)) + (let [mate-flag (cond-> 0 + (pos? (bit-and flag (sam.flag/encoded #{:next-reversed}))) + (bit-or 0x01) - (pos? (bit-and flag (sam.flag/encoded #{:next-unmapped}))) - (bit-or 0x02))] - (MF mate-flag) - (NS (if (= rnext "=") - (::ref-index record) - (ref-index rname->idx rnext))) - (NP (:pnext record)) - (TS (:tlen record)))))) + (pos? (bit-and flag (sam.flag/encoded #{:next-unmapped}))) + (bit-or 0x02))] + (MF mate-flag) + (RN' (:qname record)) + (NS (if (= rnext "=") + (::ref-index record) + (ref-index rname->idx rnext))) + (NP (:pnext record)) + (TS (:tlen record))))))) (defn- build-auxiliary-tags-encoder [{:keys [tag-dict tag-encoders]} {:keys [TL]}] (let [tag-encoder (fn [{:keys [tag] :as item}] @@ -132,7 +140,7 @@ (defn- build-cram-record-encoder [{:keys [ds-encoders] :as slice-ctx}] (let [pos-encoder (build-positional-data-encoder slice-ctx ds-encoders) - name-encoder (build-read-name-encoder ds-encoders) + name-encoder (build-read-name-encoder slice-ctx ds-encoders) mate-encoder (build-mate-read-encoder slice-ctx ds-encoders) tags-encoder (build-auxiliary-tags-encoder slice-ctx ds-encoders) mapped-encoder (build-mapped-read-encoder ds-encoders) @@ -221,7 +229,17 @@ (bit-and (sam.flag/encoded #{:next-unmapped :next-reversed})) (unsigned-bit-shift-right 1))))) -(defn- resolve-mate! [mate-resolver ^List records ^long i record] +(defn- has-only-primary-mates? [record mate] + (and (if-let [tc (sam.option/value-for-tag :TC record)] + (= tc 2) + true) + (if-let [tc (sam.option/value-for-tag :TC mate)] + (= tc 2) + true) + (nil? (sam.option/value-for-tag :SA record)) + (nil? (sam.option/value-for-tag :SA mate)))) + +(defn- resolve-mate! [mate-resolver omit-read-names? ^List records i record] (when-let [^long mate-index (mate/resolve-mate! mate-resolver i record)] (let [upstream-mate (.get records mate-index)] (when (and (mate-consistent? record upstream-mate) @@ -232,22 +250,28 @@ (zero? s1) (zero? s2)) (if (<= s1 s2) (= tlen1 (- tlen2) (inc (- e2 s1))) - (= (- tlen1) tlen2 (inc (- e1 s2))))))) + (= (- tlen1) tlen2 (inc (- e1 s2)))))) + (or (not omit-read-names?) + ;; to omit the read name from these mate records, they + ;; must not have any secondary/supplementary alignments + (has-only-primary-mates? record upstream-mate))) (.set records mate-index (assoc upstream-mate ::flag (-> (long (::flag upstream-mate)) (bit-and (bit-not CF_DETACHED)) (bit-or CF_MATE_DOWNSTREAM)) - ::next-fragment (dec (- i mate-index)))) + ::next-fragment (dec (- (long i) mate-index)))) mate-index)))) (defn preprocess-slice-records "Preprocesses slice records to calculate some record fields prior to record encoding that are necessary for the CRAM writer to generate some header components." - [{:keys [rname->idx seq-resolver subst-mat-builder tag-dict-builder]} ^List records] + [{:keys [rname->idx seq-resolver subst-mat-builder tag-dict-builder options]} + ^List records] (let [mate-resolver (mate/make-mate-resolver) - stats-builder (stats/make-alignment-stats-builder)] + stats-builder (stats/make-alignment-stats-builder) + omit-read-names? (:omit-read-names? options)] (dotimes [i (.size records)] (let [record (.get records i) ri (ref-index rname->idx (:rname record)) @@ -258,7 +282,7 @@ record' (assoc record ::flag cf ::ref-index ri ::end end ::features fs ::tags-index tags-id) - mate-index (resolve-mate! mate-resolver records i record')] + mate-index (resolve-mate! mate-resolver omit-read-names? records i record')] (stats/update! stats-builder ri (:pos record) end (count (:seq record)) 1) (.set records i (cond-> record' diff --git a/test-resources/cram/paired.sorted.cram b/test-resources/cram/paired.sorted.cram new file mode 100644 index 0000000000000000000000000000000000000000..029c2671e3408afb2e72805dd6c74a44e8121df5 GIT binary patch literal 101766 zcmeEv2{={T-~T$t;h1L`4mojzN~U8dL#B$18Om7ZsVI_WonxLuk}Fr`qR zRV!A9diktU_6qd~Qr^Z3z!3Te&_5g)xa8D?10Xg2g2^`NBzDGCFto|FwbrMM=UMoG zLS=!VPVv_A1MCTkdG;cKtczs{na+i*98>lj+~Vrjt9|3qhk^30r(dM@T05N?`to(F zwf?s^pU-UG`0d;v{n4g(%^&A{{rXOM&fu3ry_>&1@74eIY(?+6&ja81h6eJ^eRP=V-62 zQS0W=w+b{4WUty&Dz7+`r+i@RmZcjV?rwgkrP6Fv_&Q+)*pvfN7V_?@>r4TF(_`t4h#lHk7vCadyLs>oH4~HzsKvcyiWLfYXR=L;Ylx6yfJ`@)Guf zj_*UY60$7fUwJu&8d?@sns9Et91vTz-KE@TTH&K_E}hreK3tWq+h_IK{V3;_(z%v{ zho25UNiUN~2!D1w`C?QSrDV1Q&zgO_eHpjbw?DEapH+EC%Gcglf5$sk_)~^M!I%D3 zxGzcRl%u()MYl+N_dQ<1cb(bZ z0+A-p{95i*7PL`=$)A96!Wh2{s z>8(a$Qu@0$pG^Ho4dOrMr073Lu1xWr;X(|!k);q3+qxQ`bN9L7flreIx@S&G8qhWE zT3vF(kw4v9Y5)FNG_PB(7w+9XcVRYr%8S0|p| zl|~lVp0D42@#)f@0B5DQftA4l6;$PeVf=fVwpxa4zeq@nN_u^9o$z!)a}vMU9f1w^ zCFEBhk-Mstu`0j1en)NhyiXVNuFQN&PU-ZyuQcTe;nR-(CbfOyt9TV{F3#Cvps6V@ zU%W0&vvGm7PUJJEd!@n6SG+%(ie9l3tW|0lu$4B`@OC(xS-GH-^Mv%yyC!{0pYQw3 z^8S*!!b6#(Ms(*H>co4y->f1ml_0%b|9I+APyRu2P;lA|!CULns=Fu;TJ;*jm#HoK zpcSmeIhVtbdq9`Uv)5M3g}cMJakkyogx<*k2gI5qr?b+gxOFajzY~JMB+^$a^Tw11c zFS`$Lz2fBA{^EmZ(-rv??L0|^(=+ZbII_n<{?W4s_7(f<)HwTZ@Ri+8Jh{TT#Cn%E zUz14sG`+ie$JiDepuZkW32ThB*qZ)Aq9`^Q1b>j-vFrdVZ<9?>(r)e5QO-1_(#uoI zUfDE%eX8TRYO_F@nY5hOGh3yilXQWX%OtyI7Lc|Quk?i(*4tzX$b`=oJ*IiW+uu~T z&8F<0CpEe8#JlXJIKdH}D(GrQM54YOWc<*N~5Ao0;=`?NgP_&cu;yz+GyP@mUD zuMoMlF==tBor_b;hN5sSc_BmYtb9|iX@$aO8^R3TI2|`xmtMG7F~jk+$r8S%*y-UR zuj3zDem&lE-+NoqhD@%&MzUFDa-xHONn++*c}3;fE5gn+Wn2?E;LGQb=BbiNwdEAUr429AMLJX36>7;i<-T4Qb$^u70`x%3og{oQDEt; z^z2@8W|HFR`lWm<^PIYr6!Pg_O3m8Wnxr-Mxv7^IIf%<1Htt;$o4H5t;6QZ6t=UI& zSM#mjBUhBTXSLYzD7W-NHHj_*wcg&yijI%FA3nuzTCnhRTm{i4@`ZzFQ_JSc9ap|pvzEP2Ov_lD&r+7s zA+hS9(AM?e<`so)(X*X>V6mIcsTT!uibsgA*!tM6y;!?XlO^+BN5kvL3O`A4zQ;7H zL%0Rc3}dXe&&Ulr_xw&}8vC8r`JdXCxS8hf62m2ye%lnJp}$cwcREcZuy1{Ndhj)J z_oau^?;h`+d{wsa{A{+<-5cDNI>=d5NP*qao{a|mx*M}i?%e&LckX6@OXnVSUo+1@ zEzU-LTl-w`H5SBx#rYWp8HHujnk2$1i8X^=2HnCkZsA|K4V&2-_2-x>N56gRSaba4 z5|ODMC$zL}TAPmW9r7%?8@#$g3wT>qIK(uOcfZmA-FjhJB~2^iCgV{gIb)a>0{MgH69Z z3*{?YA*ptZaywD9DcCEvzwP7oQ0WTO`GOnLxB^$baeTb%oYBJcb8fHS>g6jo#4g*O zsDw04L$$U&tP~EprMIQn zmQOmc$V*C_J4}>d%b|WInC&&${_e+2`n@XJm%c-G%a8G~JY%VtdX7uWC5p&WUF_}u zTQUDDEw1pOE0?dW@2sQFys$NQMa7y$r%KZeRUQqAinK&Hef_w14`F4Q-fiJaN_s24 zxGLm6|D?O!nf2wm$Inw5SJi!CHBj8Ai|d*`^!jYiyJ=Oc*IQjTFlRmTO@Vyqy!ftF zxit>QYIaOpo7u!_knbdD(8v|onQYKVqB&Q)9}e8F*>`?#P6YkyMoBL3Nx^}@;HvP0 zwzZ}=I$Jrx(Dd}KUWdW(RYM<+&+6;CbJ4rO^0vdq-2UFybCaJBSZEukt><^21{8_A zVr^v41z)WV8LIT&nZAT(tVdXHw%UYL|MggE|Jh|}28Um++Y#8YbKSc08@;{tTarSx zH{@T6h~ziVk6dCs=|EG$mk`gw;<&U%hcB%~q&Ae~#ba^PI0o1{Y@&>vpbRcTI5hiohp(-pwiH zFMV*?xbcDoSG{vY@Id)s^~uM5A71Y|_0^WYL8aS^C71W0%anjAqyWLW4GX3eXYW}t$d{Y#D&Yg`j`8C#2 zO{Y^&1j4V}b>f}QXyj9YLl5=cV2dBI+OUuBYa5326wBIm5sx#lW=Yh(h{aLR%@6>A^EDCh) zo%&L4XMj(x`R;V#m0Si*4f(-J#;4}3Ji?GUHj0 z_@@trUK!8aEe>Dly=(ZqjJ&e&rA}v{((9WmHi+j|eww58>7K_n696P8_p+LC!lmf* z)v{gB-Ka)H5G^qkN6*+dWsn!HS?O`~I6qcfE1aPX&ej%ZXK7CI+DfuFu>*#7#+Js; zmL|?tmR^pA&W?7*f4t;1=8_#F)k+WFKVJ1hS81^lM)PnuoH0D$ZOw2v7F!D(jsQ!> zvD(hZ;n-}gSn(W0D|nmT5*|3L>~J_vD|nmB9{!Kp9;W88T!_Q*&$Ghe1m?jTg7b`V zI3XiL9B$G=m_*bF9>mO0HsU5QwS+A^C)pXpb4eQ$98Ss=rV;NUi8CT$08*Do(D+l`12KxY$9Gp1~{F z4B%NC>=8y90wV)>=AU(jAK(GqfNp?!TNGIwo`t~5Mr7yUIUiva7hq}!3xZsh+# zYwk;=v&Vh}tnh|1@#|Xx0XLrurw`A#;176216tfrdNcrrv@Jux1087eXpkN^R17bs z8^#TJ!~;<50j~r0Y(M~HfFG0wgnV`)KkwIubOHfSA>v3dDM0F7&7&k+cO8N4`ImEk_zh^k z|0X-q0IRfi297|qqMI=#TJP9DCa+0Z+0XMPq)d&fay<0LHDzn7Ti4!(6{`_AKuq$5 z++sRCLGBQd<2sL}f8m38YE4?fgSs^PPH+(S^tc%mk_mII)W8wgg#j~L*ij?@yrZpm z;s_ihBJ-w~x3{~uyE_1!?q5t%zgG#yeXzybF9avXt6U6PqAP7-tsJ&97j-2HyljqdlMK|Qr=Xg#&zGCd88xA)W@ z_D{9_W+@4CHcv~fAP~5{L~+jWOcWIcqSWb8>QzzdL$SVqn;th*1>dI%>O{Rdc%b*g z!x0sDs5$}2wxOK8bqnXY={U0pFId!V!4J zwBFal)6>(N01jO?^^Ww```DD7pm1g;Iq|-40eE@o`$%)i zbP-AlD-a<^sZ)|~0unE+LWEX9j&`FY&IDvUZ8tCNvWPrMo*d&ziSGdvJb{dM_gC9 zVxH2`wF@L@(~X~2hqzY0je8Mr_~42i32{N~JBWSnDPd`5xT6twb)>9L$lM~q**DlbeIR6Txz=Q&C8*paP6zQTW$=T9LU1?vd#54uOEG3wRCvO>@XZb zY)mh^ySo!)KuUbGE$U@lRckDcAU^Rx|N6oT!jFUqj@h|hc(6Z}TNjRDU_HS74o4tO z{HTadOE-=nIq}Ui2R(9d1gVK{t}qjXg-)LMrtQj);lqH49e-V~cW8z*TuuNxqk9jc zRtc_X>x0Q4N=nKy>#k1Kc;lmT*&F~>Kkmn9W{Fx+8;{bNIM1KOOviJrm8)fV38Fh8j9?8O7(LDuH?Y$a!x3bWVq-30^pX2C2? z#XI1zMTk76*L@jdJIH^gCaT}$d3{%4B8Z39R?KqEYZcTIKYpO9X{l&wO;b;{zNuKR ziaT*@b=YcdOP-CZlV^l+9}&+`PvqWz{iemf>&X^Mw{C`&?!TUFa2;IU#b0LyvtHWP ziYNnwdD>QbAT#q#wT7IRRQO{&PVLF#xM`u*E7wnu*|XnNYo=?$+K8B<1jmZn?VBHM zEPolMD;9H=Z+C6;hWOZA_E({tYCRuT`Sk~}Y2TW3&`T)q{^IP!jV4P2rd~R6V{_ia z1X^9h^kejLU|QQb2Tw=t zhUpbYB0+{JKZYejwld}%zB$+cSolAOHAb-`zJ=j+Bdn3Z5&u8v;+VXCkRz%#6nc$t zL}q`3pTZHXXVu*#zR}8(JW=HJTa>u`uf`GAo7YE@s71idgu@vP zec%x_6cP<+kQ7G469z*}htQoK0uPAg;fILu#|3{Z05A1mXcy#!QY-*bYV>(A*dT@s zLY@aQwqDveh77VX$RHak?vG>;PX;(x1`hp2q%bw%r7t9d%0w^~re~mcq?tja*^)>z zdZgK-NVB#`v))KEKqNOhAQng%3yQ&@l)JVRGNi~|lN76AvI3yj}kK^mOuXyABzktrl~Q@EK`-f;Tq!#i>2 zkFw?{J4EmRU2sf@8)guGYpoWfaE9zSZFq)eF2EfPAn_lH8v@*jF)}C{hNHE`I1Ire z0iXc9lO+vTk_f8;7XweE??8?&q9nmSz;p7_j)`!R7L)1fl#~+ybUSJ-w9ivp{9#BR|A6L=SNUSzE3sGP zoB)m6|Mi2r(Y)3zRr#zRoH{lOm9_om^wr$IR@b0!#ubw#egm^pXW2PG;)HF1m%F<+ z0B$`?wNVS6%ySs{E8a9Qg}GRN7*d%3E82v8t2x@mL=dz2@)q>g6;BZ|A)Z3YBsYoq zVi}aFjY`-$Y0|lQGkxb7OaeK-O`ZzrG_c=5q@xE+oM%X2A2_j6OU=8Ds=1O%EhRKO ztIf)WW*BH;b}Xg?Z=w7KhM zF%o!9!ZYh3B+3R(H9x(8hbh?XUEN^{NCdD$Md(~hSjrswoZsKcPa-MuuO8hs+_?T(gV>!&IkgwR8cA6#P*^caQaobUC#||{>VhRH zR3dIV9PtbS;1M_UM~1?XTH%M;1B0OuZ2<0}1zieBtymc>00GE%q*ge78n}`mHioDu za^lGF0VG2P3?{@BS)t^kKP2$l;k_UVvJ^dTK!u4B=mNTfFx_DR-C;@OQF>(2F}lNT zxMt|Je zH1Pg3{So9W^Jrbn`QryU%dA60o=lfI+wufec~JUPL8%LtIm#85nYhhj5C=Ck;WS0qD+%Kw{;DEEWJSqYgqKhmqlBHbG~J>xQ$Y zgX^EZ?Go6LQiBV+RWSw04e`9GXCa9(;lt^`GX|f-l9;$5It26Y@fe&5zz0at2}5>( z@a|SD&)bXnP=vrUra4f|y52fHcth)<| znJ_6}mmna2-RtxOrI?|zc_CyoSS(uR?+MFbuXabcD6hL{8RRlp07!z3@R8(NgaF`j z*9Dz1Oy+204=xwp=s|Z8jX~li#94TG*D@$f{s@KPrSBlocaWoRQsQw)s+7-5%O`1( zWBe!yB7nk6s}x})QOR^JO3F7_TwMVux}G_c?6`1Jf9e^QbtSg%1Fx6X+`el5vExPc z^z&~7bnVMqz3@vuKgf|5Op7|$vwrLTP3BFUX99Pj)7!m%IsGO{;zcWm3 z>KNR{;%QYWT4VKZ7?OjzKyKqmOZ#3nBQm6VO{$jq@w(w`N`2cbs`E{o>mD=+7hMHz z>A0H|bs?_P5)_dvw(1spm6do<;U3-$1=C=Gg3lsU$XJ4u!v?W`EKs-mEu0PtbKZoX&`|Mb`M&-0*$#b`ZcMmR${?%yCTSwQlQJSOv;YDMN zsgg97=1AcfvJ~_MjEYQs{M=&iyNXe)qrbS-; zr!61>!LG12eCo}e3f|Z)E+D{cPJ3%YO2&=JW}=Sab$kYqp65GNi5q;K0J#MAZV3)> zZVmc|D#~WGA1?O=uy?)KfYv!Kb9A?@b&KCP4k6@2Wd&W9gd)TtO49t#LJooD7*HMp zUs{1~BDzm{;2(n}5$Rv>KrJ^=_Jh-I;gMC;b=>E5?idVqx`7dcIK;w467~f$qPS1*vD203OJzV zDsvAwSV3lh3LM*-&Z12T+>WgHq3mS52Gc4I1P zP6XkkCL9{O)n%+3V1jY*Y;JB2;n{LZ#s~C$x!s?#<`RTwwr9E)GW32tKYa#-XW5X7 z77hf@VstSDEIdPSMgk1sG4PC-3KUrUQ;ckU*?tt8zi(p$o5TQaIR+@hGt|u(1q?iM z`*g8n1cFC9@8>}9N`;dF)cAXTyLq_p>F<1z^s7NI_xu$CqY%7jQ`C8zu(^6z2$p35 zNSKJGDvuZkT8u+_j00du-Nk4)2PqmH+B9&6FRuv!F`fa}2w8szVhm{Bck%DT0`Uw2 zP8KFb1Ai2WF?KNcVc`XC^snOW6NwO%G4+AEfF&^w^pW~7>H_P8>T~0H?->NmoD9(c zLuA2(=21xn`irC$mA6F>Luj6}ggJfu05n6(mUP8K#DMEKnX<;dC9nx1@w%V>3ZB2W z`n$g>P6gpJvjf>i@fp*B>E}PphWL!t!*1~HXnk|DdOimTyL(E49ae*AzMQ=*9yfd~9_IKtbq=GJ3mWDMfc6hGTD+ z6yrdg=v>$W0Q}bkuY(|tz3t`Ae0ugOF6VJ5zRceW2V9P@nxJu(Kle85$RCyj_|s?L zmcqwopjq~JJqEp-L=&*#=e%d?RPZPrd9c)+|$Au-SrHMJ994iMZY&f(fEO8y7xOhg?FnF=G8WdyDIpjzZ~Tve`M5ur|sqkSN!hc=5?rg>y`wV#CY-A7yl9Dhf?}3gBQ?b6adGs3U|jL|JD;YaBy4BBL)JN4_1sv zes?B7o_|auLFAu!j(naW2=(%{>(Px}xvzzwzXH|=3;H9{f4T#S?m(tHP?F(- zpEuHs9BD>L3W5}XuFOkUCP&Lr5;h_V5J~bN(}gLikTk97?8hxwR}WgIl!$MUC}CN0 z%Oz8y;K!`5+uVIQGKz;`! zCi1gh)ZwJ5a08oOujq}W+`~zxwKloDZuVE(K5sgl@#AGqS25?XI7L>zc=}vy|G+o~@_2zDVL!Wc#m%>zTBt z;-k2JWPoDs`}C9je{ubX{8fsf4I^CD>mB}j2jP0GO&(ku{JDe5m{ox{Q>XjKRlUFj zxGsMpOlSP6UUK>4kJ>G@0OYo1S)i%&i+XD1e+1hx>+=7PRlPfN>i&7yz5!x;C~WgE zwhznVN3cCoh8!tFNjfv4+89pvrwQtS+QDzh>F@s-q_%J%y6I$F~`b-gu+yQF5i+-E!B#t$jXf8kwa z!WI7i7Q9c)u9${|6W^bBp?<@L0C+pL?L&Nq@U3@5Nj`B7XrW#M(OIO*D<6XMq+tR4 zhfvMu2%LXh0RPWfng2`y{IHYjumFDClH5I|2-?8|x*lC`r0;$XQVZqne-*O|)TS>y zyx`Q^9I;;wsatjBghr9tc8>GBdnz*){}-u$45^x>l*Ge81gX?Vh42S?X$M7U2gy;*l*Bm@rs)I{nM`YBEW$OO>;W%VS92cdx-@f2 zbdOuSRYI_n_B-GHlb04QTIGJiswLWU9)4T%%0rKv%CDd$IJTBv0HpNd8&EC3)KbB} zehse81MckYRY5Ow;NG5KxT5^=HTV=B<^WYc!LCY0egeZncw4PhHy)U$+TD3BFwgNz zam+Y5V_aDRQ~bbEeC{~cO6nGd5MD(Knb#>VxM_avHjJU6M%AF!GsW=xEt;>^UEPYy1-ZkyBEpppRQnE`g0WQ%#amWc|6pt>vf5+ zAz(M48!-Fr=U}@5XG7KY(Azb(Upm&@xt2-()v#TBAYXG7+XZ4Yl_^turem>Pi=W|Y z$d5+vgjW29QR*JhF&^l68Q}cmX?VW~8orW_V|B1My@(9?xfuFwWG90$UG)8uzh?d~ZFoM3uM#zPr6K+7P~GDx(H zS+tC~w2UQ@ZJxA@<+KcXWa}1M#t~Y^QJA8VmT`ejxk<}tqh++i^A1`DP`NObKk4%> z+%}P&xIP|v{sThchti{?r>xg2S-zL*un=kOwHU3^LPasP_S5`nVDx7|qXD@C#3ZP- zcZkFo0RNcUzF3uE@ZVrtPG?ohBxO8v5X{UGL#G1rAo0YUn`LLui5lsS3#`D#Z+LMptvArieip6cbDJrPFr+x-?=J%9Hl|{yUh8XH^pv zfqGOfqv_1K4SIPr4Yki1T#ja}b_`B?t|7_tHmtM3^DTnc-^pT>Ed`%vvtaV5J1 z`*7zLC1?yFI;OeNETHSPx;SbqWmsB$C+Y-C-OL z;K4Qz2ohKYQ6gXB#+i_oV6DcyIF2t6WCNGNr{a-HN_BSUa%9|xdEwAO01vLp#gw7@ zZ(coxMkvex6m-$pP#iBL1fhtSjTiaX*zD%23S;&Li(WA0~x0J@dOi1Ni`#R@ambROYdd4*20y>Ulfb78o zl|j>Q^*X931G7yHTer;2DjL|hT}XV^7x_sAVOcTm^St9EBfgw{kmP^7BRRBtmiC1~ zZttnV(*M2C{vBPvh@Irm-H{xVob&iu^i2S zC#3;G^tJa*0RP*vD<@5nZ@0K2Lz4=_X&x-8%575l-pN;@Z7C%b$i}Z?lYKKa z=}6>-=Ub93%tH@7DA}wqDJi#q;GI_UjmDDv^^IB)pWk-fet%%Occ{;y-o*obZ$2d! z9JO=SO5C#T{IxksllY$NG*t_ld^MZfy2*@MNepUGm~&Wbo6*51yrpxkr=)hC+q6m9 z=B#mNdbu%|Z3S79EzM%Dshq4x?ZeQLe%iFz4#})CWi3JMiXYCzU+uY)$R!$hhV!sP zySuM>Pk_RXndeWxHCD5}?Q>`2>4dxZ4{m2$%TKTTc4{`!f7MIep73){x5cs~I?ipA z>&UMj$V;jVuya{ag!g-WFe7fyid%O@ID#y%nYK7*RAq%t@?7||*>gbY+U(sv+ZP%| zJiqTca6fwYG0R@X(`|+#%6Y=q*?DTdRZO{Leqq{k;_Ap5FY9+bt$Em}+B?tc)E+Tw zkEEh+DsyCX^KFz&_j4*u;}sBZFAli@Zmxga^nA<1WRDMLX2hv^Wu+U6r8}8zP0#7P z6?nOxthRL3feV@Q)-P!glv#bOu8;F0?JalptM^~{W|2>A2>fzBK+=QTr@E^7_OX3C zBpQ3H69xOvc>D2G>+Fd)-#Tb0^-VC{rhCrXbgA^#XK%Tu;@3Ip&zfp0y6xu19+IMZ zaE_n#F8^8TK{DG``gltQ4=s{Rr8RF-*?98ZcJoEjH>*09e&li=2!k%K3wO6XfP?1; zELbEkpNNay?S62+f>2YFvRtENw3_39F zU|_GJp0(;zIR|Mn)=l?3&tiu!R!d2<%wP|O7N=FBy}7EPFmr=`~?w&1eh6+}Mm zm8se&Xb~va@#&_DqO!M_m!d^9QNX)DIooNC&vmWqR;MRH8;f7hB?kLybF@Ps@3d!K zzZSh(Ue9UJw22|X6J~yG!@Sc(4s2|?p@hEHo6QCsC>f0OL1HE=p>n8d_&@<33iv^2 z$Y(RSbopxiF1iq)yYsf9*o|fPSfB+vqL|n!j2&HbLsCp(8i+~by!5@$ZJWZ5HziRMq1O zRoSMgMirNC!t+O1 z&6swlW%)sWp4~xTQP)3u$HflwfN+XU!}UVx6cKprJbMSF(Yd;-V*8TpX~QZ4Se-XQQn9|bA4&s`%JE*+h;ocY8=KdF?#kWhgoz`sLbbf z&~_|`(PCu?4-qbah0#cow?7*GkHIbW!x*;l0PII6=pP;Pp z7E%}oPMifiW4JPfMGc*cW`vD}FI;pje8>Lh;2}&D9@YYQh)1mNTs%Cq`NPBKFnAdH z3=d#P9>%JZg}>1k277ia{gfC+{ z?DEoflWDstsaHm$mch1?dFeVL0i*zO%sEOtbvSNWw+JVh_K}iGghIjAZ18Mm;mo_! zrcDAj&vMRrG^l>)x8Lra-D9m0*SkZCTxc|tT1jY-iD`JpEph1Tv=04AXH2j2-^NRI zL-K;Hu$Md23hLkL0)%~okh@5Lq1gOT^agvJgdRiuM9Rl8&OIo?YqSs$4!-DyoO-yn ztQ-kaCMU$?1?p7Q5G_PqRKuw(w)t!@I++WL$Q~j1fUD_a6jl^+7HrXQ@G%F^%rHGG znB)f}Vze`KjAQ-kG;_{NHr#V}S7haZO~0txs3NuE z&=(O2yO(d|9q3|N{)G0J4b4WIgx{<%l~{cBZ69rF$~OJzJL_FbN!F{9zb<$l<%DW%ebsh_v^Pwle~r<(*1--SG z56K3(Jq8k=Sapayvp00GUUcP+JRPm+`Ygo`IbllUP8+ z*7_xAa0w`4daBx)UTtrDZiPVKKQvGfcBq6bv zDRJINbW#eXOxt8(7)8S3@`dRN73Vz6kgcK+lbyjtL!BfH<uD>t$l~3{Uj6cd?2gEJ^BuHJmmW^c zSo(SDKGByp8F6~?FXm=YFXx|dJ-gdZvRCryYx1D4yxgpdS9wahXPRkk2r8e_in=G^ z!!20TA5iR!nFrjNjI@nyW-ZL&Pnm%<{K}Q~Q(!cl8H|8n(`#a-a}X~e)5&8EqDbE} zh%)}ixFQGtut5|<-NMFjGyc9SG7k$YF4E`wzQ~cQyp|)H06mX~yYT0bgzGNfeiK`) zaPFORD{k3#r-5IMBzPyOwU3g73w0@rH4hCHVMzie#?-G7`b!tHLisU0O9He>NCdDe^$o7iZt#E?w?6t2U=8*r z*dgy?%zHti1_qzNWb?OUe_<-RabDp~C`|ZHGhAeoEyqE82LUK9esmZ?e81FTD2Vt` zh&O{XodJC6_qboYNR}{cq&%`i4aD$WBsVhcJ|%SqbX*NRCG`A&Gs~(kGgwTvmBS$N zrf>O9MQtlMpIV^K1?{PSHMqvUgdBFT)PfVn-3Q=W22U&m6NuJWLoJ3P156O!3u`G~ z0u5tSy#cVzuA7AvuQwO=LeUdKX>3`T;c&ZpDbj}g07%ohBYl`C?UQb9#iGh@Qog0t zcIbh0NF=+@wb_Z6WVqNH*gL;wi?djrP&)tHtMJ{mT*hYe=5r7OPZJ~bcZ>E_-AI+% z{cb^A=h}5&zS4{mBy~MF=FU}V^0D)gnpbe$#InI});?R=RSP%dlKgUBHs5G5__o02 za`7#3ZjFT&{2{hdiN&W6JS33pJ-BbzkxomL$X72nl%4dv>dLOSj~3d*yY+f&h}_Jz zy!-g5ii>oenVVj+jZ!ORsNZm!_%8JadMytUPH#Bgy}IVxt!m<=1nP>}6#<9qwJ(=T zhi)8tWs{i7<4<~!<}PKLx5VVZws$oD*|)ASic6$k~`o~c2T6>ei9ZZxKOh#}FKC_o5^J3t45bV{@cNhlCS=$^UJ_l092rhYwqist84)m~kYl1L2|Y-( za0Ln$=tN3NJpg;+0*ObRNh}%LQv|AYO&8v>UGNEIC86jT_-Y}A8F zLraCKGU`EP#Wt;>zKp7-rmCW%X*B6d{+*_l2MuBeu8N(KQ8!*?9GN)7!g7X3q8O+$ zTyo$rtj#rg-4R%uaY}||a5uR9=Y>w7`tB%fE?}hiAFzUjkQJzQ6&4gdyl9VC9`>Hz zKjm2i;sw99_cX?K^Si=??^;f{S@h~7HULdMfA77Y!v})y{&r6N9J@}`;~lX_Wa|=t zH9oM@)23jQ4|sSG?;JK;pZXm>5Ww@LMCmA7JS5FGPcf z(TsKU@J@1QM>)8g98X|1j6wdNrUbMnIQ6oBYYzR|T zh-TyU4Ktp^PZ_Uo*rco8@&3BVVG#W}y#&r5U~*;o5l9Lw*>NWD3|#@|qlREF!6>GK zu&-p%p1S7|FSr2C!{fsX@X&7v4}-Gs0RO-vKz0tC`-W{Dklh15_-^SR=mE3-6GT60 z^oAA;DSMGW7`SB_c5OU8aA(5j&x?Mt4ibZvus{YefRV__(SQPXrW=k-PL6&}NfN?F zKjCG#;gMrbQWE5#kpn;4GFKHjRd-TS-UD#>U^Ko^kq!SQ3~q*jeYWe8~cQICQ^+_2V&~=crU4Nv$`*; zZLmMl@i<GCu_^~Y^{HzaInS_QdFjIHktA*p0^~;#pJZ$qNMz%vDs!SSCXi9{ED0>7Bs0J z^FFor?ri&84_ZqKmpXgK`iNIGMOgXp*p&&Y-e2BkNsiNt*gJpErgzE_3HP5!cCfcw z9uYKNHf=r5k)FI&4Sp&QUDO^ZkbPbUd=n2m3`Jcm5_zOxJ3SwDpYabkKx$Eb_{j0H*m$fo{qb%j2 zlFK&Fc-ehemO|xVIHWKPB`5R*XE>y=s6Z=DXt{^bj1@Yhe4>2hKn^Kv41W)_mimrE z3fkS37V=RM>yU!=O@_v!S#Yh2@kv41qYY5mA(S8C~ESU7&$3@PsbVNf!WH-8ZSuH5=j-nYU5ZuSX5P-R_-Hl!E0v zL~SNBsy!Lyg|K`G9RS+QZS*H>N&^Eq&*3tQXiCTIefeYRlXS>!pmFC;=-FM&T6i5d zdA)^~1eAVy+;|Lwm;bpcMGO<-1-hybjDQSfRx9e^15+rAkgE!&&(&eQ)zN{;cAyXh z^?8^~?+eIwl*E|R`GyyIL641l3puP!Hg)ffUq5H#OSgq@8qT=#1MMgDK(xI-UG;G+qhyJxY3rl{iy%m4|iqT$dB$P#_IN0f;lVD)p~;;$a%2g7c|QX+a942UY2?j~=>Ztq9S8h^ z&IFb!R=>a~f`2Jac4fCMv(43^ho}pDE)oLXSq9B zTlD>g&SY1Gg_7J^lOo?O-~IIKny24N)!X{&+65c97xKLRO{>!@GZJ#GF#;ubcLIoh zIWHbHleK6*j6?VDizJI37|09eazGE*dF`s!NU zlW0Cj`;-RFqmd{^NsjsHl&GxKD)~L2Je45ytS7d5o(eCVxmdQXsFs5dO6-qkfe%8V z47M7`69zjzujq!t@rML6sHmuED35FzQ(CH0rqVdp8OHY0@rUxTuCS9i!|D(+ z-x9Ky>q+CX=vq!i?`)B9%K?>&Z1<)4djYM*uSjZ}LA3+RDb+LN$&UjiwK!|^KB5=9UxK+nY{%+-|L7bePXvH^ zA!RvKn`$u(L_|nOCdDA8H9lM|)bt>xH9lPJKS#0iH^bEu|I9kXZk+#%=1Rqm5uMIR zZ~Yv0G(XQZ>NQbNCM)f#j#SCj>%SU1`X+wMdz2kHoRZyd*YbD@mK~AD?AVqLzjr(t zG9e-yfPe;vF%9&&p}P=bZ4K@f5IMwS=G6Hvl{-EJ;-zx5Uu0A zjG^JKA@cpWJ%|gO*lb~&#@d4n8+ndy{zjrx$k7>;L^H%6ctxg=XmMn^EhYIQgzB_7 zF3OT%4cT}d!9;Jm77yK~J5~4G+&q?nJ%-Es&M%2`_^*Y?8>sF2;|6dfA`>{(snj)8 zO-}%xy)8Nf5eEdlZ9(w#GfYG#wsT;{lf`~57<-qUM$1sd`u~xk#AkK@G}hdfmy=V?N8vW`)oGT(q38uzxYZ&uJWJ6Rh>ebcHh_W4OjPgyv4`aFCl1QtWmhBsa?!@ekGjL9kQCzYY%f3m z?{6=^e-=Xuu{Hm})olAtAnzdT)CKFSp}P#)b^o~#0&8N2;$=W>L~Be=2P=I1a5pgq zqPQ9>P7$7=D?lS@NE1NmfQCk5ktPxhXaWr+lQ?{`i19=2zvzXX#vOo$3|#Vp%!zly z>5qX`Rh5gkebAm0!s!$7MEI;Qa6FiGKoI47PQwY}{FtrgVf?sQ-~K4|p(u60jm$v5 z7e)iGZGgMXXT-&(V#wyR>>2;sb6!*^e;Cs;Pfu@8JXpaha09g?ewV%%6hB~7loWd$ zhk-P{v}Kr}6nl21=R#UACU}`QoC=OUGWvq!ze!gHitLUIS0X2o}wq`G9Tpe3f;+a3M0>+PI)W|M3p@5y?p7cZ ziXX69rEQ^OoUVgvEjce4aL0O_+H*zR)Hh@5MV9!|J^QzXw_lQ0$a}7}z&s>u=;^nH zrSpTWQ_?x|#9MnbV^}08$7$T#X*Cbe4MiQ0ea3n*7JOYKmGaPQ?IXQva$i-brc6$w znyE$hY6oe%y<2HJk67;r&AeelKOk%O`IWHC+se5`E-zRGcIyPOz;DCRbP;VBZpW~p zzuJY~i{IBBP7{|Xa|p1LA|@3V!c6|u-DDwz7R|H9kaX)YqzTgw9zBK@b98CwqB|~t z7?LJTu)*y=N1zb&9ZS(aDrhj;llwXbV5bo;xj8dKf(oA=m558hp%7a-BLtV(PAz}FY4kyL4<#qYu{8wG?O&n zy#MMyAYkZ##w33C9w07o$p=gxFJNFc!*jc&9pn)cuN?o@W_2ELrgMblF~tkK!#mWa zI6$&693cd5PY2)*wIn(qM*&Lw5Rx*8kmi!<`zR?8(7a~n0fX)-B8x-#IV2i~M6~V~ zH}xGmJ)Q2{bM8n^X)f6G{xec1OkCT4>#lUz0hYtWp_1EqUm?aoaL3Zb?|PwNW!uPO z6abrr8r#qi``TTE!e)*aFfhl!htS)iFu@Pt`E*exeLxnRcj`4%wT@HORG53uLVt-j zAM-?Exz`@AK)x^I95jBi+(7}#9kgD)_zfL!6b$+6?z+ly{m(^E`;HFRE6 ztjjTq&c_10TLWu+r(x0gpY7A}f4T>K@f3yu=ZdoP#>jw^kY0H3uTUFnD-Rbhf8x`j zh2#eawlUECdGp~s2-{-o`jCa%>dP+B+4P6QQmuhDo5AoghRr65vkC1Gkj*CYDuq&l9c0=~O3F^G z*(MAbL5@kFB-}$duL!jXQ^<67N=h=+v&wPc+7J8T`*R9K9ZG&P)m!^CR?cqnweXC| z8{SzIJuW;DWh?gB)wh;6#6R7whSpRBVISKL>^rTvOoC#jf1eQnwmsM#VE$)2(rfam zQmJrN&jCGAecA|MAbkEZWXeQP@#Y?;0oZcApizv1&=qAg9+*4QkB^?2G_v8m%CwQ_ znZ+tvYIS1`2DTmF|6(f4Wq?Hmk6_=>Sxa^-TNs~9y=JkBrdHkPb~!M<(|o0uvM`H- z-{Q-MnZv|PZ{i*Bgdp?Ox#6sino|B|s}0QIFLnM_9MC7KBixd!)1ya##efkIGa%w2 zv@n7MVhMf87z0a^m;8?#SQ^8!!9OJ?CM}cWNB6J&eo#zE85j%qF$R|3b@uq9GUJb2 zVya4`A>tCy){^-Weh!;>*VsM#CA-3cMp+_xcd!00XKT4f^;qdBn@}#`$^li<0$4VI z4V>u_jRI#DM?*yi4bIiH^kH3i@vwShBxol5Xz-lT0RQ;WZAoNg@V^i^Q<(_Rguzh3 zM4CxOnk|Vm^N2K~N1Am;n!S!R1H?CxVqw#x#GbH4iM8s}Sk}rk?m*g%zY{RigqFgb zg?`MYG69DE=D5zIWa_{Rw$m;>MG-S~_G)qb73aWO5sZoPS0~K{1ASs6W-7(}zxJ*J ztck3PK04A=nn+v-5<6-Fp^JzV3l;=XDM}M8h$41TNdW0huz?j73n<+#iUkl5lww;O z;99Xa?Ed$qm;tG4+<#@+{ajX($;`Z&%(?g6bI-NagECW!EJJ|nnu+OxlIX5jj-VpJ zJ7kBwuWEv9Z-kT9E2G|3TK?D^(#;)cu~)uc(!oMW0*xzI2Hu!)$Z zJyB&vDXvoxUUzeac*|(W?R)t3x5;T=8mJ`imW~xgpX5H*;nyRKuGc{fLfpQ2x9<&L zju{0Tvdl4Mg1_M?1nz9Y$QxEoBJ%Ke`>JHJe=?rY2Q`X4BpY(zJxUlE2PDc0iX=TU z(}oghhS2OzO5*Wz;-yF2Ek_qLX~K#mG~?C2>TM1qzV%7FA?KSnAZ2LSsxG~D2YHWH zdG0@guD0*K?Wlnh=yT_sJ}Yo?%fRK8Dm|v{fGCA{D&U)nkj+E;`}kBDq8C2>j$sP% zglgkqWZ%t0Q#|kq^?#HY4j>B*MJru1`w{2^ExNZ3J>6SZ$GIvy+gn!~5APGn6i^>D zvNI+Rgq0Y*;InZVqN!W!;^7)ScniQ%LxJOWvu|#=a)_QqFXMzOG|h5ax(5uuqPXAf z1A7{~A{6fl;uS_Mb1UzQ=M|GP+mt3{ar~WK>Y#@gW5-+_l(_U&?7%>c?wYI1AEZ6e zV-BCJ`p3#`D~{fdP7jus>?|ZZd!pf`f_uwnT#L61i7j;U2`srVYV@e2cc)c``Ak00 zdz{3%vW*)O)AxA~ov)k_M1Pbk*?qvKpkWKG=WZVm7_MF*o-yv`!#KT1**yokN>2B) z-N=}^w70!y{DhBhrYGIgU$$1c^x%&0h-V3R7`>cN^j^VW7{!@Q9T8{lk$1vJ(sc5H z_fMvYzNom${6pw&@VEyz>ckJI96eI?KKXFnx1hy76g$(6rl$r(<%NUyF4}74$;2mK z-$26?@K`##3^Rw}#RcnFZ*U_UR%$G5A_xh)emWers6F*a2#6o*+Iwwyo4N?cn`h>Upa#-GT@+BtaR zp1ods)Nd9^?NDEQuf6dsJLM#DxseeX^V!OB*^{@#$mlm5i_(WR5XG5U05eg}F$=1| zOxzTYMF|@&zhME4ML9KGzWDtDy%-!DwPh^I@HUP`p~X<|zuBJXV`{xl2yM4cn|T7h z-x;>sZnP8`5y@#R@@RG_iax}s`uIOQT82Zd`0hf_!)Ph_MrwuNyO7^c`k}GUWcjA zob>P~^b2>G)IiUXB8(Jz-}uCO688=;Z-bsA116&Eg%4#-MB(T-2m#Uyvi}@P_)eGz z7?w-{4Gl;+3P{WXGR#SVm=waK!!nG+3PvO&@@gSUs5};ORDjc`A(=^~L}q{z=S4K? zBB>^DVL==+6gLz)Q$GH*UCx4u_I0~H?=;j}IW}_Pid&imv37f7Bh0&dDVMK z#iZo17ZktRe-#+8e6RKqEA0ber4oK2mc0Ux+R$hR=PtSj18>@!!x-_7*relCLq}45 z3uyR)pY$Xf0CeKusddv*de#e`@Q^-XISbKe7VE8KkXp=D6C5?&lRP#WJ`|!R;*QMV`R6z z=gY;9_f$VJYFJb+vrsjeUdQhD&R;KXgDzGVc*np|-v3y~?u<2UiQZ{&7S1r6^^Tbu zUcO6SbBfuKN8_JSSVGw?2~meZO`^lQs?W47f+JYfA*~=!@-ZBB^kw#~=ii0xyP!Js zTlsf+v|5$XG^+z~FN@W2Aryfc$T99(+<{I%7EQxjPW}WVAJUd)t=p3ld|dWZ;+V)8 zHtj7KsVUs($PGq@eKPRg6?OJFF&NPh7Ep(~8Q;;EqrsXCw)7MeHIUS@8$h@}&r@HE zV{S$e;Xz6`0LRMHhQ*6N9cI3&*8^LB9}Rk7xu4Db+3#Xz81%sQ6G3G7e5^79YC$YO zi^gIXK7$q#QwUlE7=j*y7=oxTL(pF$aXpSa+T!aqd>Ze`(mGNm15Cp^6^*^1FK{>0 zZWt~DeO@u2+*05{OkCWYc}^A{o}1@kX{j(vTFP2}=nzQ=L!dk+aP!&K%$AvWcI*Ui_;AY0A*(nr>Cu^hG+_E>*%P{ei?;Tn8HNJph7e z93v3#j>hXiji2+>ou;?p6975vU3gz(93WJdX=_onb?J1i?-pLAeI-iRg&*o6x`r0K zgHELnj}@t7$UU9*}0$6@>)|Eez!o-wLLpG-gD1LTX(Wk^xLb<&~ z3Z*A)S8yW*ktzJ*3?{%6ZVkn#4Iu-3T^PvLUGLfN)gIb*@n&MD zK4^7egeMknzL-8Ht|j~@YU|vZa|{2D(457@3rZWm4>yqzlMvre2jA1t#r%&ah2aPm z-Sk$&K+kK)ZR}&(>=rI3gl^hj0i6~?y^Mrf+6K~Fh=bsbok0stzXsGRg z`U!|Q703|9kT`S#umpD%V74mgIz#@#48cBvXla0IxUOMzIP2dQ9%bvv7lsW&^aARD50s zKtue7IUE*Bm50tL)LE;n+E7$!%fZ{faa_#e*oGqJfPQl!Gw2Rzdqe}>hk1_ z_6E>Lb`*+p0s7Vw_a}o-m&i5&(2zv1fb0^H{1KVSc7(!8Oft;t!WIsp6s&P5CaIH|MwEz3u)O-9ATvqr)5y^>X(-zz z!Ox+}I>21$#)<(KZ1xA39tyJmI2!s5I&irW{{~_N__uL+5W(9GV{17L@?YI_Dt8Du z4Ny57(6uT*%1aq8wWzDt1q_}h_Wc#>W!akLkVCUKR zg{2($jSv(+j*z?FuDA?O+@(B6=nH3 zh{AUOf#4A!m?d~*xd?)(Lk3hfGuV7CNAM^JKPR~$p#^{W8vjFr$NnNn4Gxb(s!wA{ z_WXffztsq==rhiZC*so~dD3Cf{>#GrorH<5-2F5MHHW6|`>L`p}Wp0bX zkozcQ(`+EtDME+@f5X24mj(g&1Bypv`v*e+OCFID`~m#7QJv%q_}xJ6cw*v*w>v$) zs3U`f=`6Y9;;)xqU>N5~QopC!9ANO8EWl2Hyk6GlE?@Ejj! zS3LtrR_ZGmot;7Bbln83%3%iiy1R<}>6B!|fQB&`K9Z$j{GmS2U^& zAO|j|gpR`5_7#PJr1326qCG;DGe(O1Rh-!6P_#tX6_;}aC)FH#u<*I-A13Ps9gRqZA9!q-H$q6Ap%FsK4v@IiU^+|b$F1%8_gJf_k{iK)6ObyNKYTS z+G4>@3d(29V(3VgAR3WxE-`(ulq*2Gn%S|+K3#?E zLpv!9gPZ4`eu5ZYU>j57?N$5ekucJ}Iz%4dUYWY416XhQN#0}nS+Qdf_>W`r9%1<@ zCpvf!BJV-^f%j;}R3c2Ad4Lmman_YTftUWZnX?_D7t4%})1KBN-c7%~1zu@;t17sG z*GZ$xD@v3nXAuK0LSf>+%zO0fgU-F*;DdRO?hM(FEd@|S#?-Aj-h*Yi-GKMF{J{Vk zH|rNBdOrTr1)bNR@5Y6ReN*RbZy&_RRCxYPgfn6jAKwcTgi)q4{yj;|uDB2pyaZDo zDI`V|_Qj5HMQCEsUeRNkF9lZ3T_|%fY#q9!RQT4`b*Sqb*F8cOHS1oBA2YKu2mgx> z*5g;P9)n4|5CpRxWwb1+HZ>!iewo{j!J$1C-0NO|Q-%quLJ92nrJ>nGS(!F>TORL0 z&pp-`D<3tsd783OnfT4(Gw;EGTEz0)lU8s~j%Q=C1ogs{4ooIu^4}E;aMqPSn4C6) zwE5Z=`!~x5uDW-}KI?LO!{jA@TsPrj@+78DkL|M^ZggOBN#V)}JhRS9ZoLW81 zsjvF%l+KGkTbAG1l|MBM%EBF({Hrjzp+ca7M%U8P%%ba1xt0#xS_EAy^Ep-sym6i! z%m(F#3V{l!MQ}Ifq4Jn<6ZT+*z~|FDxBhZejsjF>Y)Lo-s0_%AC46tzkA2yJ%0yKD zF;muWS|Q+dIcpv#Q|7EIe^5DJzCu+yPCdL}rK+JoYT3s2hRXWnce-3u9%$_nwql3* z+7481h{|h3`k+gCet8&`Pgo?%@C}s(xgvW4D(C3WfawbL6#^=2Iw7$);WW(D4coOy z;unB2A){|>z5_4c=bwVl4nO|jj2capu}K0;knO+G{szVhmKQ?SV zGc78f8I`Cl1Xs4xWt<_eG2Fh54aTPW^^y@ta@MDS^@g8dvx6~4 zKR>Od0e9&HHb_6}dv9F_NE1Q&Z>7%Q!^mA&{-AVFnBND9=bMe+yH%W5Un8L2-cb6W zbI~*|N@q{g@Y~!kkzl(s?0{NkKP_!;-na0fW)|9YMM zba?(b!2Vb$%|T_hfg)ZQ_ADlu04++y1?a&`aso#z50PJ4}*bOsi`jw3AY`Tsv zU6(sW5ogecK+QL2!Hb?lJ?ME&~#C7m+_mT#>bLK#aAMuKbpm6$z6@Pik*SJU7nMi;Ki1 zImgxSClwwhBC(nv!e0U0unfKNOawh|f(=F@rn&5~{k?kL+787v!iX*`KLkT@zT7Mr z_}BbEC1U-Hkca-Ls8?zPig#;fyEla5Cp(sPL8N7R1{jL9?#$Qt1yD>l(0?jn_JN1p zEI1<;6brHwWI^2G2BLZj?q`pPxhAar&H2 zgP(>9dOAFlnz!&Rdq^ox7x_N56|>?3|ar+US5)O759O zmiH~!ddIB_$*!cIy)C_WSG0+jo0Qsn+1s(N?$kbxd@?k^^ypN87G&z0HTOrDCrpE= zQa$_E&%%6n=q+T9Gk{46HY_h3^)1Jx?qQHuVk-T1Am4iLo7RAQlXCs3W1bJi;0~jR z*h|MBXcsI#w>W)fw~KQ=;9;LA$NVu|r&FkJ%J+bMwV1HbiLe7OR*#Ph1SB8sjkDr~~h zV7SgrU?l6C-#GKUaZcS~kocq2_XFRX#jrz`?+sU|C^kr`Jh5j(gsLVy;I5klj! ze;xy79ATb4#1HfA$G|%edgG^4BEh%#lJFDjjqitxism~jFD~37A>`35+gjlGgSstp zN@jaIhZ(DGoG>!Mu6oth1kLv(bL8_yL((la4f!?Ud?=uj}eZbZjhxJ@E(* z*eqE-Rk$mfC?lMJ?XPuq-(8O124Z}19+>0CJgTrO+8V7egVjWzP>O5MK(G>b=jP12 zPsAK{KY@3E7Qe-?A&haAw53Ox#m8=Ki@tHrUVep1tcCaqdPZ^P+!(3;RnsPxYDJ8FBdHeuOv7ar+Av33 zJgdCU<+J7HwI&iu(sJ3=_P(^I=9S%($G))KZM2MW@z&SWV(Li&bK56USK<8qjG43H zxEna*c+M6?1`>Z-;)RSFAp#Rf;T=Cg*+*kiV9M${5H8rV!UdTq?Q4^9xFEz7>eYxd zPS<+QII!=3jVI1Tk)NM_@WX88(yvu7OjUs%KD`r0B%< z78|2o+(42A@dp<8Wv|aXbpz z<>uLdde6S*LxU;o&|nHHG?>y@Xdq}6@@E5f!1J(IPG_VTSUnK41Nqf@djE&mprw&W z&cbL`&)Yiq$r$9>MFe64B1uvqgof8t%3w0}(~iiut@qho4AFrXY}LROkImDosq++3cuX8Zx{ zq3X4Vt7{T>JGj$Ey_@=_-F4ni>~r(G*!*18ys|FZD&IN)yLEuwi3Qjm(RQU6uyyMq zTdbrLpMovq@^);%wqpUdU1NY<8}A79f`45uf7cntuWUr2_$imJCE)c!*ouwU0pCdMcCtVyE3tEW=0dXnLzXi>{yT{c z*({4&5?I_~Q=MKIwV6tUNr?5x-JIYgfU zBY^QW*Zhhv&Cr z^u1aQ|18WGV_`m!K#fNMtoNuiUA5oww)a?PKh-=V%5f7rfz&L`!A z?Ja$m^7eScP2X$vzMfXGfZ=A?(PpsN=a<650%V>~Efnn=v%!1iuzS((RkLrqPi-iz z58Agyv?ngwudTF+Xemu^B9;5GXy1&;o#;~LyZ>xIar9OpWu7n`fz1f!onKJ2mlH&J zTaT+@AYP$-ExDn7jwb_jP>+Cq^?FHniWnjX9FfJ_3jUP}Pjxxs5`-gUsgrpazgqp# z;kK}_B0?j(d+rhz?&b~;J4g6Et8CAKZ9@g7Atf@jex_|Gbn)sJN-l7wZ&l-6#qy2$3 zaghnl(S@$X8)x@y|EaJT}(K|_uhn-q<%>{H#P^?A03>F4FLf4ZY=fP(|GDh4P z!F>5*zEdZ7Ln|_CO%>jL>SJ_&z-S%1HpkBv3+8SV_=Ksz?p;+P@L-q*26%iH%x%QL z>*((adAB_)?^c*S+u|GsUJ0f|J#O3$&y|^I_V4A!9a_zevk+CZIn=cwX5&DZCrrHB znTxUP)#4Av#>hG(#hPzZ@5u;upC)AZsl8$B_I#1yT#U83YPP}ZOQ#t`jQ!74&xf{c zObqDOtKX2v>Z6>|V%a$1_CGEj&=T~vA>nppC)|##gxj&Pgqz~4Z`hC_A9`smDEN|P zKvEC>oG-gR{ueUj&5w==&k#mwA?4yYZ^_=Y5THVixN6}b%yjP|ISIdft~S?@2-iy> z=bj6{XIIcUDmxBrQxupK%J)IN*ujN8e(tOc<19Hyi4x+1v+Z2q$eqcIbV`&eLg$%f z^b=wQ0^@>vOKo|ODbhvX=x|j^w&re6v#OBpOG8G@>pgn&<*QlROGDoMq2C(-oOqya z&b;F9e|}gDG2PChFQgxIrfM%ZOa~}k^)28NPPY#PzJS@#Kc?F}orZtizyqy=^2SN< zzekdXzZ5}XM#r4+{gy8O3e)t>TFA4|+AGW-(AtrW);muQb8HN)NhCMR|14TpD9#+s zMQiqI@dvF%VNWM(1bkuI+o8=e6(rctlZG0f#!_s~u?FGCxj>N&kz5 z);_uS{4iQmcdrHu6~5K8z1V2&#X@VZ#?bn%br3Yy)bnhqU3KZQFtXfF@y2NFGJguV z+5Y2Nx0dSJEui&bF=V%Da}j<#uIzgQY;X90)?|M~iG&U{jtGv!jWd-M7?I?_I!Y*p zYGx;Z>fvN&CracdgxalFxuI1Nm`iT|3(A>Gk&QmMw?rR%ak;*w4Sf4?M~H_$e_*uQvBfckr=I2#BxLajcBda z4n{HS19&H}fCAsr|62OhymTrT^ zTBrc4!2R@d{zr`9wP@{TJ|g&hHiFM*A^7~p5WGB85HvFN3)~uUFtYa9sYO_ z{r>>qZqB?!v$097gqQgzR-Xr{He_%0Qf(O-C}>A^Lkc8zc1;4%DWEy+#TOt8FoL4&!x5GHiYwI?zI7z&VByLJl}i z3EhNI{)vJbNt4V}r9>uz>rGXX zW-G;Q-Ou-W{-NN+By+1T6E5Bx*a7rS<>voX7utw1^?^QeNga(!rD~@4)ruHA`hsQQ z#mi+nG^!^3D%VG5WPa-CtLJhnrIL#-B?zGY9+jWR!2L8mVA1v;p}%&kI6ClRw+g<; zbwBKo`&sB$!_c4Lza9h|t}Jz$f^W_K6^+yGEOC0fDUvrUO>0;rPccFlxhT(GE&iZ< zdCL35vA&iYO#h^+bhb(*w>OmU*3WYl7v=TB1J>(m`MxHiJYg!g3x*nT2He-YKg=Vi zm$rKqntmU2V9IO@tU^m!=WE;&ls6&Rx#s*BM$t(S7Sy1T1yz#|I@kcZeTN)=Fc?Lf z`k7(q*79bLapUu}^eh1+5$E*a`U#-I$2}*F)9vJ)Eh6SgEv^(( zkd?nRB4?)&b1i^vy^FzfjQw4^yNIX5C*_$=CmNj&UNV(iW8wj8(B9=^!~wMUp*(Ax z(rA6Tns~OwqG;-5S8;rkEl`q$?!)G*)sonyC5a3Pvl}a6R==s?_z9zCBQ7ntSpBbV z>1w@8OS3IqEa{nD)0uLff^j65_+~8~e<0L$Drw(H)yw`{A8PmcaB0to_6DIfha~m5 z5LzT~@{1;AvJ(+P|1&zXwiDlpy;zBIc9tTG_;!0Ee6J;RB@&A*bap|aoP-nQB*59E zMib@0!!#^3tLM_P9}GMWbipF0UsQpA~U$ zc5sjLpjr4qcIM2xbz0w>J)4oq%b9IUj!Wl~r2?y;2(4M6bB*kAUY@=$%WsXCjO^1r z6@Fh%Xo!kiIJK}DCQ?wdk$F55oFI{c%~!IFaUdg{xR{A#4bOEL!7 z^vqALJ}#Fhb?2k6h;=3$cv55)Yjl~7DeT%Ws-9jx|PYMH-ZT_+gprq zUSBhT8zU?YoJdQfPSfj%5n4s2XRyh1o0_$Xm`p$WJQa|EZ}K*X5mIrCkP0zEYNIiN zZ-5&n)0IVek3d7J*5}+e+t5hqz9yn@OrKId(0_|Cf%ruA+$f6jPUyH{qO$PD+fi+Z z69OPkSPq;0$f%S3dr~5A@dR!%fm0Qj3n^iPaD?!lw1ONs75@dO^ot5zNsRqurX3|x z8KGCJyP1@3$w4Qw=Ll#N_7E{I$ymQ)v#71zIe+xZhp_nzYDRn)8m2OiF zgiQDqAQ%%yq_bPhIHbB}KWH&KL>J3k$eyLLzk6)iy+s2{B1;lq1znLsgO*dYhQYzy z|6qunWPT{4^UnW_&oQ;11${XT`Wd-aEO5flN33*SN8@vE!^+O%Ge3ng1ypc9=IIFbd9y)z3|4 zljky=pX_-kEE^VbkOwwbrO2#MP$dl|ub@OFBXrt7UaU~;ykOA%gX;UI*QlBv5cSQn za<(nL_W3F}WVSP58u3CAoWZj$CVghZYpZ&3EI%02*Wy`Cf{VkDsfK) z^5UJ(#cuG1Ry1z6M4uCPKsXhCn4GI(q&#@~h!fmbGucQPS0qySUqH&{d$Sve%idd_ z{lY~`&T{bwDWjzg7+XT_-@dZEN8+5DY1HuSbk09QJ{X0k=ZiQiJHUT6=sIZkPG&;znrul(1;_aF%q_T z(c9>ELS#_1v=?X70Ti#V{^*YxlwcY2GmW4oFW_sv-AP+e(-XK%>FhFmGU3Gqpj?0q z^e1H62Ds>)lRIY?kjk^zr1C@aD%J$BAIi>PXwrU2<-Z1pad0AU7hElfJb~#JgIFyK?9fjv6J;2v=Wfh)PYToa` zq;mN?!@`!}mB^j8TzJmGq;d{qlR1r&%JCkjz{{&XsT||`dDyt#$TVY<1%4!&RVN(( ze~`*ugpsu7tO1yFl&D<>wh#?mMR~pz1`8zZ!H|r&Fj`~8(4lJUiFoT8s^3|_Wa7)s zd7StFsf;IyHR5sNznJGs#%;Y4WF(V=Uz1Rs zQDcP+?wZb&5ARWKF~n+-P_^+F-Rb(Wi_dovtyAB-g|RznCEzvj7~PzC#rOZby-80Q zUe!Y@!&{e5old1weROCu5b8a0Qa>5EVC&CYOlD(uy=9!vyHQ$iaKcDpPUr1$^+hAe zGsC1rt5UxV99tc0IjEcCRI|DLOU;kzg#?70x+8C7R$GuTyX2=+}x zsl8RBZ%EI2QKC@rG*>@O4Usx$E5*TMy|SF&+Okf9tky)6))Ye%zVTOL~=4#ced?qq*U9 zh!@sRj)tnx4}|r#PKX@)AP3+6-D#$vz4*C+VkS<*&lS8Vf`HYNm4^2}l3%8c)A0X* zH)*q+HK&i;q3L4v)iqmoaN`Qj8uBTw=sbIxo8uobN91#294EfI(ca>UoB*pzZd?&l zCB5xplsDr=s?V$2o{bU5}VBPdWC9q z9s~}84d%02mBzpzqIxnl!>^-+kgU(vrI^Q7wVn|F3vzP;Y57m_*f#K>8;vmq{vZy2 z&B@F;t=B9bTM#CL1%IOiVW}D87%VB|8HLC1P30bLwPt1yMSv_57Emwjn(7ecOiA}=$g9ebowBzi18Qtm6hpa z(=*el1ruFeT#$_*{oGCA)Ib5W^~;qIJo;Vt>M3X+#Q016a`Bl3XJYuxD|pox?#elA z_!S?XSJnZ)-}|mx3BQ~*M)uJzXZHsv=?vie-wdv06{pa$Fkfve@wxw!qp7=bGu?-gqw z2s{g(b%y>$a2SM4zX3Ak07lj<)MG2n^P|Va*Fefp>Snl9k zKw)&C2-%m(ciGj;d#L2`n}aTIDZ1h&z(}55zT2~0Z`A$TU|p%o4qX2C;PNGy*r7?) zrG3X`(Xc#UU*&kxlqpJUyXW`0zxLrnt)) (is (= 1 @g->c)))) -(defn- preprocess-slice-records [cram-header subst-mat-init records] - (let [opts {:ds-compressor-overrides (constantly :raw) - :tag-compressor-overrides (constantly (constantly (constantly {:external :raw})))} - container-ctx (-> (context/make-container-context cram-header test-seq-resolver opts) +(defn- preprocess-slice-records [cram-header subst-mat-init opts records] + (let [opts' (merge opts + {:ds-compressor-overrides (constantly :raw) + :tag-compressor-overrides (constantly + (constantly + (constantly {:external :raw})))}) + container-ctx (-> (context/make-container-context cram-header test-seq-resolver opts') (assoc :subst-mat-builder (test-subst-mat-builder subst-mat-init))) stats (record/preprocess-slice-records container-ctx records)] (context/finalize-container-context container-ctx [stats]))) @@ -130,7 +133,7 @@ {:qname "q005", :flag 141, :rname "*", :pos 0, :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "*", :qual "*" :options []}]) - container-ctx (preprocess-slice-records cram-header subst-mat-init records)] + container-ctx (preprocess-slice-records cram-header subst-mat-init {} records)] (is (= [{:qname "q001", :flag 99, :rname "ref", :pos 1, :cigar "5M", :rnext "=", :pnext 151, :tlen 150, :seq "AGAAT", :qual "HFHHH" :options [{:RG {:type "Z", :value "rg001"}} @@ -239,7 +242,7 @@ {:qname "q004", :flag 73, :rname "ref", :pos 20, :end 24, :mapq 0, :cigar "5M", :rnext "*", :pnext 0, :tlen 0, :seq "CTGTG", :qual "AEEEE" :options []}]) - slice-ctx (-> (preprocess-slice-records cram-header subst-mat-init records) + slice-ctx (-> (preprocess-slice-records cram-header subst-mat-init {} records) (context/make-slice-context 0)) _ (record/encode-slice-records slice-ctx records) ds-res (walk/prewalk #(if (fn? %) (%) %) (:ds-encoders slice-ctx)) @@ -411,7 +414,7 @@ {:qname "q003", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "GCACA", :qual "BCCFD" :options []}]) - slice-ctx (-> (preprocess-slice-records cram-header {} records) + slice-ctx (-> (preprocess-slice-records cram-header {} {} records) (context/make-slice-context 0)) _ (record/encode-slice-records slice-ctx records) ds-res (walk/prewalk #(if (fn? %) (%) %) (:ds-encoders slice-ctx)) @@ -555,5 +558,72 @@ (map #(+ (long %) 33)) byte-array String.))) + (is (= {} tag-res)))) + (testing "omit read names" + (let [cram-header {:SQ + [{:SN "ref"} + {:SN "ref2"}]} + opts {:omit-read-names? true} + records (ArrayList. + [{:qname "q001", :flag 99, :rname "ref", :pos 1, :end 5, :mapq 60, + :cigar "5M", :rnext "=", :pnext 11, :tlen 15, :seq "AGAAT", :qual "AAAAA" + :options []} + {:qname "q002", :flag 97, :rname "ref", :pos 6, :end 10, :mapq 15, + :cigar "5M", :rnext "=", :pnext 16, :tlen 15, :seq "GTTAG", :qual "AAAAA" + :options [{:TC {:type "i", :value 3}}]} + {:qname "q003", :flag 97, :rname "ref", :pos 11, :end 15, :mapq 30, + :cigar "4M1S", :rnext "=", :pnext 21, :tlen 15, :seq "ATAAA", :qual "AAAAA" + :options [{:SA {:type "Z", :value "ref2,12345,+,4S1M,10,0"}}]} + {:qname "q004", :flag 73, :rname "ref", :pos 16, :end 20, :mapq 60, + :cigar "5M", :rnext "=", :pnext 19, :tlen 15, :seq "ATAGC", :qual "AAAAA" + :options []} + {:qname "q001", :flag 147, :rname "ref", :pos 11, :end 15, :mapq 60, + :cigar "5M", :rnext "=", :pnext 1, :tlen -15, :seq "ATAAG", :qual "AAAAA" + :options []} + {:qname "q002", :flag 145, :rname "ref", :pos 16, :end 20, :mapq 15, + :cigar "5M", :rnext "=", :pnext 6, :tlen -15, :seq "ATAGC", :qual "AAAAA" + :options [{:TC {:type "i", :value 3}}]} + {:qname "q003", :flag 145, :rname "ref", :pos 21, :end 25, :mapq 30, + :cigar "5M", :rnext "=", :pnext 11, :tlen -15, :seq "TGTGC", :qual "AAAAA" + :options []} + {:qname "q005", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0, + :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "ATGCA", :qual "AAAAA" + :options []} + {:qname "q005", :flag 141, :rname "*", :pos 0, :end 4, :mapq 0, + :cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "TGCAT", :qual "AAAAA" + :options []}]) + slice-ctx (-> (preprocess-slice-records cram-header {} opts records) + (context/make-slice-context 0)) + _ (record/encode-slice-records slice-ctx records) + ds-res (walk/prewalk #(if (fn? %) (%) %) (:ds-encoders slice-ctx))] + (is (= 1 (count (get ds-res :CF)))) + (is (= 3 (get-in ds-res [:CF 0 :content-id]))) + (is (= [5 3 3 3 1 3 3 5 1] (seq (get-in ds-res [:CF 0 :data])))) + + (is (= 1 (count (get ds-res :RN)))) + (is (= 8 (get-in ds-res [:RN 0 :content-id]))) + (is (= "q002\tq003\tq004\tq002\tq003\t" (String. ^bytes (get-in ds-res [:RN 0 :data])))) + + (is (= 1 (count (get ds-res :MF)))) + (is (= 9 (get-in ds-res [:MF 0 :content-id]))) + (is (= [1 1 2 0 0] (seq (get-in ds-res [:MF 0 :data])))) + + (is (= 1 (count (get ds-res :NS)))) + (is (= 10 (get-in ds-res [:NS 0 :content-id]))) + (is (= [0 0 0 0 0] + (map #(bit-and % 0xff) (get-in ds-res [:NS 0 :data])))) + + (is (= 1 (count (get ds-res :NP)))) + (is (= 11 (get-in ds-res [:NP 0 :content-id]))) + (is (= [16 21 19 6 11] + (map #(bit-and % 0xff) (get-in ds-res [:NP 0 :data])))) + + (is (= 1 (count (get ds-res :TS)))) + (is (= 12 (get-in ds-res [:TS 0 :content-id]))) + (is (= [15 15 15 0xff 0xff 0xff 0xff 0x01 0xff 0xff 0xff 0xff 0x01] + (map #(bit-and % 0xff) (get-in ds-res [:TS 0 :data])))) + + (is (= 1 (count (get ds-res :NF)))) + (is (= 13 (get-in ds-res [:NF 0 :content-id]))) + (is (= [3 0] (seq (get-in ds-res [:NF 0 :data]))))))) - (is (= {} tag-res))))) diff --git a/test/cljam/io/cram_test.clj b/test/cljam/io/cram_test.clj index b04b9f7c..e6c1a8e7 100644 --- a/test/cljam/io/cram_test.clj +++ b/test/cljam/io/cram_test.clj @@ -3,11 +3,10 @@ [cljam.io.cram.decode.structure :as struct] [cljam.io.cram.reader :as reader] [cljam.io.sam :as sam] - [cljam.test-common :as common :refer [clean-cache! deftest-remote - prepare-cache! - prepare-cavia! - with-before-after]] + [cljam.io.sam.util.flag :as flag] + [cljam.test-common :as common :refer [deftest-remote deftest-slow]] [clojure.java.io :as io] + [clojure.set :as set] [clojure.test :refer [are deftest is testing]]) (:import [cljam.io.cram.reader CRAMReader] [java.nio.channels FileChannel])) @@ -44,7 +43,7 @@ {:chr "ref2", :start 35, :end 35} 2))) (deftest-remote reader-with-multiple-containers-test - (with-before-after {:before (prepare-cavia!)} + (common/with-before-after {:before (common/prepare-cavia!)} (testing "read all the alignments" (with-open [bam-rdr (sam/reader common/medium-bam-file) cram-rdr (cram/reader common/medium-cram-file @@ -77,8 +76,8 @@ {:chr "chr19", :start 54000000, :end 55000000} 12))))) (deftest writer-test - (with-before-after {:before (prepare-cache!) - :after (clean-cache!)} + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} (testing "unsorted" (with-open [r (cram/reader common/test-cram-file {:reference common/test-fa-file}) @@ -111,9 +110,9 @@ (cram/read-alignments r'))))))) (deftest-remote writer-with-multiple-containers-test - (with-before-after {:before (do (prepare-cavia!) - (prepare-cache!)) - :after (clean-cache!)} + (common/with-before-after {:before (do (common/prepare-cavia!) + (common/prepare-cache!)) + :after (common/clean-cache!)} (with-open [r (cram/reader common/medium-cram-file {:reference common/hg19-twobit-file}) w (cram/writer temp-cram-file @@ -141,8 +140,8 @@ (step)))) (deftest writer-with-reference-embedding-test - (with-before-after {:before (prepare-cache!) - :after (clean-cache!)} + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} (testing "Reference embedding will be enabled if `:embed-reference?` is set to true" (with-open [r (cram/reader common/test-sorted-cram-file {:reference common/test-fa-file}) @@ -184,8 +183,8 @@ (is (common/not-throw? (cram/write-header w (cram/read-header r)))))))) (deftest writer-index-options-test - (with-before-after {:before (prepare-cache!) - :after (clean-cache!)} + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} (testing "A CRAM index file won't be created by default" (with-open [r (cram/reader common/test-sorted-cram-file {:reference common/test-fa-file}) @@ -221,3 +220,48 @@ (cram/write-header w (cram/read-header r)) (cram/write-alignments w (cram/read-alignments r) (cram/read-header r))) (is (.exists (io/file (str temp-cram-file-3 ".crai"))))))) + +(deftest-slow writer-with-read-name-omission-test + (common/with-before-after {:before (do (common/prepare-cavia!) + (common/prepare-cache!)) + :after (common/clean-cache!)} + (with-open [r (cram/reader common/paired-sorted-cram-file + {:reference common/hg38-twobit-file}) + w (cram/writer temp-cram-file + {:reference common/hg38-twobit-file + :omit-read-names? true + :min-single-ref-slice-size 1})] + (cram/write-header w (cram/read-header r)) + (cram/write-alignments w (cram/read-alignments r) (cram/read-header r))) + (with-open [r (cram/reader common/paired-sorted-cram-file + {:reference common/hg38-twobit-file}) + r' (cram/reader temp-cram-file + {:reference common/hg38-twobit-file})] + (is (= (cram/read-header r) + (cram/read-header r'))) + (let [alns (vec (cram/read-alignments r)) + alns' (vec (cram/read-alignments r')) + mates (->> alns + (map-indexed #(assoc %2 :idx %1)) + (group-by :qname) + (into {} (map (fn [[qname xs]] + [qname (into #{} (map :idx) xs)])))) + mates' (->> alns' + (map-indexed #(assoc %2 :idx %1)) + (group-by :qname) + (into {} (map (fn [[qname xs]] + [qname (into #{} (map :idx) xs)])))) + qnames (set (keys mates)) + qnames' (set (keys mates'))] + (is (= (map #(dissoc % :qname) alns) + (map #(dissoc % :qname) alns'))) + (is (not= qnames qnames')) + (is (every? #(= (get mates %) (get mates' %)) + (set/intersection qnames qnames'))) + (is (every? (fn [qname] + (let [indices (get mates' qname)] + (and (= (count indices) 2) + (every? #(flag/primary? (:flag (nth alns' %))) + indices)))) + (set/difference qnames' qnames))) + (is (= (set (vals mates)) (set (vals mates')))))))) diff --git a/test/cljam/test_common.clj b/test/cljam/test_common.clj index 91524567..9a00ba61 100644 --- a/test/cljam/test_common.clj +++ b/test/cljam/test_common.clj @@ -35,6 +35,9 @@ {:resources [{:id "hg19.2bit" :url "https://test.chrov.is/data/refs/hg19.2bit" :sha1 "95e5806aee9ecc092d30a482aaa008ef66cbc468"} + {:id "hg38.2bit" + :url "https://test.chrov.is/data/refs/hg38.2bit" + :sha1 "6fb20ba4de0b49247b78e08c2394d0c4f8594148"} {:id "large.bam" :url "https://test.chrov.is/data/GSM721144_H3K36me3.nodup.bam" :sha1 "ad282c3779120057abc274ad8fad1910a4ad867b"} @@ -168,6 +171,7 @@ (def test-cram-file "test-resources/cram/test.cram") (def test-sorted-cram-file "test-resources/cram/test.sorted.cram") (def test-sorted-with-unknown-so-cram-file "test-resources/cram/test.sorted_with_unknown_so.cram") +(def paired-sorted-cram-file "test-resources/cram/paired.sorted.cram") (def medium-cram-file "test-resources/cram/medium.cram") (def medium-with-standard-tags-cram-file "test-resources/cram/medium_with_standard_tags.cram") (def medium-without-index-cram-file "test-resources/cram/medium_without_index.cram") @@ -200,6 +204,7 @@ (def test-twobit-be-n-file "test-resources/twobit/be-test-n.2bit") (def medium-twobit-file "test-resources/twobit/medium.2bit") (def hg19-twobit-file (cavia/resource mycavia "hg19.2bit")) +(def hg38-twobit-file (cavia/resource mycavia "hg38.2bit")) ;; ### FASTQ files