diff --git a/bench/cljam/io/sam_bench.clj b/bench/cljam/io/sam_bench.clj index fe09d712..3eaaf76a 100644 --- a/bench/cljam/io/sam_bench.clj +++ b/bench/cljam/io/sam_bench.clj @@ -40,9 +40,10 @@ (are [f decode-opts?] (c/quick-bench (with-open [r (sam/reader f)] - (doseq [a (sam/read-alignments r) - opts (when decode-opts? (:options a))] - opts))) + (run! (fn [aln] + (when decode-opts? + (dorun (:options aln)))) + (sam/read-alignments r)))) tcommon/test-bam-file false tcommon/test-bam-file true tcommon/medium-bam-file false @@ -53,7 +54,8 @@ (are [f] (c/quick-bench (with-open [r (sam/reader f)] - (doseq [a (vec (take 10000000 (sam/read-alignments r))) - opts (:options a)] - opts))) + (transduce (take 10000000) + (completing #(dorun (:options %2))) + nil + (sam/read-alignments r)))) tcommon/large-bam-file)) diff --git a/bench/cljam/io/vcf_bench.clj b/bench/cljam/io/vcf_bench.clj index a77f8288..dd1e6620 100644 --- a/bench/cljam/io/vcf_bench.clj +++ b/bench/cljam/io/vcf_bench.clj @@ -1,17 +1,45 @@ (ns cljam.io.vcf-bench (:require [cljam.io.vcf :as vcf] [cljam.test-common :as tcommon] + [cljam.util :as util] + [clojure.java.io :as cio] [libra.bench :refer [are defbench]] [libra.criterium :as c])) -(defbench decode-small-bcf-bench +(defbench encode-variant-small-bcf-bench + (are [f] + (util/with-temp-dir [d "encode-variant-small-bench"] + (with-open [r (vcf/reader f) + w (vcf/writer (cio/file d "out.bcf") + (vcf/meta-info r) + (vcf/header r))] + (let [vs (vec (vcf/read-variants r))] + (c/quick-bench + (vcf/write-variants w vs))))) + tcommon/test-bcf-complex-file)) + +(defbench encode-variant-large-bcf-bench + (tcommon/prepare-cavia!) + (are [f] + (util/with-temp-dir [d "encode-variant-large-bench"] + (with-open [r (vcf/reader f) + w (vcf/writer (cio/file d "out.bcf") + (vcf/meta-info r) + (vcf/header r))] + (let [vs (vec (vcf/read-variants-randomly r {:chr "chr1" :end 30000000} {}))] + (c/quick-bench + (vcf/write-variants w vs))))) + tcommon/test-large-bcf-file)) + +(defbench decode-variant-small-bcf-bench (are [f] (c/quick-bench (with-open [r (vcf/reader f)] (run! (constantly nil) (vcf/read-variants r)))) tcommon/test-bcf-complex-file)) -(defbench decode-large-bcf-bench +(defbench decode-variant-large-bcf-bench + (tcommon/prepare-cavia!) (are [f] (c/quick-bench (with-open [r (vcf/reader f)] diff --git a/src/cljam/algo/convert.clj b/src/cljam/algo/convert.clj index 2be67ed1..989fcfa9 100644 --- a/src/cljam/algo/convert.clj +++ b/src/cljam/algo/convert.clj @@ -1,19 +1,19 @@ (ns cljam.algo.convert "Converters between equivalent formats: SAM/BAM and FASTA/TwoBit." - (:require [clojure.tools.logging :as logging] - [clojure.string :as cstr] - [cljam.common :refer [*n-threads* get-exec-n-threads]] - [cljam.io.sam :as sam] + (:require [cljam.common :refer [*n-threads* get-exec-n-threads]] [cljam.io.bam.encoder :as encoder] + [cljam.io.fastq :as fq] + [cljam.io.sam :as sam] [cljam.io.sam.util.flag :as flag] [cljam.io.sam.util.refs :as refs] - [cljam.util.sequence :as util-seq] [cljam.io.sequence :as cseq] - [cljam.io.fastq :as fq] [cljam.io.util :as io-util] + [cljam.util.sequence :as util-seq] + [clojure.string :as cstr] + [clojure.tools.logging :as logging] [com.climate.claypoole :as cp]) - (:import [java.nio ByteBuffer] - [cljam.io.fastq FASTQRead])) + (:import [cljam.io.fastq FASTQRead] + [java.io ByteArrayOutputStream])) ;;; SAM <-> BAM @@ -30,9 +30,9 @@ n-threads (get-exec-n-threads)] (doseq [blocks (cp/pmap (if (= n-threads 1) :serial (dec n-threads)) (fn [chunk'] - (mapv #(let [bb (ByteBuffer/allocate (encoder/get-block-size %))] - (encoder/encode-alignment bb % refs) - {:data (.array bb)}) + (mapv #(let [baos (ByteArrayOutputStream. (encoder/get-block-size %))] + (encoder/encode-alignment baos % refs) + {:data (.toByteArray baos)}) chunk')) (partition-all num-block (sam/read-alignments rdr {})))] (sam/write-blocks wtr blocks)))) diff --git a/src/cljam/io/bam/core.clj b/src/cljam/io/bam/core.clj index c3bf7505..bae24bf5 100644 --- a/src/cljam/io/bam/core.clj +++ b/src/cljam/io/bam/core.clj @@ -6,7 +6,7 @@ [writer :as writer]] [cljam.io.bam-index :as bai] [cljam.io.util.bgzf :as bgzf] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.data-io :as lsb] [cljam.util :as util]) (:import java.util.Arrays [java.io DataInputStream DataOutputStream IOException FileNotFoundException] diff --git a/src/cljam/io/bam/encoder.clj b/src/cljam/io/bam/encoder.clj index aa25e47b..a5a9d339 100644 --- a/src/cljam/io/bam/encoder.clj +++ b/src/cljam/io/bam/encoder.clj @@ -7,7 +7,7 @@ [cljam.io.sam.util.quality :as qual] [cljam.io.sam.util.cigar :as cigar] [cljam.io.sam.util.sequence :as seq] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.io-stream :as lsb] [cljam.io.bam.common :as common])) (def ^:private ^:const fixed-tag-size 3) diff --git a/src/cljam/io/bam/reader.clj b/src/cljam/io/bam/reader.clj index 7ad3b3b6..be337582 100644 --- a/src/cljam/io/bam/reader.clj +++ b/src/cljam/io/bam/reader.clj @@ -5,7 +5,7 @@ [cljam.io.sam.util.header :as header] [cljam.io.bam-index.core :as bai] [cljam.io.bam.decoder :as decoder] - [cljam.io.util.lsb :as lsb]) + [cljam.io.util.lsb.data-io :as lsb]) (:import [java.io Closeable FileNotFoundException] [cljam.io.bam.decoder BAMRawBlock] [bgzf4j BGZFInputStream])) diff --git a/src/cljam/io/bam/writer.clj b/src/cljam/io/bam/writer.clj index e99cbe0a..6acdfc87 100644 --- a/src/cljam/io/bam/writer.clj +++ b/src/cljam/io/bam/writer.clj @@ -4,7 +4,7 @@ [cljam.io.protocols :as protocols] [cljam.io.sam.util.refs :as refs] [cljam.io.sam.util.header :as header] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.io-stream :as lsb] [cljam.io.bam.common :as common] [cljam.io.bam.encoder :as encoder] [cljam.io.bam.decoder :as bam-decoder] diff --git a/src/cljam/io/bam_index/reader.clj b/src/cljam/io/bam_index/reader.clj index 556fa17d..998b6c69 100644 --- a/src/cljam/io/bam_index/reader.clj +++ b/src/cljam/io/bam_index/reader.clj @@ -1,6 +1,6 @@ (ns cljam.io.bam-index.reader (:require [clojure.java.io :as cio] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.io-stream :as lsb] [cljam.io.bam-index.common :refer [bai-magic]] [cljam.io.util.chunk :as chunk] [cljam.util :as util]) diff --git a/src/cljam/io/bam_index/writer.clj b/src/cljam/io/bam_index/writer.clj index 8b1d00ef..071edf36 100644 --- a/src/cljam/io/bam_index/writer.clj +++ b/src/cljam/io/bam_index/writer.clj @@ -2,7 +2,7 @@ (:require [com.climate.claypoole :as cp] [cljam.common :refer [get-exec-n-threads]] [cljam.io.util.bgzf :as bgzf] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.io-stream :as lsb] [cljam.io.util.bin :as util-bin] [cljam.io.bam-index.common :refer [linear-index-shift linear-index-depth diff --git a/src/cljam/io/bcf/reader.clj b/src/cljam/io/bcf/reader.clj index a6d1169c..57cfd560 100644 --- a/src/cljam/io/bcf/reader.clj +++ b/src/cljam/io/bcf/reader.clj @@ -4,7 +4,7 @@ [cljam.io.protocols :as protocols] [cljam.io.util.bgzf :as bgzf] [cljam.io.util.byte-buffer :as bb] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.io-stream :as lsb] [cljam.io.vcf.reader :as vcf-reader] [cljam.io.vcf.util :as vcf-util] [cljam.util :as util] diff --git a/src/cljam/io/bcf/writer.clj b/src/cljam/io/bcf/writer.clj index 372a9131..2a3f4a1a 100644 --- a/src/cljam/io/bcf/writer.clj +++ b/src/cljam/io/bcf/writer.clj @@ -2,7 +2,7 @@ (:require [clojure.string :as cstr] [cljam.io.protocols :as protocols] [cljam.io.util.bgzf :as bgzf] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.io-stream :as lsb] [cljam.io.vcf.writer :as vw] [cljam.io.vcf.util :as vcf-util] [cljam.util :as util]) diff --git a/src/cljam/io/bigwig.clj b/src/cljam/io/bigwig.clj index ab416d69..007a9cc5 100644 --- a/src/cljam/io/bigwig.clj +++ b/src/cljam/io/bigwig.clj @@ -4,7 +4,7 @@ specifications." (:require [clojure.java.io :as cio] [cljam.io.protocols :as protocols] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.data-io :as lsb] [cljam.util :as util]) (:import [java.net URL] [java.io Closeable IOException RandomAccessFile] diff --git a/src/cljam/io/csi.clj b/src/cljam/io/csi.clj index 752c4ece..8372ce43 100644 --- a/src/cljam/io/csi.clj +++ b/src/cljam/io/csi.clj @@ -1,13 +1,14 @@ (ns cljam.io.csi "Basic I/O of CSI:Coordinate Sorted Index files." - (:require [clojure.string :as cstr] - [cljam.io.util.bgzf :as bgzf] - [cljam.io.util.lsb :as lsb] + (:require [cljam.io.util.bgzf :as bgzf] + [cljam.io.util.bin :as util-bin] [cljam.io.util.chunk :as chunk] - [cljam.io.util.bin :as util-bin]) - (:import java.util.Arrays - [java.io DataInputStream DataOutputStream IOException] - [java.nio ByteBuffer ByteOrder])) + [cljam.io.util.lsb.data-io :as lsb.data] + [cljam.io.util.lsb.io-stream :as lsb.stream] + [clojure.string :as cstr]) + (:import [java.io DataInputStream DataOutputStream IOException] + [java.nio ByteBuffer ByteOrder] + java.util.Arrays)) (def ^:const ^:private csi-magic "CSI\1") @@ -69,17 +70,17 @@ (defn- read-chunks! [rdr] - (let [n-chunk (lsb/read-int rdr)] - (->> #(let [beg (lsb/read-long rdr) end (lsb/read-long rdr)] + (let [n-chunk (lsb.data/read-int rdr)] + (->> #(let [beg (lsb.data/read-long rdr) end (lsb.data/read-long rdr)] (chunk/->Chunk beg end)) (repeatedly n-chunk) vec))) (defn- read-bin-index [rdr] - (let [n-ref (lsb/read-int rdr)] - (->> #(let [bin (lsb/read-int rdr) - loffset (lsb/read-long rdr) + (let [n-ref (lsb.data/read-int rdr)] + (->> #(let [bin (lsb.data/read-int rdr) + loffset (lsb.data/read-long rdr) chunks (read-chunks! rdr)] {:bin (long bin), :loffset loffset, :chunks chunks}) (repeatedly n-ref) @@ -87,14 +88,14 @@ (defn- read-index* ^CSI [^DataInputStream rdr] - (when-not (Arrays/equals ^bytes (lsb/read-bytes rdr 4) (.getBytes csi-magic)) + (when-not (Arrays/equals ^bytes (lsb.data/read-bytes rdr 4) (.getBytes csi-magic)) (throw (IOException. "Invalid CSI file"))) - (let [min-shift (lsb/read-int rdr) - depth (lsb/read-int rdr) - l-aux (lsb/read-int rdr) - aux (lsb/read-bytes rdr l-aux) + (let [min-shift (lsb.data/read-int rdr) + depth (lsb.data/read-int rdr) + l-aux (lsb.data/read-int rdr) + aux (lsb.data/read-bytes rdr l-aux) tabix-aux (try (parse-tabix-aux aux) (catch Throwable _ nil)) - n-ref (lsb/read-int rdr) + n-ref (lsb.data/read-int rdr) bins (vec (repeatedly n-ref #(read-bin-index rdr))) max-bin (util-bin/max-bin depth) bidx (mapv #(into {} (map (juxt :bin :chunks)) %) bins) @@ -211,24 +212,24 @@ [f ^CSI csi] (let [max-bin (util-bin/max-bin (.depth csi))] (with-open [w (DataOutputStream. (bgzf/bgzf-output-stream f))] - (lsb/write-bytes w (.getBytes ^String csi-magic)) - (lsb/write-int w (.min-shift csi)) - (lsb/write-int w (.depth csi)) + (lsb.stream/write-bytes w (.getBytes ^String csi-magic)) + (lsb.stream/write-int w (.min-shift csi)) + (lsb.stream/write-int w (.depth csi)) (let [tabix-aux (some-> (.aux csi) create-tabix-aux)] - (lsb/write-int w (count tabix-aux)) + (lsb.stream/write-int w (count tabix-aux)) (when tabix-aux - (lsb/write-bytes w tabix-aux))) - (lsb/write-int w (count (.bidx csi))) + (lsb.stream/write-bytes w tabix-aux))) + (lsb.stream/write-int w (count (.bidx csi))) (doseq [[offsets loffset] (map vector (.bidx csi) (.loffset csi))] - (lsb/write-int w (count offsets)) + (lsb.stream/write-int w (count offsets)) (doseq [[bin chunks] offsets] - (lsb/write-int w bin) - (lsb/write-long + (lsb.stream/write-int w bin) + (lsb.stream/write-long w (if (<= (long bin) max-bin) (get loffset (util-bin/bin-beg bin (.min-shift csi) (.depth csi))) 0)) - (lsb/write-int w (count chunks)) + (lsb.stream/write-int w (count chunks)) (doseq [chunk' chunks] - (lsb/write-long w (:beg chunk')) - (lsb/write-long w (:end chunk')))))))) + (lsb.stream/write-long w (:beg chunk')) + (lsb.stream/write-long w (:end chunk')))))))) diff --git a/src/cljam/io/tabix.clj b/src/cljam/io/tabix.clj index 4e7dfc07..0af361f5 100644 --- a/src/cljam/io/tabix.clj +++ b/src/cljam/io/tabix.clj @@ -2,7 +2,7 @@ "Alpha - subject to change. Reader of a TABIX format file." (:require [cljam.io.util.bgzf :as bgzf] - [cljam.io.util.lsb :as lsb] + [cljam.io.util.lsb.data-io :as lsb] [cljam.io.util.bin :as util-bin] [clojure.string :as cstr]) (:import java.util.Arrays diff --git a/src/cljam/io/util/lsb.clj b/src/cljam/io/util/lsb.clj index 2231d054..642b1d0d 100644 --- a/src/cljam/io/util/lsb.clj +++ b/src/cljam/io/util/lsb.clj @@ -1,5 +1,6 @@ (ns cljam.io.util.lsb "Reading/writing functions of stream and buffer for little-endian data." + {:deprecated "0.8.5"} (:refer-clojure :exclude [read-string]) (:require [cljam.util :refer [string->bytes]]) (:import [java.io DataInput InputStream DataOutputStream EOFException ByteArrayOutputStream] diff --git a/src/cljam/io/util/lsb/data_io.clj b/src/cljam/io/util/lsb/data_io.clj new file mode 100644 index 00000000..ced40d5a --- /dev/null +++ b/src/cljam/io/util/lsb/data_io.clj @@ -0,0 +1,92 @@ +(ns cljam.io.util.lsb.data-io + "Functions for reading little-endian data using DataInput. + + NOTE: This namespace is intended to be used only from within cljam, and should + not be used outside cljam." + (:refer-clojure :exclude [read-string]) + (:require [cljam.io.util.byte-buffer :as bb]) + (:import [java.io ByteArrayOutputStream DataInput])) + +(defn skip + "Skips over 'length' bytes of data, discarding the skipped bytes." + [^DataInput input ^long length] + (.skipBytes input length)) + +(defn read-byte + "Reads 1 byte. Returns a byte value." + [^DataInput input] + (.readByte input)) + +(defn read-ubyte + "Reads 1 byte. Returns an unsigned byte value as long." + [^DataInput input] + (.readUnsignedByte input)) + +(defn read-short + "Reads 2 bytes. Returns a short value." + [^DataInput input] + (let [bb (bb/allocate-lsb-byte-buffer 2)] + (.readFully input (.array bb)) + (.getShort bb))) + +(defn read-ushort + "Reads 2 bytes. Returns an unsigned short value as long." + [^DataInput input] + (bit-and (short (read-short input)) 0xFFFF)) + +(defn read-int + "Reads 4 bytes. Returns an int value." + [^DataInput input] + (let [bb (bb/allocate-lsb-byte-buffer 4)] + (.readFully input (.array bb)) + (.getInt bb))) + +(defn read-uint + "Reads 4 bytes. Returns an unsigned int value as long." + [^DataInput input] + (bit-and (int (read-int input)) 0xFFFFFFFF)) + +(defn read-long + "Reads 8 bytes. Returns a long value. " + [^DataInput input] + (let [bb (bb/allocate-lsb-byte-buffer 8)] + (.readFully input (.array bb)) + (.getLong bb))) + +(defn read-float + "Reads 4 bytes. Returns a float value." + [^DataInput input] + (Float/intBitsToFloat (read-int input))) + +(defn read-double + "Reads 8 bytes. Returns a double value." + [^DataInput input] + (Double/longBitsToDouble (read-long input))) + +(defn read-bytes + "Reads 'length' bytes to buffer starting from offset bytes. Returns a new byte-array if called without buffer." + ([^DataInput input ^long length] + (let [ba (byte-array length)] + (.readFully input ba) + ba)) + ([^DataInput input buffer ^long offset ^long length] + (.readFully input buffer offset length) + buffer)) + +(defn read-string + "Reads 'length' bytes. Returns a String." + [^DataInput input ^long length] + (let [ba (byte-array length)] + (.readFully input ba) + (String. ba))) + +(defn read-null-terminated-string + "Reads until next null character. Returns a String without the null." + [^DataInput input] + (with-open [baos (ByteArrayOutputStream. 32)] + (loop [] + (let [b (.readByte input)] + (when-not (zero? b) + (.write baos b) + (recur)))) + (.toString baos))) diff --git a/src/cljam/io/util/lsb/io_stream.clj b/src/cljam/io/util/lsb/io_stream.clj new file mode 100644 index 00000000..71b72cb8 --- /dev/null +++ b/src/cljam/io/util/lsb/io_stream.clj @@ -0,0 +1,175 @@ +(ns cljam.io.util.lsb.io-stream + "Functions for reading/writing little-endian data using InputStream/OutputStream. + + NOTE: This namespace is intended to be used only from within cljam, and should + not be used outside cljam." + (:refer-clojure :exclude [read-string]) + (:require [cljam.io.util.byte-buffer :as bb] + [cljam.util :refer [string->bytes]]) + (:import [java.io + ByteArrayOutputStream + EOFException + InputStream + OutputStream])) + +(declare read-bytes) + +(defn skip + "Skips over 'length' bytes of data, discarding the skipped bytes." + [^InputStream stream ^long length] + (.skip stream length)) + +(defn read-byte + "Reads 1 byte. Returns a byte value." + [^InputStream stream] + (unchecked-byte (.read stream))) + +(defn read-ubyte + "Reads 1 byte. Returns an unsigned byte value as long." + [^InputStream stream] + (bit-and (.read stream) 0xFF)) + +(defn read-short + "Reads 2 bytes. Returns a short value." + [^InputStream stream] + (let [bb (bb/allocate-lsb-byte-buffer 2)] + (read-bytes stream (.array bb) 0 2) + (.getShort bb))) + +(defn read-ushort + "Reads 2 bytes. Returns an unsigned short value as long." + [^InputStream stream] + (bit-and (short (read-short stream)) 0xFFFF)) + +(defn read-int + "Reads 4 bytes. Returns an int value." + [^InputStream stream] + (let [bb (bb/allocate-lsb-byte-buffer 4)] + (read-bytes stream (.array bb) 0 4) + (.getInt bb))) + +(defn read-uint + "Reads 4 bytes. Returns an unsigned int value as long." + [^InputStream stream] + (bit-and (int (read-int stream)) 0xFFFFFFFF)) + +(defn read-long + "Reads 8 bytes. Returns a long value. " + [^InputStream stream] + (let [bb (bb/allocate-lsb-byte-buffer 8)] + (read-bytes stream (.array bb) 0 8) + (.getLong bb))) + +(defn read-float + "Reads 4 bytes. Returns a float value." + [^InputStream stream] + (Float/intBitsToFloat (read-int stream))) + +(defn read-double + "Reads 8 bytes. Returns a double value." + [^InputStream stream] + (Double/longBitsToDouble (read-long stream))) + +(defn read-bytes + "Reads 'length' bytes to buffer starting from offset bytes. Returns a new byte-array if called without buffer." + ([^InputStream stream ^long length] + (let [ba (byte-array length)] + (read-bytes stream ba 0 length))) + ([^InputStream stream buffer ^long offset ^long length] + (loop [total-read 0] + (when (< total-read length) + (let [n (.read stream buffer (+ offset total-read) (- length total-read))] + (if (neg? n) + (throw (EOFException. "Premature EOF")) + (recur (+ total-read n)))))) + buffer)) + +(defn read-string + "Reads 'length' bytes. Returns a String." + [^InputStream stream ^long length] + (String. ^bytes (read-bytes stream length))) + +(defn read-null-terminated-string + "Reads until next null character. Returns a String without the null." + [^InputStream stream] + (with-open [baos (ByteArrayOutputStream. 32)] + (loop [] + (let [b (.read stream)] + (when-not (zero? b) + (.write baos b) + (recur)))) + (.toString baos))) + +(defn write-ubyte + "Writes 1 byte." + [^OutputStream stream b] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putShort bb b) + (.write stream (.array bb) 0 1))) + +(defn write-char + "Writes a 1-byte ascii character." + [^OutputStream stream b] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putChar bb b) + (.write stream (.array bb) 0 1))) + +(defn write-short + "Writes a 2-byte short value." + [^OutputStream stream n] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putShort bb n) + (.write stream (.array bb) 0 2))) + +(defn write-ushort + "Writes a 2-byte unsigned short value." + [^OutputStream stream n] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putInt bb n) + (.write stream (.array bb) 0 2))) + +(defn write-int + "Writes a 4-byte integer value." + [^OutputStream stream n] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putInt bb n) + (.write stream (.array bb) 0 4))) + +(defn write-uint + "Writes a 4-byte unsigned integer value." + [^OutputStream stream n] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putInt bb (unchecked-int n)) + (.write stream (.array bb) 0 4))) + +(defn write-long + "Writes an 8-byte long value." + [^OutputStream stream n] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putLong bb n) + (.write stream (.array bb) 0 8))) + +(defn write-float + "Writes a 4-byte float value." + [^OutputStream stream n] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putFloat bb n) + (.write stream (.array bb) 0 4))) + +(defn write-double + "Writes a 8-byte double value." + [^OutputStream stream n] + (let [bb (bb/allocate-lsb-byte-buffer)] + (.putDouble bb n) + (.write stream (.array bb) 0 8))) + +(defn write-bytes + "Writes a byte-array." + [^OutputStream stream ^bytes b] + (.write stream b 0 (alength b))) + +(defn write-string + "Writes a string as a sequence of ascii characters." + [^OutputStream stream s] + (let [data-bytes (string->bytes s)] + (.write stream data-bytes 0 (alength data-bytes)))) diff --git a/test/cljam/io/bam/encoder_test.clj b/test/cljam/io/bam/encoder_test.clj index cee2f5e5..ca36249d 100644 --- a/test/cljam/io/bam/encoder_test.clj +++ b/test/cljam/io/bam/encoder_test.clj @@ -4,13 +4,12 @@ [clojure.string :as cstr] [cljam.io.bam.encoder :as encoder] [cljam.test-common :as test-common]) - (:import [java.io ByteArrayOutputStream DataOutputStream] - [java.nio ByteBuffer ByteOrder])) + (:import [java.io ByteArrayOutputStream DataOutputStream])) (defn- get-encoded-option-data [?type ?values] - (let [bb (ByteBuffer/allocate 200)] - (#'encoder/encode-tag-value bb ?type ?values) - (take (.position bb) (.array bb)))) + (let [baos (ByteArrayOutputStream.)] + (#'encoder/encode-tag-value baos ?type ?values) + (seq (.toByteArray baos)))) (deftest encode-tag-value-test (testing "non-array types" @@ -175,12 +174,11 @@ ))] (are [?aln ?expected-byte] - (= (doto (ByteBuffer/wrap (byte-array (map unchecked-byte ?expected-byte))) - (.order ByteOrder/LITTLE_ENDIAN)) - (with-open [baos (ByteArrayOutputStream. 4096) + (= ?expected-byte + (with-open [baos (ByteArrayOutputStream.) dos (DataOutputStream. baos)] (encoder/encode-alignment dos ?aln '({:name "ref", :len 0})) - (ByteBuffer/wrap (byte-array (.toByteArray baos))))) + (seq (.toByteArray baos)))) (test-common/to-sam-alignment {:qname "r003", :flag 16, :rname "ref", :pos 29, :end 33, :mapq 30, diff --git a/test/cljam/io/bam_test.clj b/test/cljam/io/bam_test.clj index 4cf0b2a2..4743c772 100644 --- a/test/cljam/io/bam_test.clj +++ b/test/cljam/io/bam_test.clj @@ -1,18 +1,18 @@ (ns cljam.io.bam-test "Tests for Processing BAM files." - (:require [clojure.test :refer [deftest are testing]] + (:require [cljam.io.bam.decoder :as decoder] [cljam.io.bam.encoder :as encoder] - [cljam.io.bam.decoder :as decoder]) - (:import [java.nio ByteBuffer])) + [cljam.io.util.byte-buffer :as bb] + [clojure.test :refer [are deftest testing]]) + (:import [java.io ByteArrayOutputStream])) (deftest aux-data-codec-test (testing "non-array types" (are [?type ?value] (= ?value - (as-> (#'encoder/encode-tag-value - (ByteBuffer/allocate 100) ?type ?value) ^ByteBuffer bb - (.position bb 0) - (decoder/parse-tag-single ?type bb))) + (let [baos (ByteArrayOutputStream.)] + (#'encoder/encode-tag-value baos ?type ?value) + (decoder/parse-tag-single ?type (bb/make-lsb-byte-buffer (.toByteArray baos))))) \A \@ \A \A \A \z @@ -52,10 +52,9 @@ (testing "NULL-terminated text" (are [?type ?value] (= ?value - (as-> (#'encoder/encode-tag-value - (ByteBuffer/allocate 100) ?type ?value) ^ByteBuffer bb - (.position bb 0) - (decoder/parse-tag-single ?type bb))) + (let [baos (ByteArrayOutputStream.)] + (#'encoder/encode-tag-value baos ?type ?value) + (decoder/parse-tag-single ?type (bb/make-lsb-byte-buffer (.toByteArray baos))))) \Z "aaaBBB0011223344@@@+++" \Z (str "!\"#$%&'()*+,-./0123456789:;<=>?@" "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"))) @@ -63,11 +62,9 @@ (testing "array types" (are [?value] (= ?value - (as-> - (#'encoder/encode-tag-value - (ByteBuffer/allocate 100) \B ?value) ^ByteBuffer bb - (.position bb 0) - (#'decoder/parse-tag-array bb))) + (let [baos (ByteArrayOutputStream.)] + (#'encoder/encode-tag-value baos \B ?value) + (#'decoder/parse-tag-array (bb/make-lsb-byte-buffer (.toByteArray baos))))) "c,0" "C,0" "s,0" diff --git a/test/cljam/io/bcf/reader_test.clj b/test/cljam/io/bcf/reader_test.clj index 8df3c50e..8dde92be 100644 --- a/test/cljam/io/bcf/reader_test.clj +++ b/test/cljam/io/bcf/reader_test.clj @@ -1,17 +1,15 @@ (ns cljam.io.bcf.reader-test - (:require [clojure.test :refer [deftest are]] - [cljam.io.bcf.reader :as bcf-reader]) - (:import [java.nio ByteBuffer ByteOrder])) + (:require [cljam.io.bcf.reader :as bcf-reader] + [clojure.test :refer [are deftest]]) + (:import [java.io ByteArrayInputStream])) (deftest parse-data-line-deep (are [?bytes ?var] (= ?var (@#'bcf-reader/parse-data-line-deep (@#'bcf-reader/read-data-line-buffer - (doto (ByteBuffer/wrap - (byte-array - (map unchecked-byte ?bytes))) - (.order ByteOrder/LITTLE_ENDIAN))))) + (ByteArrayInputStream. + (byte-array (map unchecked-byte ?bytes)))))) [0x1c 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 diff --git a/test/cljam/io/util/lsb/data_io_test.clj b/test/cljam/io/util/lsb/data_io_test.clj new file mode 100644 index 00000000..d8607121 --- /dev/null +++ b/test/cljam/io/util/lsb/data_io_test.clj @@ -0,0 +1,169 @@ +(ns cljam.io.util.lsb.data-io-test + (:require [cljam.io.util.byte-buffer :as bb] + [cljam.io.util.lsb.data-io :as lsb] + [cljam.test-common :as common] + [clojure.java.io :as cio] + [clojure.test :refer [deftest is]]) + (:import [java.io DataInputStream FileInputStream RandomAccessFile])) + +(deftest about-random-access-file + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} + (let [filename (cio/file common/temp-dir "raf.bin")] + (with-open [raf (RandomAccessFile. filename "rw")] + (let [bb (bb/allocate-lsb-byte-buffer 24)] + (.putLong bb 0x789ABCDEF0123456) + (doseq [c "ABCDEFGH"] + (.put bb (byte (int c)))) + (doseq [c [\I \J \K \L 0 \M \N \O]] + (.put bb (byte (int c)))) + (.write raf (.array bb)))) + (with-open [raf (RandomAccessFile. filename "r")] + (.seek raf 0) + (is (= 0x56 (lsb/read-byte raf))) + (is (= 0x34 (lsb/read-byte raf))) + (is (= 0x12 (lsb/read-byte raf))) + (is (= (unchecked-byte 0xF0) (lsb/read-byte raf))) + (is (= (unchecked-byte 0xDE) (lsb/read-byte raf))) + (is (= (unchecked-byte 0xBC) (lsb/read-byte raf))) + (is (= (unchecked-byte 0x9A) (lsb/read-byte raf))) + (is (= 0x78 (lsb/read-byte raf))) + + (.seek raf 0) + (is (= 0x56 (lsb/read-ubyte raf))) + (is (= 0x34 (lsb/read-ubyte raf))) + (is (= 0x12 (lsb/read-ubyte raf))) + (is (= 0xF0 (lsb/read-ubyte raf))) + (is (= 0xDE (lsb/read-ubyte raf))) + (is (= 0xBC (lsb/read-ubyte raf))) + (is (= 0x9A (lsb/read-ubyte raf))) + (is (= 0x78 (lsb/read-ubyte raf))) + + (.seek raf 0) + (is (= 0x3456 (lsb/read-short raf))) + (is (= (unchecked-short 0xF012) (lsb/read-short raf))) + (is (= (unchecked-short 0xBCDE) (lsb/read-short raf))) + (is (= 0x789A (lsb/read-short raf))) + + (.seek raf 0) + (is (= 0x3456 (lsb/read-ushort raf))) + (is (= 0xF012 (lsb/read-ushort raf))) + (is (= 0xBCDE (lsb/read-ushort raf))) + (is (= 0x789A (lsb/read-ushort raf))) + + (.seek raf 0) + (is (= (unchecked-int 0xF0123456) (lsb/read-int raf))) + (is (= 0x789ABCDE (lsb/read-int raf))) + + (.seek raf 0) + (is (= 0xF0123456 (lsb/read-uint raf))) + (is (= 0x789ABCDE (lsb/read-uint raf))) + + (.seek raf 0) + (is (= 0x789ABCDEF0123456 (lsb/read-long raf))) + + (.seek raf 0) + (is (= (Float/intBitsToFloat (unchecked-int 0xF0123456)) (lsb/read-float raf))) + (is (= (Float/intBitsToFloat 0x789ABCDE) (lsb/read-float raf))) + + (.seek raf 0) + (is (= (Double/longBitsToDouble 0x789ABCDEF0123456) (lsb/read-double raf))) + + (.seek raf 0) + (is (= [0x56 0x34 0x12 0xF0 0xDE 0xBC 0x9A 0x78] + (map #(bit-and % 0xFF) (lsb/read-bytes raf 8)))) + (.seek raf 0) + (is (= [0 0 0x56 0x34 0x12 0] + (map #(bit-and % 0xFF) (lsb/read-bytes raf (byte-array 6) 2 3)))) + + (.seek raf 0) + (lsb/skip raf 8) + (is (= "ABCDEFGH" (lsb/read-string raf 8))) + (is (= "IJKL" (lsb/read-null-terminated-string raf))))))) + +(deftest about-data-input-stream + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} + (let [filename (cio/file common/temp-dir "raf.bin")] + (with-open [raf (RandomAccessFile. filename "rw")] + (let [bb (bb/allocate-lsb-byte-buffer 24)] + (.putLong bb 0x789ABCDEF0123456) + (doseq [c "ABCDEFGH"] + (.put bb (byte (int c)))) + (doseq [c [\I \J \K \L 0 \M \N \O]] + (.put bb (byte (int c)))) + (.write raf (.array bb)))) + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x56 (lsb/read-byte dis))) + (is (= 0x34 (lsb/read-byte dis))) + (is (= 0x12 (lsb/read-byte dis))) + (is (= (unchecked-byte 0xF0) (lsb/read-byte dis))) + (is (= (unchecked-byte 0xDE) (lsb/read-byte dis))) + (is (= (unchecked-byte 0xBC) (lsb/read-byte dis))) + (is (= (unchecked-byte 0x9A) (lsb/read-byte dis))) + (is (= 0x78 (lsb/read-byte dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x56 (lsb/read-ubyte dis))) + (is (= 0x34 (lsb/read-ubyte dis))) + (is (= 0x12 (lsb/read-ubyte dis))) + (is (= 0xF0 (lsb/read-ubyte dis))) + (is (= 0xDE (lsb/read-ubyte dis))) + (is (= 0xBC (lsb/read-ubyte dis))) + (is (= 0x9A (lsb/read-ubyte dis))) + (is (= 0x78 (lsb/read-ubyte dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x3456 (lsb/read-short dis))) + (is (= (unchecked-short 0xF012) (lsb/read-short dis))) + (is (= (unchecked-short 0xBCDE) (lsb/read-short dis))) + (is (= 0x789A (lsb/read-short dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x3456 (lsb/read-ushort dis))) + (is (= 0xF012 (lsb/read-ushort dis))) + (is (= 0xBCDE (lsb/read-ushort dis))) + (is (= 0x789A (lsb/read-ushort dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= (unchecked-int 0xF0123456) (lsb/read-int dis))) + (is (= 0x789ABCDE (lsb/read-int dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0xF0123456 (lsb/read-uint dis))) + (is (= 0x789ABCDE (lsb/read-uint dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x789ABCDEF0123456 (lsb/read-long dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= (Float/intBitsToFloat (unchecked-int 0xF0123456)) (lsb/read-float dis))) + (is (= (Float/intBitsToFloat 0x789ABCDE) (lsb/read-float dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= (Double/longBitsToDouble 0x789ABCDEF0123456) (lsb/read-double dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= [0x56 0x34 0x12 0xF0 0xDE 0xBC 0x9A 0x78] + (map #(bit-and % 0xFF) (lsb/read-bytes dis 8))))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= [0 0 0x56 0x34 0x12 0] + (map #(bit-and % 0xFF) (lsb/read-bytes dis (byte-array 6) 2 3))))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (lsb/skip dis 8) + (is (= "ABCDEFGH" (lsb/read-string dis 8))) + (is (= "IJKL" (lsb/read-null-terminated-string dis))))))) diff --git a/test/cljam/io/util/lsb/io_stream_test.clj b/test/cljam/io/util/lsb/io_stream_test.clj new file mode 100644 index 00000000..ac72bc1f --- /dev/null +++ b/test/cljam/io/util/lsb/io_stream_test.clj @@ -0,0 +1,182 @@ +(ns cljam.io.util.lsb.io-stream-test + (:require [cljam.io.util.byte-buffer :as bb] + [cljam.io.util.lsb.io-stream :as lsb] + [cljam.test-common :as common] + [clojure.java.io :as cio] + [clojure.test :refer [deftest is]]) + (:import [java.io + ByteArrayOutputStream + DataInputStream + FileInputStream + RandomAccessFile])) + +(deftest about-data-input-stream + (common/with-before-after {:before (common/prepare-cache!) + :after (common/clean-cache!)} + (let [filename (cio/file common/temp-dir "raf.bin")] + (with-open [raf (RandomAccessFile. filename "rw")] + (let [bb (bb/allocate-lsb-byte-buffer 24)] + (.putLong bb 0x789ABCDEF0123456) + (doseq [c "ABCDEFGH"] + (.put bb (byte (int c)))) + (doseq [c [\I \J \K \L 0 \M \N \O]] + (.put bb (byte (int c)))) + (.write raf (.array bb)))) + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x56 (lsb/read-byte dis))) + (is (= 0x34 (lsb/read-byte dis))) + (is (= 0x12 (lsb/read-byte dis))) + (is (= (unchecked-byte 0xF0) (lsb/read-byte dis))) + (is (= (unchecked-byte 0xDE) (lsb/read-byte dis))) + (is (= (unchecked-byte 0xBC) (lsb/read-byte dis))) + (is (= (unchecked-byte 0x9A) (lsb/read-byte dis))) + (is (= 0x78 (lsb/read-byte dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x56 (lsb/read-ubyte dis))) + (is (= 0x34 (lsb/read-ubyte dis))) + (is (= 0x12 (lsb/read-ubyte dis))) + (is (= 0xF0 (lsb/read-ubyte dis))) + (is (= 0xDE (lsb/read-ubyte dis))) + (is (= 0xBC (lsb/read-ubyte dis))) + (is (= 0x9A (lsb/read-ubyte dis))) + (is (= 0x78 (lsb/read-ubyte dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x3456 (lsb/read-short dis))) + (is (= (unchecked-short 0xF012) (lsb/read-short dis))) + (is (= (unchecked-short 0xBCDE) (lsb/read-short dis))) + (is (= 0x789A (lsb/read-short dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x3456 (lsb/read-ushort dis))) + (is (= 0xF012 (lsb/read-ushort dis))) + (is (= 0xBCDE (lsb/read-ushort dis))) + (is (= 0x789A (lsb/read-ushort dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= (unchecked-int 0xF0123456) (lsb/read-int dis))) + (is (= 0x789ABCDE (lsb/read-int dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0xF0123456 (lsb/read-uint dis))) + (is (= 0x789ABCDE (lsb/read-uint dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= 0x789ABCDEF0123456 (lsb/read-long dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= (Float/intBitsToFloat (unchecked-int 0xF0123456)) (lsb/read-float dis))) + (is (= (Float/intBitsToFloat 0x789ABCDE) (lsb/read-float dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= (Double/longBitsToDouble 0x789ABCDEF0123456) (lsb/read-double dis)))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= [0x56 0x34 0x12 0xF0 0xDE 0xBC 0x9A 0x78] + (map #(bit-and % 0xFF) (lsb/read-bytes dis 8))))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (is (= [0 0 0x56 0x34 0x12 0] + (map #(bit-and % 0xFF) (lsb/read-bytes dis (byte-array 6) 2 3))))) + + (with-open [fis (FileInputStream. filename) + dis (DataInputStream. fis)] + (lsb/skip dis 8) + (is (= "ABCDEFGH" (lsb/read-string dis 8))) + (is (= "IJKL" (lsb/read-null-terminated-string dis))))))) + +(deftest about-data-output-stream + (let [ba (-> (bb/allocate-lsb-byte-buffer 8) + (.putLong 0x789ABCDEF0123456) + (.array))] + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-ubyte baos 0x56) + (lsb/write-ubyte baos 0x34) + (lsb/write-ubyte baos 0x12) + (lsb/write-ubyte baos 0xF0) + (lsb/write-ubyte baos 0xDE) + (lsb/write-ubyte baos 0xBC) + (lsb/write-ubyte baos 0x9A) + (lsb/write-ubyte baos 0x78) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-short baos 0x3456) + (lsb/write-short baos (unchecked-short 0xF012)) + (lsb/write-short baos (unchecked-short 0xBCDE)) + (lsb/write-short baos 0x789A) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-ushort baos 0x3456) + (lsb/write-ushort baos 0xF012) + (lsb/write-ushort baos 0xBCDE) + (lsb/write-ushort baos 0x789A) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-int baos 0xF0123456) + (lsb/write-int baos 0x789ABCDE) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-uint baos 0xF0123456) + (lsb/write-uint baos 0x789ABCDE) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-long baos 0x789ABCDEF0123456) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-float baos (Float/intBitsToFloat (unchecked-int 0xF0123456))) + (lsb/write-float baos (Float/intBitsToFloat 0x789ABCDE)) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-double baos (Double/longBitsToDouble 0x789ABCDEF0123456)) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret))))) + + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-bytes baos (byte-array [0x56 0x34 0x12 0xF0 0xDE 0xBC 0x9A 0x78])) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret)))))) + + (let [s "ABCDEFGH" + ba (-> (bb/allocate-lsb-byte-buffer 8) + (.put (.getBytes s)) + (.array))] + (with-open [baos (ByteArrayOutputStream. 8)] + (lsb/write-string baos s) + (let [ret (.toByteArray baos)] + (is (= (count ba) (count ret))) + (is (= (seq ba) (seq ret)))))))