From fbe89665a2eda2b1e77953e91c961458ae64b6cc Mon Sep 17 00:00:00 2001 From: Shogo Ohta Date: Fri, 15 Dec 2023 11:11:23 +0900 Subject: [PATCH] wip --- src/cljam/io/cram/codecs/rans4x8.clj | 83 ++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/cljam/io/cram/codecs/rans4x8.clj diff --git a/src/cljam/io/cram/codecs/rans4x8.clj b/src/cljam/io/cram/codecs/rans4x8.clj new file mode 100644 index 00000000..6d7b83df --- /dev/null +++ b/src/cljam/io/cram/codecs/rans4x8.clj @@ -0,0 +1,83 @@ +(ns cljam.io.cram.codecs.rans4x8 + {:clj-kondo/ignore [:missing-docstring]} + (:require [cljam.io.util.byte-buffer :as bb] + [cljam.io.cram.itf8 :as itf8])) + +(defn read-frequencies0 [bb] + (loop [sym (long (bb/read-ubyte bb)) + rle 0 + freqs (transient {})] + (let [freqs' (assoc! freqs sym (itf8/decode-itf8 bb))] + (if (pos? rle) + (recur (inc sym) (dec rle) freqs') + (let [sym' (long (bb/read-ubyte bb)) + rle' (if (= sym' (inc sym)) + (long (bb/read-ubyte bb)) + rle)] + (if (zero? sym') + (persistent! freqs') + (recur sym' rle' freqs'))))))) + +(defn cumulative-frequencies [freqs] + (loop [i 0 + sum 0 + cum-freqs (transient [])] + (if (< i 256) + (let [f (get freqs i 0)] + (recur (inc i) (+ sum (long f)) (conj! cum-freqs sum))) + (persistent! (conj! cum-freqs sum))))) + +(defn lookup-symbol ^long [cum-freqs ^long f] + (loop [s 0] + (if (>= f (long (nth cum-freqs (inc s)))) + (recur (inc s)) + s)) + #_(loop [l 0 + r (count cum-freqs)] + (if (< l r) + (let [m (+ l (quot (- r l) 2)) + f' (long (nth cum-freqs m))] + (if (<= f f') + (recur l m) + (recur (inc m) r))) + l))) + +(defn advance-step ^long [^long c ^long f ^long state] + (-> (* f (bit-shift-right state 12)) + (+ (bit-and state 0xfff)) + (- c))) + +(defn renormalize-state ^long [bb ^long state] + (loop [state state] + (if (< state 0x800000) + (recur (+ (bit-shift-left state 8) + (long (bb/read-ubyte bb)))) + state))) + +(defn decode-rans0 [bb ^long n-out] + (let [freqs (read-frequencies0 bb) + cum-freqs (cumulative-frequencies freqs) + out (byte-array n-out)] + (loop [i 0 + states (into [] (map (fn [_] (bb/read-uint bb))) (range 4))] + (if (< i n-out) + (let [j (rem i 4) + state (long (nth states j)) + f (bit-and state 0xfff) + sym (lookup-symbol cum-freqs f) + state' (->> state + (advance-step (nth cum-freqs sym) (get freqs sym 0)) + (renormalize-state bb))] + (aset out i (byte sym)) + (recur (inc i) (assoc states j state'))) + out)))) + +(defn decode-rans1 [_in _n-out]) + +(defn decode-rans [bb] + (let [order (long (bb/read-ubyte bb)) + _n-in (bb/read-uint bb) + n-out (bb/read-uint bb)] + (if (zero? order) + (decode-rans0 bb n-out) + (decode-rans1 bb n-out))))