Skip to content

Commit

Permalink
fix: confidence is set properly after categorization
Browse files: browse the repository at this point in the history
  • Loading branch information
ozangulle committed Feb 21, 2025
1 parent 1008ab5 commit b8755ed
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 14 deletions.
10 changes: 6 additions & 4 deletions src/plauna/analysis.clj
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
GISTrainer/MAXENT_VALUE
"")

;; Serializes `model` onto the output stream `os` by calling the model's
;; `.serialize` method. When `model` is nil nothing is written and nil is returned.
;; NOTE(review): this is a rendered diff without +/- markers. The first header line
;; below appears to be the pre-commit name (`serialize-model!`); the second one
;; (`serialize-and-write-model!`) appears to be its replacement.
(defn serialize-model! [^DoccatModel model ^OutputStream os]
(defn serialize-and-write-model! [^DoccatModel model ^OutputStream os]
(when (some? model) (.serialize model os)))

(defn train-data [training-files]
Expand All @@ -71,9 +71,11 @@
;; Categorizes `text` using the document-categorizer model loaded from `model-file`.
;; The text is tokenized by splitting it on a single-space regex before it is
;; handed to the categorizer.
;; Returns a map {:name <category> :confidence <probability>} when the model file
;; exists and the probability is greater than (p/categorization-threshold);
;; otherwise returns {:name nil :confidence 0}.
(defn categorize [text ^File model-file]
(if (.exists model-file)
(let [doccat (DocumentCategorizerME. (DoccatModel. model-file))
;; NOTE(review): rendered diff without +/- markers. The next three lines appear to
;; be the pre-commit version: they compare and report the value at index 0 of the
;; result array, while the name comes from .getBestCategory.
probabilities (.categorize doccat (into-array String (cs/split text #" ")))]
(if (> (get probabilities 0) (p/categorization-threshold))
{:name (.getBestCategory doccat probabilities) :confidence (get probabilities 0)}
;; The following five lines appear to be the post-commit replacement: the best
;; category is resolved first, and its own entry in the result array (looked up
;; via .getIndex) is used both for the threshold check and as :confidence.
cat-results (.categorize doccat (into-array String (cs/split text #" ")))
best-category (.getBestCategory doccat cat-results)
best-probability (get cat-results (.getIndex doccat best-category))]
(if (> best-probability (p/categorization-threshold))
{:name best-category :confidence best-probability}
;; Probability not above the threshold: report no category.
{:name nil :confidence 0}))
;; Model file does not exist: report no category.
{:name nil :confidence 0}))

Expand Down
8 changes: 4 additions & 4 deletions src/plauna/client.clj
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@
(async/go-loop [event (async/<! local-chan)]
(when (some? event)
(when (and (true? (:refolder (:options event))) (some? (:category (:metadata (:payload event)))))
(let [message-id (-> event :payload :header :message-id)
category-name (-> event :payload :metadata :category)]
(let [message-id (get-in event [:payload :header :message-id])
category-name (get-in event [:payload :metadata :category])]
(when (some? category-name)
(t/log! :info (str "Moving email: " (-> event :payload :header :subject) " categorized as: " (-> event :payload :metadata :category)))
(try (move-messages-by-id (-> event :options :store) message-id (-> event :options :original-folder) category-name)
(t/log! :info (str "Moving email: " (get-in event [:payload :header :subject]) " categorized as: " (get-in event [:payload :metadata :category])))
(try (move-messages-by-id (get-in event [:options :store]) message-id (get-in event [:options :original-folder]) category-name)
(catch Exception e (t/log! :error (.getMessage e)))))))
(recur (async/<! local-chan))))))

Expand Down
2 changes: 1 addition & 1 deletion src/plauna/parser.clj
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@
(construct-email headers (flatten (parse-body (:message-id headers) [] message)) participants)))

(defn with-message-id? [parsed-email]
(let [message-id (-> parsed-email :header :message-id)]
(let [message-id (get-in parsed-email [:header :message-id])]
(if (or (nil? message-id) (empty? message-id))
(do (t/log! :error ["Dropping parsed-email with headers" (into {} (:header parsed-email)) "Reason: message-id is empty"])
false)
Expand Down
10 changes: 5 additions & 5 deletions src/plauna/server.clj
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@

;; Builds a redirect response map with status 301. The Location header is the
;; :redirect-url request parameter when it is present, otherwise the :uri of the
;; request itself.
;; The two-argument arity additionally conj-es `messages` onto `global-messages`
;; (via swap!) before building the same response.
;; NOTE(review): 301 is a permanent redirect that clients may cache; if these
;; responses follow form submissions, 303 See Other may be what is intended - confirm.
;; NOTE(review): rendered diff without +/- markers. In each arity the first
;; `redirect-url` binding (threading form) appears to be the pre-commit line and
;; the `get-in` binding after it appears to be its replacement.
(defn redirect-request
([request]
(let [redirect-url (-> request :params :redirect-url)]
(let [redirect-url (get-in request [:params :redirect-url])]
(if (some? redirect-url)
{:status 301 :headers {"Location" redirect-url}}
{:status 301 :headers {"Location" (-> request :uri)}})))
([request messages]
;; Record the message(s) before redirecting.
(swap! global-messages (fn [m] (conj m messages)))
(let [redirect-url (-> request :params :redirect-url)]
(let [redirect-url (get-in request [:params :redirect-url])]
(if (some? redirect-url)
{:status 301 :headers {"Location" redirect-url}}
{:status 301 :headers {"Location" (-> request :uri)}}))))
Expand Down Expand Up @@ -117,7 +117,7 @@
(do (write-all-categorized-emails-to-training-files)
(doseq [training-model (analysis/train-data (files/training-files))]
(let [os (io/output-stream (files/model-file (:language training-model)))]
(analysis/serialize-model! (:model training-model) os))))
(analysis/serialize-and-write-model! (:model training-model) os))))
{:type :alert :content "There are no selected languages to train in. Cannot proceed."}))

(defn categorize-content [content language] ;; FIXME This kills the process if content is nil
Expand Down Expand Up @@ -225,7 +225,7 @@
(success-html-with-body (markup/administration))))

(comp/POST "/emails/parse" request
(let [temp-file (:tempfile (:filename (:params request)))]
(let [temp-file (get-in request [:params :filename :tempfile])]
(files/read-emails-from-mbox (io/input-stream temp-file) @messaging/main-chan)
(redirect-request request {:type :success :content (str "Starting to parse file: " temp-file)})))

Expand Down Expand Up @@ -261,7 +261,7 @@

(comp/POST "/admin/categories" {params :params}
(db/create-category (:name params))
(doseq [client-config (:clients (:email (files/config)))]
(doseq [client-config (get-in (files/config) [:email :clients])]
(client/initialize-client-setup! client-config))
{:status 301
:headers {"Location" "/admin/categories"}
Expand Down

0 comments on commit b8755ed

Please sign in to comment.