FEATURE: llm quotas (#1047)

Adds a comprehensive quota management system for LLM models that allows:

- Setting per-group token and usage limits (applied per user in the group) with configurable durations
- Tracking and enforcing token/usage limits across user groups
- Quota reset periods (hourly, daily, weekly, or custom)
- Admin UI for managing quotas with real-time updates

This system provides granular control over LLM API usage by allowing admins to define limits on both total tokens and number of requests per group. It supports multiple concurrent quotas per model and automatically handles quota resets.

Co-authored-by: Keegan George <kgeorge13@gmail.com>
discourse · Jan 14, 2025 · d07cf51 · d07cf51
1 parent 20612fd
commit d07cf51
Show file tree

Hide file tree

Showing 50 changed files with 1,684 additions and 151 deletions.
diff --git a/.discourse-compatibility b/.discourse-compatibility
@@ -1,3 +1,4 @@
+< 3.4.0.beta4-dev: 20612fde52d3f740cad64823ef8aadb0748b567f
 < 3.4.0.beta3-dev: decf1bb49d737ea15308400f22f89d1d1e71d13d
 < 3.4.0.beta1-dev: 9d887ad4ace8e33c3fe7dbb39237e882c08b4f0b
 < 3.3.0.beta5-dev: 4d8090002f6dcd8e34d41033606bf131fa221475

diff --git a/app/controllers/discourse_ai/admin/ai_llm_quotas_controller.rb b/app/controllers/discourse_ai/admin/ai_llm_quotas_controller.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Admin
+    # Admin JSON endpoints for listing, creating, updating and deleting
+    # LlmQuota records.
+    class AiLlmQuotasController < ::Admin::AdminController
+      requires_plugin ::DiscourseAi::PLUGIN_NAME
+
+      # Renders every quota (groups eager loaded) under the `quotas` key.
+      def index
+        all_quotas = LlmQuota.includes(:group)
+        serialized = ActiveModel::ArraySerializer.new(all_quotas, each_serializer: LlmQuotaSerializer)
+
+        render json: { quotas: serialized }
+      end
+
+      # Builds a quota from the permitted params; responds with the serialized
+      # record and :created on success, or the record's errors otherwise.
+      def create
+        new_quota = LlmQuota.new(quota_params)
+        return render_json_error(new_quota) unless new_quota.save
+
+        render json: LlmQuotaSerializer.new(new_quota), status: :created
+      end
+
+      # Applies the permitted params to the quota identified by params[:id].
+      def update
+        existing_quota = LlmQuota.find(params[:id])
+        return render_json_error(existing_quota) unless existing_quota.update(quota_params)
+
+        render json: LlmQuotaSerializer.new(existing_quota)
+      end
+
+      # Deletes the quota identified by params[:id]; answers 404 with a JSON
+      # error body when no such quota exists.
+      def destroy
+        LlmQuota.find(params[:id]).destroy!
+        head :no_content
+      rescue ActiveRecord::RecordNotFound
+        render json: { error: I18n.t("not_found") }, status: 404
+      end
+
+      private
+
+      # Attributes a client may set, read from the required `quota` key.
+      def quota_params
+        permitted_keys = %i[group_id llm_model_id max_tokens max_usages duration_seconds]
+        params.require(:quota).permit(*permitted_keys)
+      end
+    end
+  end
+end
diff --git a/app/controllers/discourse_ai/admin/ai_llms_controller.rb b/app/controllers/discourse_ai/admin/ai_llms_controller.rb
@@ -6,7 +6,7 @@ class AiLlmsController < ::Admin::AdminController
       requires_plugin ::DiscourseAi::PLUGIN_NAME
 
       def index
-        llms = LlmModel.all.order(:display_name)
+        llms = LlmModel.all.includes(:llm_quotas).order(:display_name)
 
         render json: {
                  ai_llms:
@@ -40,6 +40,11 @@ def edit
 
       def create
         llm_model = LlmModel.new(ai_llm_params)
+
+        # we could do nested attributes but the mechanics are not ideal leading
+        # to lots of complex debugging, this is simpler
+        quota_params.each { |quota| llm_model.llm_quotas.build(quota) } if quota_params
+
         if llm_model.save
           llm_model.toggle_companion_user
           render json: LlmModelSerializer.new(llm_model), status: :created
@@ -51,6 +56,25 @@ def create
       def update
         llm_model = LlmModel.find(params[:id])
 
+        if params[:ai_llm].key?(:llm_quotas)
+          if quota_params
+            existing_quota_group_ids = llm_model.llm_quotas.pluck(:group_id)
+            new_quota_group_ids = quota_params.map { |q| q[:group_id] }
+
+            llm_model
+              .llm_quotas
+              .where(group_id: existing_quota_group_ids - new_quota_group_ids)
+              .destroy_all
+
+            quota_params.each do |quota_param|
+              quota = llm_model.llm_quotas.find_or_initialize_by(group_id: quota_param[:group_id])
+              quota.update!(quota_param)
+            end
+          else
+            llm_model.llm_quotas.destroy_all
+          end
+        end
+
         if llm_model.seeded?
           return render_json_error(I18n.t("discourse_ai.llm.cannot_edit_builtin"), status: 403)
         end
@@ -110,6 +134,19 @@ def test
 
       private
 
+      def quota_params
+        if params[:ai_llm][:llm_quotas].present?
+          params[:ai_llm][:llm_quotas].map do |quota|
+            mapped = {}
+            mapped[:group_id] = quota[:group_id].to_i
+            mapped[:max_tokens] = quota[:max_tokens].to_i if quota[:max_tokens].present?
+            mapped[:max_usages] = quota[:max_usages].to_i if quota[:max_usages].present?
+            mapped[:duration_seconds] = quota[:duration_seconds].to_i
+            mapped
+          end
+        end
+      end
+
       def ai_llm_params(updating: nil)
         return {} if params[:ai_llm].blank?
 

diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
@@ -4,6 +4,7 @@ class LlmModel < ActiveRecord::Base
   FIRST_BOT_USER_ID = -1200
   BEDROCK_PROVIDER_NAME = "aws_bedrock"
 
+  has_many :llm_quotas, dependent: :destroy
   belongs_to :user
 
   validates :display_name, presence: true, length: { maximum: 100 }

diff --git a/app/models/llm_quota.rb b/app/models/llm_quota.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+class LlmQuota < ActiveRecord::Base
+  self.table_name = "llm_quotas"
+
+  belongs_to :group
+  belongs_to :llm_model
+  has_many :llm_quota_usages
+
+  validates :group_id, presence: true
+  # we can not validate on create cause it breaks build
+  validates :llm_model_id, presence: true, on: :update
+  validates :duration_seconds, presence: true, numericality: { greater_than: 0 }
+  validates :max_tokens, numericality: { only_integer: true, greater_than: 0, allow_nil: true }
+  validates :max_usages, numericality: { greater_than: 0, allow_nil: true }
+
+  validate :at_least_one_limit
+
+  def self.check_quotas!(llm, user)
+    return true if user.blank?
+    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)
+
+    return true if quotas.empty?
+    errors =
+      quotas.map do |quota|
+        usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
+        begin
+          usage.check_quota!
+          nil
+        rescue LlmQuotaUsage::QuotaExceededError => e
+          e
+        end
+      end
+
+    return if errors.include?(nil)
+
+    raise errors.first
+  end
+
+  def self.log_usage(llm, user, input_tokens, output_tokens)
+    return if user.blank?
+
+    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)
+
+    quotas.each do |quota|
+      usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
+      usage.increment_usage!(input_tokens: input_tokens, output_tokens: output_tokens)
+    end
+  end
+
+  def available_tokens
+    max_tokens
+  end
+
+  def available_usages
+    max_usages
+  end
+
+  private
+
+  def at_least_one_limit
+    if max_tokens.nil? && max_usages.nil?
+      errors.add(:base, I18n.t("discourse_ai.errors.quota_required"))
+    end
+  end
+end
+
+# == Schema Information
+#
+# Table name: llm_quotas
+#
+#  id               :bigint           not null, primary key
+#  group_id         :bigint           not null
+#  llm_model_id     :bigint           not null
+#  max_tokens       :integer
+#  max_usages       :integer
+#  duration_seconds :integer          not null
+#  created_at       :datetime         not null
+#  updated_at       :datetime         not null
+#
+# Indexes
+#
+#  index_llm_quotas_on_group_id_and_llm_model_id  (group_id,llm_model_id) UNIQUE
+#  index_llm_quotas_on_llm_model_id               (llm_model_id)
+#
diff --git a/app/models/llm_quota_usage.rb b/app/models/llm_quota_usage.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+class LlmQuotaUsage < ActiveRecord::Base
+  self.table_name = "llm_quota_usages"
+
+  QuotaExceededError = Class.new(StandardError)
+
+  belongs_to :user
+  belongs_to :llm_quota
+
+  validates :user_id, presence: true
+  validates :llm_quota_id, presence: true
+  validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :started_at, presence: true
+  validates :reset_at, presence: true
+
+  def self.find_or_create_for(user:, llm_quota:)
+    usage = find_or_initialize_by(user: user, llm_quota: llm_quota)
+
+    if usage.new_record?
+      now = Time.current
+      usage.started_at = now
+      usage.reset_at = now + llm_quota.duration_seconds.seconds
+      usage.input_tokens_used = 0
+      usage.output_tokens_used = 0
+      usage.usages = 0
+      usage.save!
+    end
+
+    usage
+  end
+
+  def reset_if_needed!
+    return if Time.current < reset_at
+
+    now = Time.current
+    update!(
+      input_tokens_used: 0,
+      output_tokens_used: 0,
+      usages: 0,
+      started_at: now,
+      reset_at: now + llm_quota.duration_seconds.seconds,
+    )
+  end
+
+  def increment_usage!(input_tokens:, output_tokens:)
+    reset_if_needed!
+
+    increment!(:usages)
+    increment!(:input_tokens_used, input_tokens)
+    increment!(:output_tokens_used, output_tokens)
+  end
+
+  def check_quota!
+    reset_if_needed!
+
+    if quota_exceeded?
+      raise QuotaExceededError.new(
+              I18n.t(
+                "discourse_ai.errors.quota_exceeded",
+                relative_time: AgeWords.distance_of_time_in_words(reset_at, Time.now),
+              ),
+            )
+    end
+  end
+
+  def quota_exceeded?
+    return false if !llm_quota
+
+    (llm_quota.max_tokens.present? && total_tokens_used > llm_quota.max_tokens) ||
+      (llm_quota.max_usages.present? && usages > llm_quota.max_usages)
+  end
+
+  def total_tokens_used
+    input_tokens_used + output_tokens_used
+  end
+
+  def remaining_tokens
+    return nil if llm_quota.max_tokens.nil?
+    [0, llm_quota.max_tokens - total_tokens_used].max
+  end
+
+  def remaining_usages
+    return nil if llm_quota.max_usages.nil?
+    [0, llm_quota.max_usages - usages].max
+  end
+
+  def percentage_tokens_used
+    return 0 if llm_quota.max_tokens.nil? || llm_quota.max_tokens.zero?
+    [(total_tokens_used.to_f / llm_quota.max_tokens * 100).round, 100].min
+  end
+
+  def percentage_usages_used
+    return 0 if llm_quota.max_usages.nil? || llm_quota.max_usages.zero?
+    [(usages.to_f / llm_quota.max_usages * 100).round, 100].min
+  end
+end
+
+# == Schema Information
+#
+# Table name: llm_quota_usages
+#
+#  id                 :bigint           not null, primary key
+#  user_id            :bigint           not null
+#  llm_quota_id       :bigint           not null
+#  input_tokens_used  :integer          not null
+#  output_tokens_used :integer          not null
+#  usages             :integer          not null
+#  started_at         :datetime         not null
+#  reset_at           :datetime         not null
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#
+# Indexes
+#
+#  index_llm_quota_usages_on_llm_quota_id              (llm_quota_id)
+#  index_llm_quota_usages_on_user_id_and_llm_quota_id  (user_id,llm_quota_id) UNIQUE
+#
diff --git a/app/serializers/llm_model_serializer.rb b/app/serializers/llm_model_serializer.rb
@@ -20,6 +20,7 @@ class LlmModelSerializer < ApplicationSerializer
              :used_by
 
   has_one :user, serializer: BasicUserSerializer, embed: :object
+  has_many :llm_quotas, serializer: LlmQuotaSerializer, embed: :objects
 
   def used_by
     llm_usage =

diff --git a/app/serializers/llm_quota_serializer.rb b/app/serializers/llm_quota_serializer.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class LlmQuotaSerializer < ApplicationSerializer
+  attributes :id, :group_id, :llm_model_id, :max_tokens, :max_usages, :duration_seconds, :group_name
+
+  def group_name
+    object.group.name
+  end
+end
diff --git a/assets/javascripts/discourse/admin/models/ai-llm.js b/assets/javascripts/discourse/admin/models/ai-llm.js
@@ -21,7 +21,7 @@ export default class AiLlm extends RestModel {
   updateProperties() {
     const attrs = this.createProperties();
     attrs.id = this.id;
-
+    attrs.llm_quotas = this.llm_quotas;
     return attrs;
   }