Skip to content

Commit

Permalink
FEATURE: llm quotas (#1047)
Browse files Browse the repository at this point in the history
Adds a comprehensive quota management system for LLM models that allows:

- Setting per-group (applied per user in the group) token and usage limits with configurable durations
- Tracking and enforcing token/usage limits across user groups
- Quota reset periods (hourly, daily, weekly, or custom)
- Admin UI for managing quotas with real-time updates

This system provides granular control over LLM API usage by allowing admins
to define limits on both total tokens and number of requests per group.
Supports multiple concurrent quotas per model and automatically handles
quota resets.


Co-authored-by: Keegan George <kgeorge13@gmail.com>
  • Loading branch information
SamSaffron and keegangeorge authored Jan 14, 2025
1 parent 20612fd commit d07cf51
Show file tree
Hide file tree
Showing 50 changed files with 1,684 additions and 151 deletions.
1 change: 1 addition & 0 deletions .discourse-compatibility
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
< 3.4.0.beta4-dev: 20612fde52d3f740cad64823ef8aadb0748b567f
< 3.4.0.beta3-dev: decf1bb49d737ea15308400f22f89d1d1e71d13d
< 3.4.0.beta1-dev: 9d887ad4ace8e33c3fe7dbb39237e882c08b4f0b
< 3.3.0.beta5-dev: 4d8090002f6dcd8e34d41033606bf131fa221475
Expand Down
59 changes: 59 additions & 0 deletions app/controllers/discourse_ai/admin/ai_llm_quotas_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# frozen_string_literal: true

module DiscourseAi
  module Admin
    # JSON endpoints for managing LlmQuota records: list, create, update
    # and delete. Responses are rendered through LlmQuotaSerializer.
    class AiLlmQuotasController < ::Admin::AdminController
      requires_plugin ::DiscourseAi::PLUGIN_NAME

      # Lists every quota; groups are eager-loaded alongside the quotas.
      def index
        serialized_quotas =
          ActiveModel::ArraySerializer.new(
            LlmQuota.includes(:group),
            each_serializer: LlmQuotaSerializer,
          )

        render json: { quotas: serialized_quotas }
      end

      # Creates a quota from the permitted params; responds with 201 and the
      # serialized record, or with the record's errors when saving fails.
      def create
        record = LlmQuota.new(quota_params)
        return render_json_error(record) unless record.save

        render json: LlmQuotaSerializer.new(record), status: :created
      end

      # Updates the quota identified by params[:id] with the permitted params.
      def update
        record = LlmQuota.find(params[:id])
        return render_json_error(record) unless record.update(quota_params)

        render json: LlmQuotaSerializer.new(record)
      end

      # Deletes the quota identified by params[:id]; answers 204 on success
      # and a JSON 404 when no such quota exists.
      def destroy
        LlmQuota.find(params[:id]).destroy!

        head :no_content
      rescue ActiveRecord::RecordNotFound
        render json: { error: I18n.t("not_found") }, status: 404
      end

      private

      # Attributes a client may set on a quota, read from the required
      # top-level `quota` key.
      def quota_params
        params.require(:quota).permit(
          *%i[group_id llm_model_id max_tokens max_usages duration_seconds],
        )
      end
    end
  end
end
39 changes: 38 additions & 1 deletion app/controllers/discourse_ai/admin/ai_llms_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class AiLlmsController < ::Admin::AdminController
requires_plugin ::DiscourseAi::PLUGIN_NAME

def index
llms = LlmModel.all.order(:display_name)
llms = LlmModel.all.includes(:llm_quotas).order(:display_name)

render json: {
ai_llms:
Expand Down Expand Up @@ -40,6 +40,11 @@ def edit

def create
llm_model = LlmModel.new(ai_llm_params)

# we could do nested attributes but the mechanics are not ideal leading
# to lots of complex debugging, this is simpler
quota_params.each { |quota| llm_model.llm_quotas.build(quota) } if quota_params

if llm_model.save
llm_model.toggle_companion_user
render json: LlmModelSerializer.new(llm_model), status: :created
Expand All @@ -51,6 +56,25 @@ def create
def update
llm_model = LlmModel.find(params[:id])

if params[:ai_llm].key?(:llm_quotas)
if quota_params
existing_quota_group_ids = llm_model.llm_quotas.pluck(:group_id)
new_quota_group_ids = quota_params.map { |q| q[:group_id] }

llm_model
.llm_quotas
.where(group_id: existing_quota_group_ids - new_quota_group_ids)
.destroy_all

quota_params.each do |quota_param|
quota = llm_model.llm_quotas.find_or_initialize_by(group_id: quota_param[:group_id])
quota.update!(quota_param)
end
else
llm_model.llm_quotas.destroy_all
end
end

if llm_model.seeded?
return render_json_error(I18n.t("discourse_ai.llm.cannot_edit_builtin"), status: 403)
end
Expand Down Expand Up @@ -110,6 +134,19 @@ def test

private

# Normalizes the `llm_quotas` entries nested under `params[:ai_llm]` into
# plain hashes with integer values.
#
# Returns nil when no quotas were submitted (key missing or blank).
# Otherwise returns one hash per entry: :group_id and :duration_seconds are
# always set, :max_tokens and :max_usages only when a value was provided.
def quota_params
  return unless params[:ai_llm][:llm_quotas].present?

  params[:ai_llm][:llm_quotas].map do |raw|
    normalized = { group_id: raw[:group_id].to_i }
    # Optional limits: leave the key out entirely when nothing was sent.
    %i[max_tokens max_usages].each do |limit|
      normalized[limit] = raw[limit].to_i if raw[limit].present?
    end
    normalized[:duration_seconds] = raw[:duration_seconds].to_i
    normalized
  end
end

def ai_llm_params(updating: nil)
return {} if params[:ai_llm].blank?

Expand Down
1 change: 1 addition & 0 deletions app/models/llm_model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ class LlmModel < ActiveRecord::Base
FIRST_BOT_USER_ID = -1200
BEDROCK_PROVIDER_NAME = "aws_bedrock"

has_many :llm_quotas, dependent: :destroy
belongs_to :user

validates :display_name, presence: true, length: { maximum: 100 }
Expand Down
85 changes: 85 additions & 0 deletions app/models/llm_quota.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# frozen_string_literal: true

# A usage limit for one LLM model, attached to one group and tracked per user
# through LlmQuotaUsage. A quota caps total tokens (+max_tokens+), number of
# requests (+max_usages+), or both, within a window of +duration_seconds+.
class LlmQuota < ActiveRecord::Base
  self.table_name = "llm_quotas"

  belongs_to :group
  belongs_to :llm_model
  # Usage rows are only reachable through their quota, so remove them with it
  # instead of leaving orphaned rows behind when a quota is destroyed.
  has_many :llm_quota_usages, dependent: :destroy

  validates :group_id, presence: true
  # Not validated on create: doing so breaks quotas created through `build`
  # (presumably because the parent model has no id yet — confirm).
  validates :llm_model_id, presence: true, on: :update
  validates :duration_seconds, presence: true, numericality: { greater_than: 0 }
  validates :max_tokens, numericality: { only_integer: true, greater_than: 0, allow_nil: true }
  validates :max_usages, numericality: { greater_than: 0, allow_nil: true }

  validate :at_least_one_limit

  # Verifies that +user+ may still use +llm+.
  #
  # Every quota that applies to the user (through any of their groups) is
  # checked. The request is allowed when at least one of them has not been
  # exceeded; the first error is raised only when all of them are exceeded.
  #
  # @param llm [LlmModel] the model about to be used
  # @param user [User, nil] the requesting user; blank users are never limited
  # @return [true] when the user is blank, no quota applies, or some quota
  #   still has capacity
  # @raise [LlmQuotaUsage::QuotaExceededError] when every applicable quota is
  #   exceeded
  def self.check_quotas!(llm, user)
    return true if user.blank?

    # Load once: the records are needed for both the emptiness check and the loop.
    quotas = quotas_for(llm, user).to_a
    return true if quotas.empty?

    errors =
      quotas.map do |quota|
        usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
        begin
          usage.check_quota!
          nil
        rescue LlmQuotaUsage::QuotaExceededError => e
          e
        end
      end

    # A nil entry means that quota passed its check.
    return true if errors.include?(nil)

    raise errors.first
  end

  # Records one request and its token counts against every quota that applies
  # to +user+ for +llm+. Does nothing for a blank user.
  #
  # @param llm [LlmModel]
  # @param user [User, nil]
  # @param input_tokens [Integer]
  # @param output_tokens [Integer]
  def self.log_usage(llm, user, input_tokens, output_tokens)
    return if user.blank?

    quotas_for(llm, user).each do |quota|
      usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
      usage.increment_usage!(input_tokens: input_tokens, output_tokens: output_tokens)
    end
  end

  # Quotas defined for +llm+ on any of the groups +user+ belongs to.
  def self.quotas_for(llm, user)
    joins(:group).where(llm_model: llm).where(group: user.groups)
  end
  private_class_method :quotas_for

  # Token limit configured for this quota (nil when tokens are not limited).
  def available_tokens
    max_tokens
  end

  # Request limit configured for this quota (nil when requests are not limited).
  def available_usages
    max_usages
  end

  private

  # A quota that limits neither tokens nor requests is invalid.
  def at_least_one_limit
    return unless max_tokens.nil? && max_usages.nil?

    errors.add(:base, I18n.t("discourse_ai.errors.quota_required"))
  end
end

# == Schema Information
#
# Table name: llm_quotas
#
# id :bigint not null, primary key
# group_id :bigint not null
# llm_model_id :bigint not null
# max_tokens :integer
# max_usages :integer
# duration_seconds :integer not null
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_llm_quotas_on_group_id_and_llm_model_id (group_id,llm_model_id) UNIQUE
# index_llm_quotas_on_llm_model_id (llm_model_id)
#
120 changes: 120 additions & 0 deletions app/models/llm_quota_usage.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# frozen_string_literal: true

# Running totals of one user's consumption against one LlmQuota for the
# current window (+started_at+ .. +reset_at+). Counters are zeroed and the
# window restarted once +reset_at+ has passed.
class LlmQuotaUsage < ActiveRecord::Base
  self.table_name = "llm_quota_usages"

  # Raised by #check_quota! when the quota has no capacity left.
  QuotaExceededError = Class.new(StandardError)

  belongs_to :user
  belongs_to :llm_quota

  validates :user_id, presence: true
  validates :llm_quota_id, presence: true
  validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :started_at, presence: true
  validates :reset_at, presence: true

  # Returns the usage row for +user+ and +llm_quota+, creating it with zeroed
  # counters and a window starting now when it does not exist yet.
  #
  # @param user [User]
  # @param llm_quota [LlmQuota]
  # @return [LlmQuotaUsage] a persisted record
  def self.find_or_create_for(user:, llm_quota:)
    usage = find_or_initialize_by(user: user, llm_quota: llm_quota)
    return usage unless usage.new_record?

    now = Time.current
    usage.started_at = now
    usage.reset_at = now + llm_quota.duration_seconds.seconds
    usage.input_tokens_used = 0
    usage.output_tokens_used = 0
    usage.usages = 0
    usage.save!

    usage
  end

  # Starts a fresh window with zeroed counters when the current one has ended;
  # does nothing while the window is still open.
  def reset_if_needed!
    # Read the clock once so the comparison and the new window agree.
    now = Time.current
    return if now < reset_at

    update!(
      input_tokens_used: 0,
      output_tokens_used: 0,
      usages: 0,
      started_at: now,
      reset_at: now + llm_quota.duration_seconds.seconds,
    )
  end

  # Records one request and its token counts, first rolling the window over
  # when it has expired.
  #
  # @param input_tokens [Integer]
  # @param output_tokens [Integer]
  def increment_usage!(input_tokens:, output_tokens:)
    reset_if_needed!

    increment!(:usages)
    increment!(:input_tokens_used, input_tokens)
    increment!(:output_tokens_used, output_tokens)
  end

  # Ensures another request is allowed in the current window.
  #
  # @raise [QuotaExceededError] when the quota has been used up; the message
  #   includes the time remaining until the window resets
  def check_quota!
    reset_if_needed!
    return unless quota_exceeded?

    raise QuotaExceededError,
          I18n.t(
            "discourse_ai.errors.quota_exceeded",
            relative_time: AgeWords.distance_of_time_in_words(reset_at, Time.current),
          )
  end

  # True when either configured limit has been reached.
  #
  # This is checked before a request is made and usage is recorded after it,
  # so the comparison must be `>=`: with `>` a user could make
  # max_usages + 1 requests, and could keep going while #remaining_tokens /
  # #remaining_usages already report 0.
  def quota_exceeded?
    return false if !llm_quota

    (llm_quota.max_tokens.present? && total_tokens_used >= llm_quota.max_tokens) ||
      (llm_quota.max_usages.present? && usages >= llm_quota.max_usages)
  end

  # Input plus output tokens consumed in the current window.
  def total_tokens_used
    input_tokens_used + output_tokens_used
  end

  # Tokens left in the window (never negative), or nil when tokens are not limited.
  def remaining_tokens
    return nil if llm_quota.max_tokens.nil?
    [0, llm_quota.max_tokens - total_tokens_used].max
  end

  # Requests left in the window (never negative), or nil when requests are not limited.
  def remaining_usages
    return nil if llm_quota.max_usages.nil?
    [0, llm_quota.max_usages - usages].max
  end

  # Share of the token limit consumed, as a rounded integer capped at 100;
  # 0 when tokens are not limited.
  def percentage_tokens_used
    return 0 if llm_quota.max_tokens.nil? || llm_quota.max_tokens.zero?
    [(total_tokens_used.to_f / llm_quota.max_tokens * 100).round, 100].min
  end

  # Share of the request limit consumed, as a rounded integer capped at 100;
  # 0 when requests are not limited.
  def percentage_usages_used
    return 0 if llm_quota.max_usages.nil? || llm_quota.max_usages.zero?
    [(usages.to_f / llm_quota.max_usages * 100).round, 100].min
  end
end

# == Schema Information
#
# Table name: llm_quota_usages
#
# id :bigint not null, primary key
# user_id :bigint not null
# llm_quota_id :bigint not null
# input_tokens_used :integer not null
# output_tokens_used :integer not null
# usages :integer not null
# started_at :datetime not null
# reset_at :datetime not null
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_llm_quota_usages_on_llm_quota_id (llm_quota_id)
# index_llm_quota_usages_on_user_id_and_llm_quota_id (user_id,llm_quota_id) UNIQUE
#
1 change: 1 addition & 0 deletions app/serializers/llm_model_serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class LlmModelSerializer < ApplicationSerializer
:used_by

has_one :user, serializer: BasicUserSerializer, embed: :object
has_many :llm_quotas, serializer: LlmQuotaSerializer, embed: :objects

def used_by
llm_usage =
Expand Down
9 changes: 9 additions & 0 deletions app/serializers/llm_quota_serializer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# frozen_string_literal: true

# JSON representation of an LlmQuota: its limits and window length plus the
# name of the group it applies to.
class LlmQuotaSerializer < ApplicationSerializer
  attributes :id, :group_id, :llm_model_id, :max_tokens, :max_usages, :duration_seconds, :group_name

  # Name of the quota's group, or nil when the group record cannot be loaded.
  # Safe navigation keeps one dangling group_id from raising and breaking the
  # whole serialized quota list.
  def group_name
    object.group&.name
  end
end
2 changes: 1 addition & 1 deletion assets/javascripts/discourse/admin/models/ai-llm.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export default class AiLlm extends RestModel {
updateProperties() {
const attrs = this.createProperties();
attrs.id = this.id;

attrs.llm_quotas = this.llm_quotas;
return attrs;
}

Expand Down
Loading

0 comments on commit d07cf51

Please sign in to comment.