generated from discourse/discourse-plugin-skeleton
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds a comprehensive quota management system for LLM models that allows:

- Setting per-group token and usage limits (applied per user in the group) with configurable durations
- Tracking and enforcing token/usage limits across user groups
- Quota reset periods (hourly, daily, weekly, or custom)
- Admin UI for managing quotas with real-time updates

This system provides granular control over LLM API usage by allowing admins to define limits on both total tokens and number of requests per group. It supports multiple concurrent quotas per model and automatically handles quota resets.

Co-authored-by: Keegan George <kgeorge13@gmail.com>
- Loading branch information
1 parent
20612fd
commit d07cf51
Showing
50 changed files
with
1,684 additions
and
151 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
app/controllers/discourse_ai/admin/ai_llm_quotas_controller.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseAi
  module Admin
    # JSON endpoints for managing LlmQuota records (list, create, update,
    # delete). Inherits from ::Admin::AdminController and is tied to the
    # DiscourseAi plugin through `requires_plugin`.
    class AiLlmQuotasController < ::Admin::AdminController
      requires_plugin ::DiscourseAi::PLUGIN_NAME

      # Renders every quota under the `quotas` key, eager-loading each quota's
      # group so the serializer can read it without an extra query per row.
      def index
        serialized_quotas =
          ActiveModel::ArraySerializer.new(
            LlmQuota.includes(:group),
            each_serializer: LlmQuotaSerializer,
          )

        render json: { quotas: serialized_quotas }
      end

      # Builds a quota from the permitted params. Responds with the serialized
      # record and status :created on success, or the record's errors otherwise.
      def create
        llm_quota = LlmQuota.new(quota_params)

        return render_json_error(llm_quota) unless llm_quota.save

        render json: LlmQuotaSerializer.new(llm_quota), status: :created
      end

      # Applies the permitted params to the quota identified by params[:id].
      # Responds with the serialized record, or the record's errors on failure.
      def update
        llm_quota = LlmQuota.find(params[:id])

        return render_json_error(llm_quota) unless llm_quota.update(quota_params)

        render json: LlmQuotaSerializer.new(llm_quota)
      end

      # Deletes the quota identified by params[:id] and responds with an empty
      # body. An unknown id produces a 404 JSON error instead.
      def destroy
        LlmQuota.find(params[:id]).destroy!

        head :no_content
      rescue ActiveRecord::RecordNotFound
        render json: { error: I18n.t("not_found") }, status: 404
      end

      private

      # Attributes a client may set, read from the required `quota` param.
      def quota_params
        permitted_keys = %i[group_id llm_model_id max_tokens max_usages duration_seconds]

        params.require(:quota).permit(*permitted_keys)
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# frozen_string_literal: true | ||
|
||
# A per-group limit on how much a single user may consume of one LLM model
# within a rolling window of `duration_seconds`. A quota caps total tokens
# (`max_tokens`), number of requests (`max_usages`), or both; at least one of
# the two must be set. Per-user consumption is tracked in LlmQuotaUsage.
class LlmQuota < ActiveRecord::Base
  self.table_name = "llm_quotas"

  belongs_to :group
  belongs_to :llm_model
  # Usage rows are meaningless without their quota, so remove them together
  # with it instead of leaving orphaned records behind.
  has_many :llm_quota_usages, dependent: :delete_all

  validates :group_id, presence: true
  # Not validated on create: per the original author's note, doing so breaks
  # the build.
  validates :llm_model_id, presence: true, on: :update
  # All three limits live in integer columns, so reject fractional input
  # instead of letting it be truncated on save (e.g. 0.5 would become 0).
  validates :duration_seconds, presence: true, numericality: { only_integer: true, greater_than: 0 }
  validates :max_tokens, numericality: { only_integer: true, greater_than: 0, allow_nil: true }
  validates :max_usages, numericality: { only_integer: true, greater_than: 0, allow_nil: true }

  validate :at_least_one_limit

  # Verifies that `user` may still use `llm`.
  #
  # Every quota defined for `llm` on one of the user's groups is checked. The
  # request is allowed as soon as at least one of them still has headroom, so
  # the most permissive group wins.
  #
  # @param llm the LLM model the request targets
  # @param user the requesting user; a blank user is never limited
  # @return [true] when the request is allowed
  # @raise [LlmQuotaUsage::QuotaExceededError] when every applicable quota is
  #   exhausted (the first quota's error is raised)
  def self.check_quotas!(llm, user)
    return true if user.blank?

    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)
    return true if quotas.empty?

    # Visit every quota (no short-circuit) so each usage row gets created and
    # reset as needed; collect nil for "within limits" or the raised error.
    errors =
      quotas.map do |quota|
        usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
        begin
          usage.check_quota!
          nil
        rescue LlmQuotaUsage::QuotaExceededError => e
          e
        end
      end

    return true if errors.include?(nil)

    raise errors.first
  end

  # Records one request and its token counts against every quota defined for
  # `llm` on one of the user's groups. Does nothing for a blank user.
  #
  # @param llm the LLM model that served the request
  # @param user the user who made the request
  # @param input_tokens tokens sent to the model
  # @param output_tokens tokens returned by the model
  def self.log_usage(llm, user, input_tokens, output_tokens)
    return if user.blank?

    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)

    quotas.each do |quota|
      usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
      usage.increment_usage!(input_tokens: input_tokens, output_tokens: output_tokens)
    end
  end

  # @return [Integer, nil] the configured token limit (same as `max_tokens`)
  def available_tokens
    max_tokens
  end

  # @return [Integer, nil] the configured request limit (same as `max_usages`)
  def available_usages
    max_usages
  end

  private

  # A quota that limits neither tokens nor requests would never restrict
  # anything, so reject it.
  def at_least_one_limit
    if max_tokens.nil? && max_usages.nil?
      errors.add(:base, I18n.t("discourse_ai.errors.quota_required"))
    end
  end
end
|
||
# == Schema Information | ||
# | ||
# Table name: llm_quotas | ||
# | ||
# id :bigint not null, primary key | ||
# group_id :bigint not null | ||
# llm_model_id :bigint not null | ||
# max_tokens :integer | ||
# max_usages :integer | ||
# duration_seconds :integer not null | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# | ||
# Indexes | ||
# | ||
# index_llm_quotas_on_group_id_and_llm_model_id (group_id,llm_model_id) UNIQUE | ||
# index_llm_quotas_on_llm_model_id (llm_model_id) | ||
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
# frozen_string_literal: true | ||
|
||
# Tracks how much of one LlmQuota a single user has consumed in the current
# window. The window runs from `started_at` to `reset_at`; once it has elapsed
# the counters are zeroed and a new window starts on the next access.
class LlmQuotaUsage < ActiveRecord::Base
  self.table_name = "llm_quota_usages"

  # Raised by #check_quota! when the user has used up the quota.
  QuotaExceededError = Class.new(StandardError)

  belongs_to :user
  belongs_to :llm_quota

  validates :user_id, presence: true
  validates :llm_quota_id, presence: true
  validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :started_at, presence: true
  validates :reset_at, presence: true

  # Returns the usage row for the given user and quota, creating it with
  # zeroed counters and a fresh window when none exists yet.
  #
  # @param user the user whose consumption is tracked
  # @param llm_quota [LlmQuota] the quota being consumed
  # @return [LlmQuotaUsage] a persisted usage record
  def self.find_or_create_for(user:, llm_quota:)
    usage = find_or_initialize_by(user: user, llm_quota: llm_quota)
    return usage unless usage.new_record?

    now = Time.current
    usage.assign_attributes(
      started_at: now,
      reset_at: now + llm_quota.duration_seconds.seconds,
      input_tokens_used: 0,
      output_tokens_used: 0,
      usages: 0,
    )

    begin
      usage.save!
      usage
    rescue ActiveRecord::RecordNotUnique
      # A concurrent request inserted the same (user_id, llm_quota_id) row
      # between our lookup and our insert; use the row that won.
      find_by!(user: user, llm_quota: llm_quota)
    end
  end

  # Starts a new window with zeroed counters once `reset_at` has passed;
  # does nothing while the current window is still open.
  def reset_if_needed!
    now = Time.current
    return if now < reset_at

    update!(
      input_tokens_used: 0,
      output_tokens_used: 0,
      usages: 0,
      started_at: now,
      reset_at: now + llm_quota.duration_seconds.seconds,
    )
  end

  # Records one request plus its token counts, rolling the window over first
  # if it has expired. A nil token count is treated as zero.
  #
  # @param input_tokens [Integer, nil] tokens sent to the model
  # @param output_tokens [Integer, nil] tokens returned by the model
  def increment_usage!(input_tokens:, output_tokens:)
    reset_if_needed!

    increment!(:usages)
    increment!(:input_tokens_used, input_tokens.to_i)
    increment!(:output_tokens_used, output_tokens.to_i)
  end

  # Rolls the window over if it has expired, then raises when the quota is
  # used up.
  #
  # @raise [QuotaExceededError] with a translated message that includes the
  #   time remaining until the window resets
  def check_quota!
    reset_if_needed!

    if quota_exceeded?
      raise QuotaExceededError.new(
              I18n.t(
                "discourse_ai.errors.quota_exceeded",
                relative_time: AgeWords.distance_of_time_in_words(reset_at, Time.current),
              ),
            )
    end
  end

  # Whether the user has no allowance left on either limit.
  #
  # Reaching a limit counts as exceeded: this check runs before a request and
  # usage is recorded after it, so a strict ">" comparison would let one extra
  # request through once the limit had been hit exactly.
  #
  # @return [Boolean] false when the quota record is missing
  def quota_exceeded?
    return false if !llm_quota

    (llm_quota.max_tokens.present? && total_tokens_used >= llm_quota.max_tokens) ||
      (llm_quota.max_usages.present? && usages >= llm_quota.max_usages)
  end

  # @return [Integer] input plus output tokens used in the current window
  def total_tokens_used
    input_tokens_used + output_tokens_used
  end

  # @return [Integer, nil] tokens left in the current window (never negative),
  #   or nil when the quota has no token limit or the quota record is missing
  def remaining_tokens
    remaining_of(llm_quota&.max_tokens, total_tokens_used)
  end

  # @return [Integer, nil] requests left in the current window (never
  #   negative), or nil when the quota has no request limit or the quota
  #   record is missing
  def remaining_usages
    remaining_of(llm_quota&.max_usages, usages)
  end

  # @return [Integer] share of the token limit used, rounded and capped at
  #   100; 0 when there is no token limit
  def percentage_tokens_used
    percentage_of(llm_quota&.max_tokens, total_tokens_used)
  end

  # @return [Integer] share of the request limit used, rounded and capped at
  #   100; 0 when there is no request limit
  def percentage_usages_used
    percentage_of(llm_quota&.max_usages, usages)
  end

  private

  # Allowance left under `limit` after `used`, floored at zero; nil if unlimited.
  def remaining_of(limit, used)
    return nil if limit.nil?

    [0, limit - used].max
  end

  # `used` as a rounded percentage of `limit`, capped at 100; 0 if unlimited.
  def percentage_of(limit, used)
    return 0 if limit.nil? || limit.zero?

    [(used.to_f / limit * 100).round, 100].min
  end
end
|
||
# == Schema Information | ||
# | ||
# Table name: llm_quota_usages | ||
# | ||
# id :bigint not null, primary key | ||
# user_id :bigint not null | ||
# llm_quota_id :bigint not null | ||
# input_tokens_used :integer not null | ||
# output_tokens_used :integer not null | ||
# usages :integer not null | ||
# started_at :datetime not null | ||
# reset_at :datetime not null | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# | ||
# Indexes | ||
# | ||
# index_llm_quota_usages_on_llm_quota_id (llm_quota_id) | ||
# index_llm_quota_usages_on_user_id_and_llm_quota_id (user_id,llm_quota_id) UNIQUE | ||
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# frozen_string_literal: true | ||
|
||
# Serializes an LlmQuota: its own columns plus the name of the group it
# applies to.
class LlmQuotaSerializer < ApplicationSerializer
  attributes :id, :group_id, :llm_model_id, :max_tokens, :max_usages, :duration_seconds, :group_name

  # @return [String, nil] the group's name, or nil when the group no longer
  #   exists, so one dangling quota cannot break the whole response
  def group_name
    object.group&.name
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.