generated from discourse/discourse-plugin-skeleton
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DEV: Embedding tables' model_id has to be a bigint (#1058)
* DEV: Embedding tables' model_id has to be a bigint
* Drop old search_bit indexes
* Copy rag fragment embeddings created during deploy window
- Loading branch information
1 parent
d07cf51
commit 65bbcd7
Showing
3 changed files
with
139 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# frozen_string_literal: true | ||
|
||
# Creates the replacement embeddings tables (ai_topics_embeddings,
# ai_posts_embeddings, ai_document_fragments_embeddings) whose target id and
# model_id columns are bigint, copies every row over from the legacy tables,
# and builds the per-model binary-quantized HNSW search indexes on them.
#
# The migration is irreversible: `down` always raises.
class NewEmbeddingsTables < ActiveRecord::Migration[7.2]
  # One entry per embeddings table:
  #   table         - name of the table created by this migration
  #   legacy_table  - name of the table the rows are copied from
  #   target_column - bigint column identifying the embedded record
  #   unique_index  - name of the unique (model, strategy, target) index
  EMBEDDINGS_TABLES = [
    {
      table: "ai_topics_embeddings",
      legacy_table: "ai_topic_embeddings",
      target_column: "topic_id",
      unique_index: "index_ai_topics_embeddings_on_model_strategy_topic",
    },
    {
      table: "ai_posts_embeddings",
      legacy_table: "ai_post_embeddings",
      target_column: "post_id",
      unique_index: "index_ai_posts_embeddings_on_model_strategy_post",
    },
    {
      table: "ai_document_fragments_embeddings",
      legacy_table: "ai_document_fragment_embeddings",
      target_column: "rag_document_fragment_id",
      unique_index: "index_ai_fragments_embeddings_on_model_strategy_fragment",
    },
  ].freeze

  # Columns every embeddings table has besides its target id column, in the
  # order they are listed in the INSERT statements.
  SHARED_COLUMNS = %w[
    model_id
    model_version
    strategy_id
    strategy_version
    digest
    embeddings
    created_at
    updated_at
  ].freeze

  # Our supported embeddings model IDs and their vector dimensions.
  # Copied from 20241008054440_create_binary_indexes_for_embeddings.
  MODEL_DIMENSIONS = {
    1 => 768,
    2 => 1536,
    3 => 1024,
    4 => 1024,
    5 => 768,
    6 => 1536,
    7 => 2000,
    8 => 1024,
  }.freeze

  def up
    EMBEDDINGS_TABLES.each { |spec| create_embeddings_table(spec) }

    # Load the data before building the HNSW indexes: building an index once
    # over the loaded rows is cheaper than maintaining up to eight partial
    # indexes for every copied row.
    EMBEDDINGS_TABLES.each { |spec| copy_legacy_embeddings(spec) }
    EMBEDDINGS_TABLES.each { |spec| create_search_bit_indexes(spec) }
  end

  def down
    raise ActiveRecord::IrreversibleMigration
  end

  private

  # Creates one embeddings table (no surrogate primary key) together with its
  # unique index on (model_id, strategy_id, <target column>).
  def create_embeddings_table(spec)
    target_column = spec[:target_column].to_sym

    create_table(spec[:table].to_sym, id: false) do |t|
      t.bigint target_column, null: false
      t.bigint :model_id, null: false
      t.integer :model_version, null: false
      t.integer :strategy_id, null: false
      t.integer :strategy_version, null: false
      t.text :digest, null: false
      t.column :embeddings, "halfvec", null: false
      t.timestamps

      t.index [:model_id, :strategy_id, target_column], unique: true, name: spec[:unique_index]
    end
  end

  # Copies every row of the legacy table into its replacement.
  #
  # Columns are named on both sides so the copy does not depend on the
  # physical column order of the legacy table (which `SELECT *` would).
  # NOTE(review): assumes the legacy tables expose the same column names as
  # the new ones - confirm against the legacy schema.
  def copy_legacy_embeddings(spec)
    column_list = [spec[:target_column], *SHARED_COLUMNS].join(", ")

    execute <<~SQL
      INSERT INTO #{spec[:table]} (#{column_list})
      SELECT #{column_list} FROM #{spec[:legacy_table]};
    SQL
  end

  # Creates one partial HNSW index per supported model (strategy 1 only) over
  # the binary-quantized embeddings, cast to that model's dimension count.
  def create_search_bit_indexes(spec)
    table = spec[:table]

    MODEL_DIMENSIONS.each do |model_id, dimensions|
      execute <<~SQL
        CREATE INDEX #{table}_#{model_id}_1_search_bit ON #{table}
        USING hnsw ((binary_quantize(embeddings)::bit(#{dimensions})) bit_hamming_ops)
        WHERE model_id = #{model_id} AND strategy_id = 1;
      SQL
    end
  end
end
51 changes: 51 additions & 0 deletions
51
db/post_migrate/20250113171444_drop_old_embedding_tables.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# frozen_string_literal: true | ||
# Post-deploy cleanup for the embeddings table replacement: copies any RAG
# fragment embeddings that were written to the legacy table during the deploy
# window, then drops the legacy search_bit indexes and the legacy tables.
#
# The migration is irreversible: `up` drops the legacy tables, so `down`
# raises instead of silently reporting a successful rollback.
class DropOldEmbeddingTables < ActiveRecord::Migration[7.2]
  # Legacy tables removed by this migration, in drop order.
  LEGACY_TABLES = %w[
    ai_topic_embeddings
    ai_post_embeddings
    ai_document_fragment_embeddings
  ].freeze

  # Model ids that have a `<table>_<model_id>_1_search_bit` index to drop.
  LEGACY_MODEL_IDS = (1..8)

  # Columns copied from the legacy fragments table, in INSERT order.
  FRAGMENT_COLUMNS = %w[
    rag_document_fragment_id
    model_id
    model_version
    strategy_id
    strategy_version
    digest
    embeddings
    created_at
    updated_at
  ].freeze

  def up
    copy_fragment_embeddings_created_during_deploy
    drop_legacy_search_bit_indexes
    LEGACY_TABLES.each { |table| drop_table table.to_sym }
  end

  def down
    raise ActiveRecord::IrreversibleMigration
  end

  private

  # Copy rag embeddings created during deploy.
  #
  # A legacy row is considered already migrated only when the new table has a
  # row with the same (rag_document_fragment_id, model_id, strategy_id) - the
  # columns of the new table's unique index. Matching on the fragment id alone
  # would skip (and then lose) an embedding for a fragment that already has a
  # row under a different model or strategy.
  #
  # NOTE(review): assumes the legacy table exposes the same column names as
  # the new one - confirm against the legacy schema.
  def copy_fragment_embeddings_created_during_deploy
    insert_columns = FRAGMENT_COLUMNS.join(", ")
    select_columns = FRAGMENT_COLUMNS.map { |column| "legacy.#{column}" }.join(", ")

    execute <<~SQL
      INSERT INTO ai_document_fragments_embeddings (#{insert_columns})
      SELECT #{select_columns}
      FROM ai_document_fragment_embeddings AS legacy
      WHERE NOT EXISTS (
        SELECT 1
        FROM ai_document_fragments_embeddings AS migrated
        WHERE migrated.rag_document_fragment_id = legacy.rag_document_fragment_id
          AND migrated.model_id = legacy.model_id
          AND migrated.strategy_id = legacy.strategy_id
      )
    SQL
  end

  # Drops every legacy `<table>_<model_id>_1_search_bit` index that exists.
  def drop_legacy_search_bit_indexes
    statements =
      LEGACY_TABLES.flat_map do |table|
        LEGACY_MODEL_IDS.map do |model_id|
          "DROP INDEX IF EXISTS #{table}_#{model_id}_1_search_bit;"
        end
      end

    execute statements.join("\n")
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters