Skip to content

Commit

Permalink
DEV: Migration to remove old embeddings tables~ (#1067)
Browse files Browse the repository at this point in the history
* DEV: Migration to remove old embeddings tables~

* Check for table existence
  • Loading branch information
romanrizzi authored Jan 14, 2025
1 parent c4d2b7d commit 65456c8
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 43 deletions.
36 changes: 19 additions & 17 deletions db/migrate/20250114160417_backfill_topic_embeddings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,26 @@ class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2]
disable_ddl_transaction!

def up
loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_topic_embeddings old_table
LEFT JOIN ai_topics_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.topic_id = old_table.topic_id
)
WHERE target.topic_id IS NULL
LIMIT 10000
) source
SQL
if table_exists?(:ai_topic_embeddings)
loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_topic_embeddings old_table
LEFT JOIN ai_topics_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.topic_id = old_table.topic_id
)
WHERE target.topic_id IS NULL
LIMIT 10000
) source
SQL

break if count == 0
break if count == 0
end
end
end

Expand Down
38 changes: 20 additions & 18 deletions db/migrate/20250114160446_backfill_post_embeddings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,28 @@ class BackfillPostEmbeddings < ActiveRecord::Migration[7.2]
disable_ddl_transaction!

def up
# Copy data from old tables to new tables in batches.
if table_exists?(:ai_post_embeddings)
# Copy data from old tables to new tables in batches.

loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_post_embeddings old_table
LEFT JOIN ai_posts_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.post_id = old_table.post_id
)
WHERE target.post_id IS NULL
LIMIT 10000
) source
SQL
loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_post_embeddings old_table
LEFT JOIN ai_posts_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.post_id = old_table.post_id
)
WHERE target.post_id IS NULL
LIMIT 10000
) source
SQL

break if count == 0
break if count == 0
end
end
end

Expand Down
18 changes: 10 additions & 8 deletions db/migrate/20250114160500_backfill_rag_embeddings.rb
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# frozen_string_literal: true
class BackfillRagEmbeddings < ActiveRecord::Migration[7.2]
def up
not_backfilled =
DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i == 0
if table_exists?(:ai_document_fragment_embeddings)
not_backfilled =
DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i == 0

if not_backfilled
# Copy data from old tables to new tables
execute <<~SQL
INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT * FROM ai_document_fragment_embeddings;
SQL
if not_backfilled
# Copy data from old tables to new tables
execute <<~SQL
INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT * FROM ai_document_fragment_embeddings;
SQL
end
end
end

Expand Down
53 changes: 53 additions & 0 deletions db/post_migrate/20250114184356_drop_old_embedding_tables2.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# frozen_string_literal: true
# Post-deploy migration that retires the legacy embeddings tables
# (ai_topic_embeddings, ai_post_embeddings, ai_document_fragment_embeddings)
# and their "*_search_bit" indexes.
#
# Before dropping anything it copies any rows still missing from
# ai_document_fragments_embeddings out of the legacy rag table.
class DropOldEmbeddingTables2 < ActiveRecord::Migration[7.2]
  # Legacy tables, in the order their indexes and the tables themselves are dropped.
  OLD_TABLES = %i[ai_topic_embeddings ai_post_embeddings ai_document_fragment_embeddings].freeze

  # Legacy index names have the form "<table>_<n>_1_search_bit" with n in this range.
  OLD_INDEX_NUMBERS = (1..8).freeze

  def up
    # The legacy table may already be gone, in which case there is nothing to
    # copy. Selecting from a missing table would abort the migration, while
    # every other step below already tolerates missing objects.
    copy_remaining_rag_embeddings if table_exists?(:ai_document_fragment_embeddings)

    execute(drop_old_indexes_sql)

    OLD_TABLES.each { |table| drop_table table, if_exists: true }
  end

  # Intentionally empty: rolling back does not recreate the dropped tables,
  # indexes, or their data.
  def down
  end

  private

  # Copy rag embeddings created during deploy: inserts every legacy row that
  # has no counterpart in the new table for the same
  # (model_id, strategy_id, rag_document_fragment_id).
  #
  # NOTE(review): `SELECT old_table.*` relies on the legacy table's column
  # order matching the INSERT column list -- confirm against the schema.
  def copy_remaining_rag_embeddings
    execute <<~SQL
      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      (
        SELECT old_table.*
        FROM ai_document_fragment_embeddings old_table
        LEFT OUTER JOIN ai_document_fragments_embeddings target ON (
          target.model_id = old_table.model_id AND
          target.strategy_id = old_table.strategy_id AND
          target.rag_document_fragment_id = old_table.rag_document_fragment_id
        )
        WHERE target.rag_document_fragment_id IS NULL
      )
    SQL
  end

  # Builds one "DROP INDEX IF EXISTS" statement per legacy table and index
  # number (3 tables x 8 numbers), grouped by table in OLD_TABLES order.
  def drop_old_indexes_sql
    OLD_TABLES
      .flat_map do |table|
        OLD_INDEX_NUMBERS.map { |number| "DROP INDEX IF EXISTS #{table}_#{number}_1_search_bit;" }
      end
      .join("\n")
  end
end

0 comments on commit 65456c8

Please sign in to comment.