From fe0454a77874c32d250d29cc3e07ab4e7d4f4e9e Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Tue, 14 Jan 2025 16:07:26 -0300 Subject: [PATCH 1/2] DEV: Migration to remove old embeddings tables --- ...250114184356_drop_old_embedding_tables2.rb | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 db/post_migrate/20250114184356_drop_old_embedding_tables2.rb diff --git a/db/post_migrate/20250114184356_drop_old_embedding_tables2.rb b/db/post_migrate/20250114184356_drop_old_embedding_tables2.rb new file mode 100644 index 000000000..6606fc6d8 --- /dev/null +++ b/db/post_migrate/20250114184356_drop_old_embedding_tables2.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true +class DropOldEmbeddingTables2 < ActiveRecord::Migration[7.2] + def up + # Copy rag embeddings created during deploy. + execute <<~SQL + INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + ( + SELECT old_table.* + FROM ai_document_fragment_embeddings old_table + LEFT OUTER JOIN ai_document_fragments_embeddings target ON ( + target.model_id = old_table.model_id AND + target.strategy_id = old_table.strategy_id AND + target.rag_document_fragment_id = old_table.rag_document_fragment_id + ) + WHERE target.rag_document_fragment_id IS NULL + ) + SQL + + execute <<~SQL + DROP INDEX IF EXISTS ai_topic_embeddings_1_1_search_bit; + DROP INDEX IF EXISTS ai_topic_embeddings_2_1_search_bit; + DROP INDEX IF EXISTS ai_topic_embeddings_3_1_search_bit; + DROP INDEX IF EXISTS ai_topic_embeddings_4_1_search_bit; + DROP INDEX IF EXISTS ai_topic_embeddings_5_1_search_bit; + DROP INDEX IF EXISTS ai_topic_embeddings_6_1_search_bit; + DROP INDEX IF EXISTS ai_topic_embeddings_7_1_search_bit; + DROP INDEX IF EXISTS ai_topic_embeddings_8_1_search_bit; + DROP INDEX IF EXISTS ai_post_embeddings_1_1_search_bit; + DROP INDEX IF EXISTS ai_post_embeddings_2_1_search_bit; + DROP INDEX IF EXISTS 
ai_post_embeddings_3_1_search_bit; + DROP INDEX IF EXISTS ai_post_embeddings_4_1_search_bit; + DROP INDEX IF EXISTS ai_post_embeddings_5_1_search_bit; + DROP INDEX IF EXISTS ai_post_embeddings_6_1_search_bit; + DROP INDEX IF EXISTS ai_post_embeddings_7_1_search_bit; + DROP INDEX IF EXISTS ai_post_embeddings_8_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_1_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_2_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_3_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_4_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_5_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_6_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search_bit; + DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search_bit; + SQL + drop_table :ai_topic_embeddings + drop_table :ai_post_embeddings + drop_table :ai_document_fragment_embeddings + end + + def down + end +end From 883b632b42d061d6dad94a807a29fa7638a6ae6e Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Tue, 14 Jan 2025 17:06:23 -0300 Subject: [PATCH 2/2] Check for table existence --- ...0250114160417_backfill_topic_embeddings.rb | 36 +++++++++--------- ...20250114160446_backfill_post_embeddings.rb | 38 ++++++++++--------- .../20250114160500_backfill_rag_embeddings.rb | 18 +++++---- ...250114184356_drop_old_embedding_tables2.rb | 7 ++-- 4 files changed, 53 insertions(+), 46 deletions(-) diff --git a/db/migrate/20250114160417_backfill_topic_embeddings.rb b/db/migrate/20250114160417_backfill_topic_embeddings.rb index dcec61db4..3ec94de61 100644 --- a/db/migrate/20250114160417_backfill_topic_embeddings.rb +++ b/db/migrate/20250114160417_backfill_topic_embeddings.rb @@ -3,24 +3,26 @@ class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2] disable_ddl_transaction! 
def up - loop do - count = execute(<<~SQL).cmd_tuples - INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - SELECT source.* - FROM ( - SELECT old_table.* - FROM ai_topic_embeddings old_table - LEFT JOIN ai_topics_embeddings target ON ( - target.model_id = old_table.model_id AND - target.strategy_id = old_table.strategy_id AND - target.topic_id = old_table.topic_id - ) - WHERE target.topic_id IS NULL - LIMIT 10000 - ) source - SQL + if table_exists?(:ai_topic_embeddings) + loop do + count = execute(<<~SQL).cmd_tuples + INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + SELECT source.* + FROM ( + SELECT old_table.* + FROM ai_topic_embeddings old_table + LEFT JOIN ai_topics_embeddings target ON ( + target.model_id = old_table.model_id AND + target.strategy_id = old_table.strategy_id AND + target.topic_id = old_table.topic_id + ) + WHERE target.topic_id IS NULL + LIMIT 10000 + ) source + SQL - break if count == 0 + break if count == 0 + end end end diff --git a/db/migrate/20250114160446_backfill_post_embeddings.rb b/db/migrate/20250114160446_backfill_post_embeddings.rb index 0933e1977..f314f90d1 100644 --- a/db/migrate/20250114160446_backfill_post_embeddings.rb +++ b/db/migrate/20250114160446_backfill_post_embeddings.rb @@ -3,26 +3,28 @@ class BackfillPostEmbeddings < ActiveRecord::Migration[7.2] disable_ddl_transaction! def up - # Copy data from old tables to new tables in batches. + if table_exists?(:ai_post_embeddings) + # Copy data from old tables to new tables in batches. 
- loop do - count = execute(<<~SQL).cmd_tuples - INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - SELECT source.* - FROM ( - SELECT old_table.* - FROM ai_post_embeddings old_table - LEFT JOIN ai_posts_embeddings target ON ( - target.model_id = old_table.model_id AND - target.strategy_id = old_table.strategy_id AND - target.post_id = old_table.post_id - ) - WHERE target.post_id IS NULL - LIMIT 10000 - ) source - SQL + loop do + count = execute(<<~SQL).cmd_tuples + INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + SELECT source.* + FROM ( + SELECT old_table.* + FROM ai_post_embeddings old_table + LEFT JOIN ai_posts_embeddings target ON ( + target.model_id = old_table.model_id AND + target.strategy_id = old_table.strategy_id AND + target.post_id = old_table.post_id + ) + WHERE target.post_id IS NULL + LIMIT 10000 + ) source + SQL - break if count == 0 + break if count == 0 + end end end diff --git a/db/migrate/20250114160500_backfill_rag_embeddings.rb b/db/migrate/20250114160500_backfill_rag_embeddings.rb index 168432197..46f73e7a0 100644 --- a/db/migrate/20250114160500_backfill_rag_embeddings.rb +++ b/db/migrate/20250114160500_backfill_rag_embeddings.rb @@ -1,15 +1,17 @@ # frozen_string_literal: true class BackfillRagEmbeddings < ActiveRecord::Migration[7.2] def up - not_backfilled = - DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i == 0 + if table_exists?(:ai_document_fragment_embeddings) + not_backfilled = + DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i == 0 - if not_backfilled - # Copy data from old tables to new tables - execute <<~SQL - INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, 
updated_at) - SELECT * FROM ai_document_fragment_embeddings; - SQL + if not_backfilled + # Copy data from old tables to new tables + execute <<~SQL + INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + SELECT * FROM ai_document_fragment_embeddings; + SQL + end end end diff --git a/db/post_migrate/20250114184356_drop_old_embedding_tables2.rb b/db/post_migrate/20250114184356_drop_old_embedding_tables2.rb index 6606fc6d8..2f422021d 100644 --- a/db/post_migrate/20250114184356_drop_old_embedding_tables2.rb +++ b/db/post_migrate/20250114184356_drop_old_embedding_tables2.rb @@ -42,9 +42,10 @@ def up DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search_bit; DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search_bit; SQL - drop_table :ai_topic_embeddings - drop_table :ai_post_embeddings - drop_table :ai_document_fragment_embeddings + + drop_table :ai_topic_embeddings, if_exists: true + drop_table :ai_post_embeddings, if_exists: true + drop_table :ai_document_fragment_embeddings, if_exists: true end def down