@@ -33,6 +33,8 @@ import file_system_type;
33
33
import defer_op;
34
34
import stl;
35
35
import logical_type;
36
+ import embedding_info;
37
+ import status;
36
38
37
39
namespace infinity {
38
40
@@ -50,6 +52,10 @@ bool PhysicalExport::Execute(QueryContext *query_context, OperatorState *operato
50
52
exported_row_count = ExportToJSONL (query_context, export_op_state);
51
53
break ;
52
54
}
55
+ case CopyFileType::kFVECS : {
56
+ exported_row_count = ExportToFVECS (query_context, export_op_state);
57
+ break ;
58
+ }
53
59
default : {
54
60
String error_message = " Not supported file type" ;
55
61
LOG_CRITICAL (error_message);
@@ -192,4 +198,70 @@ SizeT PhysicalExport::ExportToJSONL(QueryContext *query_context, ExportOperatorS
192
198
return row_count;
193
199
}
194
200
201
+ SizeT PhysicalExport::ExportToFVECS (QueryContext *query_context, ExportOperatorState *export_op_state) {
202
+
203
+ if (column_idx_array_.size () != 1 ) {
204
+ String error_message = " Only one column with embedding data type can be exported as FVECS file" ;
205
+ LOG_CRITICAL (error_message);
206
+ UnrecoverableError (error_message);
207
+ }
208
+
209
+ u64 exported_column_idx = column_idx_array_[0 ];
210
+ const Vector<SharedPtr<ColumnDef>>& column_defs = table_entry_->column_defs ();
211
+ DataType* data_type = column_defs[exported_column_idx]->type ().get ();
212
+ if (data_type->type () != LogicalType::kEmbedding ) {
213
+ String error_message = fmt::format (" Only embedding column can be exported as FVECS file, but it is {}" , data_type->ToString ());
214
+ LOG_CRITICAL (error_message);
215
+ UnrecoverableError (error_message);
216
+ }
217
+
218
+ EmbeddingInfo* embedding_type_info = static_cast <EmbeddingInfo*>(data_type->type_info ().get ());
219
+ if (embedding_type_info->Type () != EmbeddingDataType::kElemFloat ) {
220
+ Status status = Status::NotSupport (" Only float element type embedding is supported now." );
221
+ LOG_ERROR (status.message ());
222
+ RecoverableError (status);
223
+ }
224
+
225
+ i32 dimension = embedding_type_info->Dimension ();
226
+
227
+ LocalFileSystem fs;
228
+ auto [file_handler, status] = fs.OpenFile (file_path_, FileFlags::WRITE_FLAG | FileFlags::CREATE_FLAG, FileLockType::kWriteLock );
229
+ if (!status.ok ()) {
230
+ RecoverableError (status);
231
+ }
232
+ DeferFn file_defer ([&]() { fs.Close (*file_handler); });
233
+
234
+ SizeT row_count{0 };
235
+ Map<SegmentID, SegmentSnapshot>& segment_block_index_ref = block_index_->segment_block_index_ ;
236
+
237
+ // Write header
238
+ LOG_DEBUG (fmt::format (" Going to export segment count: {}" , segment_block_index_ref.size ()));
239
+ for (auto & [segment_id, segment_snapshot]: segment_block_index_ref) {
240
+ SizeT block_count = segment_snapshot.block_map_ .size ();
241
+ LOG_DEBUG (fmt::format (" Export segment_id: {}, with block count: {}" , segment_id, block_count));
242
+ for (SizeT block_idx = 0 ; block_idx < block_count; ++ block_idx) {
243
+ LOG_DEBUG (fmt::format (" Export block_idx: {}" , block_idx));
244
+ BlockEntry *block_entry = segment_snapshot.block_map_ [block_idx];
245
+ SizeT block_row_count = block_entry->row_count ();
246
+
247
+ ColumnVector exported_column_vector = block_entry->GetColumnBlockEntry (exported_column_idx)->GetColumnVector (query_context->storage ()->buffer_manager ());
248
+ if (exported_column_vector.Size () != block_row_count) {
249
+ String error_message = " Unmatched row_count between block and block_column" ;
250
+ LOG_CRITICAL (error_message);
251
+ UnrecoverableError (error_message);
252
+ }
253
+
254
+ for (SizeT row_idx = 0 ; row_idx < block_row_count; ++ row_idx) {
255
+ Value v = exported_column_vector.GetValue (row_idx);
256
+ Span<char > embedding = v.GetEmbedding ();
257
+ fs.Write (*file_handler, &dimension, sizeof (dimension));
258
+ fs.Write (*file_handler, embedding.data (), embedding.size_bytes ());
259
+ ++ row_count;
260
+ }
261
+ }
262
+ }
263
+ LOG_DEBUG (fmt::format (" Export to FVECS, db {}, table {}, file: {}, row: {}" , schema_name_, table_name_, file_path_, row_count));
264
+ return row_count;
265
+ }
266
+
195
267
} // namespace infinity
0 commit comments