15
15
module;
16
16
17
17
#include < cassert>
18
+ #include < cstring>
18
19
#include < string>
19
20
#include < tuple>
20
21
#include < vector>
@@ -23,6 +24,11 @@ module logical_planner;
23
24
24
25
import stl;
25
26
import bind_context;
27
+ import defer_op;
28
+ import file_system;
29
+ import file_system_type;
30
+ import statement_common;
31
+ import zsv;
26
32
27
33
import infinity_exception;
28
34
import query_binder;
@@ -885,12 +891,104 @@ Status LogicalPlanner::BuildImport(const CopyStatement *statement, SharedPtr<Bin
885
891
RecoverableError (Status::FileNotFound (statement->file_path_ ));
886
892
}
887
893
894
+ UniquePtr<FileHandler> file_handler = fs.OpenFile (statement->file_path_ , FileFlags::READ_FLAG, FileLockType::kReadLock );
895
+ SizeT file_size = fs.GetFileSize (*file_handler);
896
+ DeferFn defer_fn ([&]() { fs.Close (*file_handler); });
897
+
898
+ SizeT row_count = 0 ;
899
+ switch (statement->copy_file_type_ ) {
900
+ case CopyFileType::kCSV : {
901
+ FILE *fp = fopen (statement->file_path_ .c_str (), " rb" );
902
+ if (!fp) {
903
+ UnrecoverableError (strerror (errno));
904
+ }
905
+ auto opts = MakeUnique<ZsvOpts>();
906
+ if (statement->header_ ) {
907
+ opts->row_handler = [&](void *){};
908
+ } else {
909
+ opts->row_handler = [&](void *){ ++row_count; };
910
+ }
911
+ opts->delimiter = statement->delimiter_ ;
912
+ opts->stream = fp;
913
+ opts->buffsize = (1 << 20 );
914
+
915
+ auto parser = ZsvParser (opts.get ());
916
+
917
+ ZsvStatus csv_parser_status;
918
+ while ((csv_parser_status = parser.ParseMore ()) == zsv_status_ok) {
919
+ ;
920
+ }
921
+ parser.Finish ();
922
+ fclose (fp);
923
+ break ;
924
+ }
925
+ case CopyFileType::kJSON : {
926
+ String jsonl_str (file_size + 1 , 0 );
927
+ SizeT read_n = file_handler->Read (jsonl_str.data (), file_size);
928
+ if (read_n != file_size) {
929
+ UnrecoverableError (fmt::format (" Read file size {} doesn't match with file size {}." , read_n, file_size));
930
+ }
931
+ if (read_n == 0 ) {
932
+ break ;
933
+ }
934
+ nlohmann::json json_arr;
935
+ json_arr = nlohmann::json::parse (jsonl_str);
936
+ if (!json_arr.is_array ()) {
937
+ break ;
938
+ }
939
+ row_count = json_arr.size ();
940
+ break ;
941
+ }
942
+ case CopyFileType::kJSONL : {
943
+ String jsonl_str (file_size + 1 , 0 );
944
+ SizeT read_n = file_handler->Read (jsonl_str.data (), file_size);
945
+ if (read_n != file_size) {
946
+ UnrecoverableError (fmt::format (" Read file size {} doesn't match with file size {}." , read_n, file_size));
947
+ }
948
+ if (read_n == 0 ) {
949
+ break ;
950
+ }
951
+ SizeT start_pos = 0 ;
952
+ SizeT end_pos = 0 ;
953
+ while (true ) {
954
+ if (start_pos >= read_n) {
955
+ break ;
956
+ }
957
+ end_pos = jsonl_str.find (' \n ' , start_pos);
958
+ if (end_pos == String::npos) {
959
+ end_pos = file_size;
960
+ }
961
+ start_pos = end_pos + 1 ;
962
+ ++row_count;
963
+ }
964
+ break ;
965
+ }
966
+ case CopyFileType::kFVECS : {
967
+ int dimension = 0 ;
968
+ i64 nbytes = fs.Read (*file_handler, &dimension, sizeof (dimension));
969
+ fs.Seek (*file_handler, 0 );
970
+ if (nbytes == 0 ) {
971
+ break ;
972
+ }
973
+ if (nbytes != sizeof (dimension)) {
974
+ RecoverableError (Status::ImportFileFormatError (fmt::format (" Read dimension which length isn't {}." , nbytes)));
975
+ }
976
+ SizeT row_size = dimension * sizeof (FloatT) + sizeof (dimension);
977
+ row_count = file_size / row_size;
978
+ break ;
979
+ }
980
+ case CopyFileType::kInvalid : {
981
+ UnrecoverableError (" Invalid file type" );
982
+ }
983
+ }
984
+
888
985
SharedPtr<LogicalNode> logical_import = MakeShared<LogicalImport>(bind_context_ptr->GetNewLogicalNodeId (),
889
986
table_entry,
890
987
statement->file_path_ ,
891
988
statement->header_ ,
892
989
statement->delimiter_ ,
893
- statement->copy_file_type_ );
990
+ statement->copy_file_type_ ,
991
+ row_count / DEFAULT_SEGMENT_CAPACITY + 1 );
894
992
895
993
this ->logical_plan_ = logical_import;
896
994
return Status::OK ();
0 commit comments