From ab8b5728a8f0ee6b2986035f74ead20386b7f527 Mon Sep 17 00:00:00 2001 From: Salieri Date: Thu, 25 Apr 2024 22:51:25 +0800 Subject: [PATCH] Fix bug in csv import (#1114) ### What problem does this PR solve? Fix a bug that occurred when the number of cells in a CSV row did not match the number of table fields. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- python/test/test_import.py | 3 +-- python/test_http_api/test_import.py | 2 +- src/executor/operator/physical_import.cpp | 15 +++++++++++++-- test/data/csv/pysdk_test_import_default.csv | 4 +++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/python/test/test_import.py b/python/test/test_import.py index 143b83ac3f..0e76fe6008 100755 --- a/python/test/test_import.py +++ b/python/test/test_import.py @@ -406,8 +406,7 @@ def test_table_with_not_matched_columns(self, get_infinity_db, columns, check_da table_obj = db_obj.create_table("test_table_with_not_matched_columns", columns) test_csv_dir = common_values.TEST_TMP_DIR + "pysdk_test_commas.csv" - with pytest.raises(Exception, - match="ERROR:3039, Column count mismatch: CSV file row count isn't match with table schema*"): + with pytest.raises(Exception, match="ERROR:3037*"): res = table_obj.import_data(test_csv_dir) assert res.error_code == ErrorCode.OK diff --git a/python/test_http_api/test_import.py b/python/test_http_api/test_import.py index e2e6cae672..3f61067359 100644 --- a/python/test_http_api/test_import.py +++ b/python/test_http_api/test_import.py @@ -547,7 +547,7 @@ def test_http_table_with_not_matched_columns(self): "delimiter": "," }, { "status_code": 500, - "error_code": 3039, + "error_code": 3037, }) self.drop_table(db_name, table_name) diff --git a/src/executor/operator/physical_import.cpp b/src/executor/operator/physical_import.cpp index 3c854f3269..dba2e0308e 100644 --- a/src/executor/operator/physical_import.cpp +++ b/src/executor/operator/physical_import.cpp @@ -452,7 +452,7 @@ void PhysicalImport::CSVRowHandler(void *context) { UniquePtr 
block_entry = std::move(parser_context->block_entry_); // if column count is larger than columns defined from schema, extra columns are abandoned - if (column_count != table_entry->ColumnCount()) { + if (column_count > table_entry->ColumnCount()) { UniquePtr err_msg = MakeUnique( fmt::format("CSV file row count isn't match with table schema, row id: {}, column_count = {}, table_entry->ColumnCount = {}.", parser_context->row_count_, @@ -467,19 +467,30 @@ void PhysicalImport::CSVRowHandler(void *context) { ZsvCell cell = parser_context->parser_.GetCell(column_idx); std::string_view str_view{}; auto column_def = table_entry->GetColumnDefByID(column_idx); - auto &column_vector = parser_context->column_vectors_[column_idx]; if (cell.len) { str_view = std::string_view((char *)cell.str, cell.len); + auto &column_vector = parser_context->column_vectors_[column_idx]; column_vector.AppendByStringView(str_view, parser_context->delimiter_); } else { if (column_def->has_default_value()) { auto const_expr = dynamic_cast(column_def->default_expr_.get()); + auto &column_vector = parser_context->column_vectors_[column_idx]; column_vector.AppendByConstantExpr(const_expr); } else { RecoverableError(Status::ImportFileFormatError(fmt::format("Column {} is empty.", column_def->name_))); } } } + for (SizeT column_idx = column_count; column_idx < table_entry->ColumnCount(); ++column_idx) { + auto column_def = table_entry->GetColumnDefByID(column_idx); + auto &column_vector = parser_context->column_vectors_[column_idx]; + if (column_def->has_default_value()) { + auto const_expr = dynamic_cast(column_def->default_expr_.get()); + column_vector.AppendByConstantExpr(const_expr); + } else { + RecoverableError(Status::ImportFileFormatError(fmt::format("Column {} is empty.", column_def->name_))); + } + } block_entry->IncreaseRowCount(1); ++parser_context->row_count_; diff --git a/test/data/csv/pysdk_test_import_default.csv b/test/data/csv/pysdk_test_import_default.csv index eee271f6a4..ad123708d8 
100644 --- a/test/data/csv/pysdk_test_import_default.csv +++ b/test/data/csv/pysdk_test_import_default.csv @@ -5,4 +5,6 @@ 5,6, 2,, ,3, -,,"[1.2,3.4,5.7]" \ No newline at end of file +,,"[1.2,3.4,5.7]" +10,20 +100 \ No newline at end of file