Skip to content

Commit

Permalink
Fix bug in csv import (#1114)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Fix a bug that occurs when the number of cells in a CSV row does not match the number of columns in the table.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
  • Loading branch information
Ognimalf authored Apr 25, 2024
1 parent e7d7a3b commit ab8b572
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 6 deletions.
3 changes: 1 addition & 2 deletions python/test/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,7 @@ def test_table_with_not_matched_columns(self, get_infinity_db, columns, check_da
table_obj = db_obj.create_table("test_table_with_not_matched_columns", columns)

test_csv_dir = common_values.TEST_TMP_DIR + "pysdk_test_commas.csv"
with pytest.raises(Exception,
match="ERROR:3039, Column count mismatch: CSV file row count isn't match with table schema*"):
with pytest.raises(Exception, match="ERROR:3037*"):
res = table_obj.import_data(test_csv_dir)
assert res.error_code == ErrorCode.OK

Expand Down
2 changes: 1 addition & 1 deletion python/test_http_api/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ def test_http_table_with_not_matched_columns(self):
"delimiter": ","
}, {
"status_code": 500,
"error_code": 3039,
"error_code": 3037,
})

self.drop_table(db_name, table_name)
Expand Down
15 changes: 13 additions & 2 deletions src/executor/operator/physical_import.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ void PhysicalImport::CSVRowHandler(void *context) {
UniquePtr<BlockEntry> block_entry = std::move(parser_context->block_entry_);

// if column count is larger than columns defined from schema, extra columns are abandoned
if (column_count != table_entry->ColumnCount()) {
if (column_count > table_entry->ColumnCount()) {
UniquePtr<String> err_msg = MakeUnique<String>(
fmt::format("CSV file row count isn't match with table schema, row id: {}, column_count = {}, table_entry->ColumnCount = {}.",
parser_context->row_count_,
Expand All @@ -467,19 +467,30 @@ void PhysicalImport::CSVRowHandler(void *context) {
ZsvCell cell = parser_context->parser_.GetCell(column_idx);
std::string_view str_view{};
auto column_def = table_entry->GetColumnDefByID(column_idx);
auto &column_vector = parser_context->column_vectors_[column_idx];
if (cell.len) {
str_view = std::string_view((char *)cell.str, cell.len);
auto &column_vector = parser_context->column_vectors_[column_idx];
column_vector.AppendByStringView(str_view, parser_context->delimiter_);
} else {
if (column_def->has_default_value()) {
auto const_expr = dynamic_cast<ConstantExpr *>(column_def->default_expr_.get());
auto &column_vector = parser_context->column_vectors_[column_idx];
column_vector.AppendByConstantExpr(const_expr);
} else {
RecoverableError(Status::ImportFileFormatError(fmt::format("Column {} is empty.", column_def->name_)));
}
}
}
for (SizeT column_idx = column_count; column_idx < table_entry->ColumnCount(); ++column_idx) {
auto column_def = table_entry->GetColumnDefByID(column_idx);
auto &column_vector = parser_context->column_vectors_[column_idx];
if (column_def->has_default_value()) {
auto const_expr = dynamic_cast<ConstantExpr *>(column_def->default_expr_.get());
column_vector.AppendByConstantExpr(const_expr);
} else {
RecoverableError(Status::ImportFileFormatError(fmt::format("Column {} is empty.", column_def->name_)));
}
}
block_entry->IncreaseRowCount(1);
++parser_context->row_count_;

Expand Down
4 changes: 3 additions & 1 deletion test/data/csv/pysdk_test_import_default.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@
5,6,
2,,
,3,
,,"[1.2,3.4,5.7]"
,,"[1.2,3.4,5.7]"
10,20
100

0 comments on commit ab8b572

Please sign in to comment.