Skip to content

Commit

Permalink
Fix parser (#1431)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Fix: floating-point number parser for SQL.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
  • Loading branch information
small-turtle-1 authored Jul 3, 2024
1 parent c079810 commit 88a5415
Show file tree
Hide file tree
Showing 7 changed files with 525 additions and 497 deletions.
38 changes: 36 additions & 2 deletions benchmark/local_infinity/sparse/bmp_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,40 @@ int main(int argc, char *argv[]) {
SaveGroundtruth(topk, query_n, new_indices.get(), scores.get(), opt.groundtruth_save_path_);
break;
}
case ModeType::kOptimize: {
    // Optimize mode: load the index stored at opt.index_save_path_, run
    // Optimize() on it, then save the result back to the same path.
    auto [file_handler, status] = fs.OpenFile(opt.index_save_path_.string(), FileFlags::READ_FLAG, FileLockType::kNoLock);
    if (!status.ok()) {
        UnrecoverableError(fmt::format("Failed to open file: {}", opt.index_save_path_.string()));
    }
    // Generic over the concrete BMPAlg instantiation selected by the switch below.
    auto inner = [&](auto &index) {
        BMPOptimizeOptions optimize_options{.topk_ = opt.topk_, .bp_reorder_ = opt.bp_reorder_};
        std::cout << "Optimizing index...\n";
        index.Optimize(optimize_options);
        std::cout << "Index built\n";

        // Use distinct names for the write handle so it does not shadow the
        // read handle/status declared in the enclosing case scope.
        // NOTE(review): the read handle above is still in scope while the same
        // path is opened for writing — confirm the file system layer allows this.
        auto [out_handler, out_status] = fs.OpenFile(opt.index_save_path_.string(), FileFlags::WRITE_FLAG, FileLockType::kNoLock);
        if (!out_status.ok()) {
            UnrecoverableError(fmt::format("Failed to open file: {}", opt.index_save_path_.string()));
        }
        index.Save(*out_handler);
    };
    // Load with the template instantiation that matches the requested compress type.
    switch (opt.type_) {
        case BMPCompressType::kCompressed: {
            BMPAlg<f32, i16, BMPCompressType::kCompressed> index = BMPAlg<f32, i16, BMPCompressType::kCompressed>::Load(*file_handler);
            inner(index);
            break;
        }
        case BMPCompressType::kRaw: {
            BMPAlg<f32, i16, BMPCompressType::kRaw> index = BMPAlg<f32, i16, BMPCompressType::kRaw>::Load(*file_handler);
            inner(index);
            break;
        }
        default: {
            UnrecoverableError("Unknown compress type");
        }
    }
    break;
}
case ModeType::kImport: {
SparseMatrix<f32, i32> data_mat = DecodeSparseDataset(opt.data_path_);
profiler.Begin();
Expand Down Expand Up @@ -131,7 +165,7 @@ int main(int argc, char *argv[]) {
auto inner = [&](auto &index) {
auto [top_k, all_query_n, _1, _2] = DecodeGroundtruth(opt.groundtruth_path_, true);
if ((int)top_k != opt.topk_) {
UnrecoverableError(fmt::format("Topk mismatch: {} vs {}", top_k, opt.topk_));
std::cout << fmt::format("Topk mismatch: {} vs {}", top_k, opt.topk_) << std::endl;
}
Vector<Pair<Vector<u32>, Vector<f32>>> query_result;
{
Expand All @@ -149,7 +183,7 @@ int main(int argc, char *argv[]) {
profiler.Begin();
query_result = Search(thread_n,
query_mat,
top_k,
opt.topk_,
query_n,
[&](const SparseVecRef<f32, i32> &query, u32 topk) -> Pair<Vector<u32>, Vector<f32>> {
Vector<i16> indices(query.nnz_);
Expand Down
30 changes: 11 additions & 19 deletions benchmark/local_infinity/sparse/sparse_benchmark_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ enum class ModeType : i8 {
kImport,
kQuery,
kShuffle,
kOptimize,
};

enum class DataSetType : u8 {
Expand All @@ -193,11 +194,10 @@ struct BenchmarkOption {
BenchmarkOption() : app_("sparse benchmark") {}

void Parse(int argc, char *argv[]) {
Map<String, ModeType> mode_type_map = {
{"import", ModeType::kImport},
{"query", ModeType::kQuery},
{"shuffle", ModeType::kShuffle},
};
Map<String, ModeType> mode_type_map = {{"import", ModeType::kImport},
{"query", ModeType::kQuery},
{"shuffle", ModeType::kShuffle},
{"optimize", ModeType::kOptimize}};
Map<String, DataSetType> dataset_type_map = {
{"small", DataSetType::kSmall},
{"1M", DataSetType::k1M},
Expand Down Expand Up @@ -227,12 +227,11 @@ struct BenchmarkOption {
if (!shuffled_) {
data_path_ /= "base_small.csr";
groundtruth_path_ /= "base_small.dev.gt";
index_save_path_ /= fmt::format("small_{}.bin", index_name);
} else {
data_path_ /= "base_small_shuffled.csr";
groundtruth_path_ /= "base_small_shuffled.dev.gt";
index_save_path_ /= fmt::format("small_shuffled_{}.bin", index_name);
}
index_save_path_ /= fmt::format("small_{}.bin", index_name);
data_save_path_ /= "base_small_shuffled.csr";
groundtruth_save_path_ /= "base_small_shuffled.dev.gt";
break;
Expand All @@ -241,12 +240,11 @@ struct BenchmarkOption {
if (!shuffled_) {
data_path_ /= "base_1M.csr";
groundtruth_path_ /= "base_1M.dev.gt";
index_save_path_ /= fmt::format("1M_{}.bin", index_name);
} else {
data_path_ /= "base_1M_shuffled.csr";
groundtruth_path_ /= "base_1M_shuffled.dev.gt";
index_save_path_ /= fmt::format("1M_shuffled_{}.bin", index_name);
}
index_save_path_ /= fmt::format("1M_{}.bin", index_name);
data_save_path_ /= "base_1M_shuffled.csr";
groundtruth_save_path_ /= "base_1M_shuffled.dev.gt";
break;
Expand All @@ -255,12 +253,11 @@ struct BenchmarkOption {
if (!shuffled_) {
data_path_ /= "base_full.csr";
groundtruth_path_ /= "base_full.dev.gt";
index_save_path_ /= fmt::format("full_{}.bin", index_name);
} else {
data_path_ /= "base_full_shuffled.csr";
groundtruth_path_ /= "base_full_shuffled.dev.gt";
index_save_path_ /= fmt::format("full_shuffled_{}.bin", index_name);
}
index_save_path_ /= fmt::format("full_{}.bin", index_name);
data_save_path_ /= "base_full_shuffled.csr";
groundtruth_save_path_ /= "base_full_shuffled.dev.gt";
break;
Expand All @@ -283,6 +280,7 @@ struct BenchmarkOption {
bool shuffled_ = false;
i64 query_n_ = 0;
i32 thread_n_ = 1;
i32 test_i_ = 0;

Path data_path_;
Path query_path_;
Expand All @@ -303,7 +301,7 @@ struct LinScanOption : public BenchmarkOption {
app_.add_option("--budget_ratio", budget_ratio_, "Budget radio")->required(false)->transform(CLI::Range(0.0, 100.0));
}

// Index name used for LinScan; a fixed literal.
String IndexName() const override {
    return "linscan";
}
// Index name used for LinScan, suffixed with the value of test_i_.
String IndexName() const override {
    String name = fmt::format("linscan_i{}", test_i_);
    return name;
}

public:
bool bf_ = false;
Expand All @@ -329,13 +327,7 @@ struct BMPOption : public BenchmarkOption {
app_.add_option("--beta", beta_, "Beta")->required(false)->transform(CLI::Range(0.0, 100.0));
}

// Index name built from the block size and the numeric value of the compress
// type; "_bp" is appended when bp_reorder_ is set.
String IndexName() const override {
    const String base = fmt::format("bmp_block{}_type{}", block_size_, static_cast<i8>(type_));
    return bp_reorder_ ? base + "_bp" : base;
}
// Index name built from the block size, the string form of the compress type
// (via BMPCompressTypeToString), and the value of test_i_.
String IndexName() const override {
    const auto type_name = BMPCompressTypeToString(type_);
    return fmt::format("bmp_block{}_{}_i{}", block_size_, type_name, test_i_);
}

public:
BMPCompressType type_ = BMPCompressType::kCompressed;
Expand Down
Loading

0 comments on commit 88a5415

Please sign in to comment.