diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 58dbd2f834..2641755157 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -6,12 +6,5 @@ include_directories("${CMAKE_SOURCE_DIR}/src") add_definitions(-march=native) add_subdirectory(common) -add_subdirectory(embedding) add_subdirectory(local_infinity) -add_subdirectory(remote_infinity) -add_subdirectory(blocking_scheduler) -add_subdirectory(polling_scheduler) -add_subdirectory(csv) -add_subdirectory(toml) -add_subdirectory(wal) -add_subdirectory(fst) \ No newline at end of file +add_subdirectory(remote_infinity) \ No newline at end of file diff --git a/benchmark/blocking_scheduler/CMakeLists.txt b/benchmark/blocking_scheduler/CMakeLists.txt deleted file mode 100644 index 1ac94e43f0..0000000000 --- a/benchmark/blocking_scheduler/CMakeLists.txt +++ /dev/null @@ -1,48 +0,0 @@ - -file(GLOB_RECURSE - scheduler_benchmark_files - CONFIGURE_DEPENDS - scheduler_benchmark.cpp -# new_scheduler.cpp -# new_scheduler.h - task.cpp - task.h - pipeline.cpp - fragment.h - fragment.cpp - ) - -add_executable(blocking_scheduler_benchmark - ${scheduler_benchmark_files} - ) - -target_include_directories(blocking_scheduler_benchmark PUBLIC "${CMAKE_SOURCE_DIR}/third_party/concurrentqueue") -target_include_directories(blocking_scheduler_benchmark PUBLIC "${CMAKE_SOURCE_DIR}/third_party/ctpl") -target_include_directories(blocking_scheduler_benchmark PUBLIC "${CMAKE_SOURCE_DIR}/src") - -target_link_libraries( - blocking_scheduler_benchmark - benchmark_profiler -) - -# add_definitions(-march=native) -# add_definitions(-msse4.2 -mfma) -# add_definitions(-mavx2 -mf16c -mpopcnt) - -# execute_process(COMMAND grep -q avx2 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX2 -# OUTPUT_QUIET -# ERROR_QUIET) - -# execute_process(COMMAND grep -q avx512 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX512 -# OUTPUT_QUIET -# ERROR_QUIET) - -# if (SUPPORT_AVX2 EQUAL 0 OR SUPPORT_AVX512 EQUAL 0) -# message("Compiled by AVX2 or AVX512") -# target_compile_options(blocking_scheduler_benchmark PUBLIC $<$:-march=native>) -# else() -# message("Compiled by SSE") -# target_compile_options(blocking_scheduler_benchmark PUBLIC $<$:-msse4.2 -mfma>) -# endif() \ No newline at end of file diff --git a/benchmark/blocking_scheduler/buffer.h b/benchmark/blocking_scheduler/buffer.h deleted file mode 100644 index b492fddb44..0000000000 --- a/benchmark/blocking_scheduler/buffer.h +++ /dev/null @@ -1,39 +0,0 @@ -// -// Created by jinhai on 23-5-11. -// - -#pragma once - -#include -#include - -namespace infinity { - -class Buffer { -public: - explicit - Buffer(size_t size) : size_(size) { - buffer_ = std::make_unique(size); - } - - inline void - Append(const char* str) const { - size_t len = std::strlen(str); - if(len + offset_ >= size_) { - throw; - } - memcpy(buffer_.get() + offset_, str, len); - } - - inline char* - Get() const { - return buffer_.get(); - } - -private: - std::unique_ptr buffer_{nullptr}; - size_t size_{}; - size_t offset_{}; -}; - -} diff --git a/benchmark/blocking_scheduler/buffer_queue.h b/benchmark/blocking_scheduler/buffer_queue.h deleted file mode 100644 index 578ad8eefb..0000000000 --- a/benchmark/blocking_scheduler/buffer_queue.h +++ /dev/null @@ -1,41 +0,0 @@ -// -// Created by jinhai on 23-5-9. 
-// - -#pragma once - -#include "blockingconcurrentqueue.h" -#include "mpsc_queue.h" -#include - -namespace infinity { - -struct ConcurrentQueue { - bool - TryEnqueue(std::shared_ptr buffer) { - return queue_.try_enqueue(std::move(buffer)); - } - - bool - TryDequeue(std::shared_ptr& buffer) { - return queue_.try_dequeue(buffer); - } - - moodycamel::ConcurrentQueue> queue_{}; -}; - -struct WaitFreeQueue { - void - TryEnqueue(std::shared_ptr buffer) { - queue_.enqueue(std::move(buffer)); - } - - bool - TryDequeue(std::shared_ptr& buffer) { - return queue_.dequeue(buffer); - } - - MPSCQueue> queue_{}; -}; - -} diff --git a/benchmark/blocking_scheduler/fragment.cpp b/benchmark/blocking_scheduler/fragment.cpp deleted file mode 100644 index 3536d8e121..0000000000 --- a/benchmark/blocking_scheduler/fragment.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Created by jinhai on 23-5-9. -// - -#include "fragment.h" - -namespace infinity { - -std::vector> -Fragment::BuildTask(uint64_t parallel_size) { - assert(parallel_size > 0); - assert(this->source_ != nullptr); - assert(this->sink_ != nullptr); - - std::vector> child_tasks; - if(this->child_ != nullptr) { - child_tasks = this->child_->BuildTask(parallel_size); - } - - std::vector> result; - - size_t task_count = 0; - if(fragment_type_ == FragmentType::kSerial) { - task_count = 1; - } else if(fragment_type_ == FragmentType::kParallel) { - task_count = parallel_size; - } - - for(size_t idx = 0; idx < task_count; ++idx) { - result.emplace_back(std::make_shared()); - PipelineTask* the_task = (PipelineTask*)(result[idx].get()); - the_task->AddSink(sink_.get()); - the_task->AddSource(source_.get(), !child_tasks.empty()); - for(auto& op: this->operators_) { - the_task->AddOperator(op.get()); - } - the_task->SetChildren(child_tasks); - } - return result; -} - -} \ No newline at end of file diff --git a/benchmark/blocking_scheduler/fragment.h b/benchmark/blocking_scheduler/fragment.h deleted file mode 100644 index bddd2fbc59..0000000000 --- a/benchmark/blocking_scheduler/fragment.h +++ /dev/null @@ -1,85 +0,0 @@ -// -// Created by jinhai on 23-5-7. 
-// - -#pragma once - -#include -#include -#include -#include - -#include "operator.h" -#include "task.h" - -namespace infinity { -#if 0 -class Pipeline { -public: - inline explicit - Pipeline(uint64_t fragment_id, uint64_t pipeline_id): fragment_id_(fragment_id), pipeline_id_(pipeline_id) {} - - inline void - Add(std::unique_ptr op) { - operators_.emplace_back(std::move(op)); - - // 128 bytes buffer for the operators input - buffers_.emplace_back(128); - - SizeT last_index = operators_.size() - 1; - operators_[last_index]->SetInput(&buffers_[last_index]); - if(last_index > 0) { - operators_[last_index]->SetOutput(&buffers_[last_index - 1]); - } - } - - [[nodiscard]] inline SizeT - size() const { - return operators_.size(); - } - - inline void - ConnectBuffer(Buffer* buffer) { - operators_[0]->SetInput(buffer); - } - - inline void - PrintOutput() const { - printf("Pipeline Output: %s", buffers_.back().Get()); - } - -private: - std::vector> operators_{}; - std::vector buffers_{}; - uint64_t fragment_id_{}; - uint64_t pipeline_id_{}; -}; -#endif -enum class FragmentType { kParallel, kSerial, kInvalid }; - -class Fragment { -public: - inline explicit Fragment(uint64_t id, FragmentType type) : id_(id), fragment_type_(type) {} - - std::vector> BuildTask(uint64_t parallel_size); - - inline void AddOperator(std::unique_ptr op) { operators_.emplace_back(std::move(op)); } - - inline void SetChild(std::unique_ptr child) { child_ = std::move(child); } - - inline void AddSource(std::unique_ptr op) { source_ = std::move(op); } - - inline void AddSink(std::unique_ptr op) { sink_ = std::move(op); } - - inline uint64_t id() const { return id_; } - -private: - uint64_t id_{}; - FragmentType fragment_type_{FragmentType::kInvalid}; - std::unique_ptr source_{}; - std::vector> operators_{}; - std::unique_ptr sink_{}; - std::unique_ptr child_{}; -}; - -} // namespace infinity diff --git a/benchmark/blocking_scheduler/operator.h b/benchmark/blocking_scheduler/operator.h deleted file mode 100644 index 61c848c941..0000000000 --- a/benchmark/blocking_scheduler/operator.h +++ /dev/null @@ -1,68 +0,0 @@ -// -// Created by jinhai on 23-5-8. 
-// - -#pragma once - -#include "buffer.h" -#include "buffer_queue.h" - -namespace infinity { - -class Operator { -public: - explicit - Operator(const std::string& name) : op_name_(std::make_unique(name)) {} - - inline void - Run(const Buffer* input_buffer, Buffer* output_buffer) { -// printf("Operator::Run(): %s\n", op_name_->c_str()); - } - -private: - std::unique_ptr op_name_; -}; - -class Sink { -public: - explicit - Sink(const std::string& name) : op_name_(std::make_unique(name)) {} - - inline void - Run(const Buffer* input_buffer, std::vector& output_buffers) { -// printf("Sink::Run(): %s\n", op_name_->c_str()); - // Read all input buffer and send to output buffer -// output_buffer_->Append(input_buffer_->Get()); - } -private: - std::unique_ptr op_name_; -}; - -enum class SourceType { - kScan, // each pipeline has its own source - kExchange, // all pipelines share one source -}; - -class Source { -public: - explicit - Source(const std::string& name, SourceType source_type) : op_name_(std::make_unique(name)), type_(source_type) {} - - inline void - Run(ConcurrentQueue* input_queue, const Buffer* input_buffer, Buffer* output_buffer) { - // Send read file request to file reader -// printf("Source::Run(): %s\n", op_name_->c_str()); - - // Or read data from the input buffer, and put it to output_buffer - } - - SourceType - type() const { - return type_; - } -private: - std::unique_ptr op_name_; - SourceType type_; -}; - -} diff --git a/benchmark/blocking_scheduler/scheduler_benchmark.cpp b/benchmark/blocking_scheduler/scheduler_benchmark.cpp deleted file mode 100644 index 99b9aa40c6..0000000000 --- a/benchmark/blocking_scheduler/scheduler_benchmark.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// -// Created by jinhai on 23-5-7. -// - -#include "task.h" -#include "fragment.h" -#include "concurrentqueue.h" -#include "base_profiler.h" -#include "ctpl.h" -#include - -using namespace infinity; - - -void -test_concurrent_queue() { - ConcurrentQueue queue; - ctpl::thread_pool p(2); - - std::unique_ptr buffer1 = std::make_unique(BUFFER_SIZE); - std::unique_ptr buffer2 = std::make_unique(BUFFER_SIZE); - std::unique_ptr buffer3 = std::make_unique(BUFFER_SIZE); - queue.TryEnqueue(std::move(buffer1)); - p.push([](int64_t cpu_id, ConcurrentQueue* queue) { - while(true) { - std::shared_ptr buffer; - if(queue->TryDequeue(buffer)) { - if(buffer == nullptr) { - printf("Terminated\n"); - break; - } else { - printf("got a buffer\n"); - } - } else { - printf("trying\n"); - } - } - }, &queue); -// sleep(1); - queue.TryEnqueue(std::move(buffer2)); - queue.TryEnqueue(std::move(buffer3)); - std::unique_ptr buffer4{nullptr}; - queue.TryEnqueue(std::move(buffer4)); - p.stop(true); -} - -void -test_waitfree_queue() { - WaitFreeQueue queue; - ctpl::thread_pool p(2); - std::shared_ptr buffer1 = std::make_shared(BUFFER_SIZE); - std::shared_ptr buffer2 = std::make_shared(BUFFER_SIZE); - std::shared_ptr buffer3 = std::make_shared(BUFFER_SIZE); - queue.TryEnqueue(buffer1); - p.push([](int64_t cpu_id, WaitFreeQueue* queue) { - while(true) { - std::shared_ptr buffer; - if(queue->TryDequeue(buffer)) { - if(buffer == nullptr) { - printf("Terminated\n"); - break; - } else { - printf("got a buffer\n"); - } - } else { - printf("trying\n"); - } - } - }, &queue); -// sleep(1); - queue.TryEnqueue(buffer2); - queue.TryEnqueue(buffer3); - std::shared_ptr buffer4{nullptr}; - queue.TryEnqueue(buffer4); - p.stop(true); -} - -static BaseProfiler profiler; -std::atomic_long long_atomic{0}; - -void -execute_task(int64_t id, Task* task, int64_t 
task_count) { -// printf("execute task by thread: %ld\n", id); - if(task->type() == TaskType::kPipeline) { - PipelineTask* root_task = (PipelineTask*)(task); - root_task->Init(); - - std::queue queue; - queue.push(root_task); - while(!queue.empty()) { - PipelineTask* task_node = queue.front(); - queue.pop(); - if(task_node->children().empty()) { - NewScheduler::RunTask(task_node); - continue; - } - for(const auto& child_task: task_node->children()) { - queue.push((PipelineTask*)child_task.get()); - } - } - - root_task->GetResult(); - ++long_atomic; - if(long_atomic > task_count) { - printf("time cost: %ld ms\n", profiler.Elapsed() / 1000000); - } - } -} - -void -direct_execute_task(int64_t id, Task* task, int64_t task_count) { -// printf("execute task by thread: %ld\n", id); - if(task->type() == TaskType::kPipeline) { - PipelineTask* root_task = (PipelineTask*)(task); - root_task->Init(); - - std::queue queue; - queue.push(root_task); - while(!queue.empty()) { - PipelineTask* task_node = queue.front(); - queue.pop(); - if(task_node->children().empty()) { - NewScheduler::DispatchTask(id % 16, task_node); -// NewScheduler::RunTask(task_node); - continue; - } - for(const auto& child_task: task_node->children()) { - queue.push((PipelineTask*)child_task.get()); - } - } - - root_task->GetResult(); - ++long_atomic; - if(long_atomic > task_count) { - printf("time cost: %ld ms\n", profiler.Elapsed() / 1000000); - } - } -} - -void -start_scheduler() { -// const std::unordered_set cpu_mask{1, 3, 5, 7, 9, 11, 13, 15}; -// const std::unordered_set cpu_mask{1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15}; -// const std::unordered_set cpu_mask{1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15}; -// const std::unordered_set cpu_mask{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -// const std::unordered_set cpu_mask{1, 3, 5, 7}; - const std::unordered_set cpu_mask; -// total_query_count = 16; - - int64_t cpu_count = std::thread::hardware_concurrency(); - std::unordered_set cpu_set; - for(int64_t idx = 0; idx < cpu_count; ++idx) { - if(!cpu_mask.contains(idx)) { - cpu_set.insert(idx); - } - } - - NewScheduler::Init(cpu_set); -} - -void -stop_scheduler() { - NewScheduler::Uninit(); -} - -std::unique_ptr -build_fragment0(uint64_t id, const std::string& name) { - // sink->op2->op1->scan - - std::unique_ptr fragment = std::make_unique(id, FragmentType::kParallel); - - std::unique_ptr source = std::make_unique(name, SourceType::kScan); - fragment->AddSource(std::move(source)); - - std::unique_ptr op1 = std::make_unique(name); - fragment->AddOperator(std::move(op1)); -// std::unique_ptr op2 = std::make_unique(name); -// fragment->AddOperator(std::move(op2)); - - std::unique_ptr sink = std::make_unique(name); - fragment->AddSink(std::move(sink)); - - return fragment; -} - -std::unique_ptr -build_fragment1(uint64_t id, const std::string& name) { - // sink->op2->op1->exchange - std::unique_ptr root_fragment = std::make_unique(id, FragmentType::kSerial); - - { - std::unique_ptr source = std::make_unique(name, SourceType::kExchange); - root_fragment->AddSource(std::move(source)); - - std::unique_ptr op1 = std::make_unique(name); - root_fragment->AddOperator(std::move(op1)); - std::unique_ptr op2 = std::make_unique(name); - root_fragment->AddOperator(std::move(op2)); - - std::unique_ptr sink = std::make_unique(name); - root_fragment->AddSink(std::move(sink)); - } - - std::unique_ptr child_fragment = std::make_unique(id, FragmentType::kParallel); - - { - std::unique_ptr source = std::make_unique(name, SourceType::kScan); - 
child_fragment->AddSource(std::move(source)); - - std::unique_ptr op1 = std::make_unique(name); - child_fragment->AddOperator(std::move(op1)); - std::unique_ptr op2 = std::make_unique(name); - child_fragment->AddOperator(std::move(op2)); - - std::unique_ptr sink = std::make_unique(name); - child_fragment->AddSink(std::move(sink)); - } - - root_fragment->SetChild(std::move(child_fragment)); - - return root_fragment; -} - -auto -main() -> int { - -// uint64_t parallel_size = std::thread::hardware_concurrency(); -// uint64_t parallel_size = 65536; - uint64_t parallel_size = 65536 * 50; - - start_scheduler(); - - ctpl::thread_pool pool(16); - - std::unique_ptr frag0 = build_fragment0(0, "test"); -// std::unique_ptr frag0 = build_fragment1(0, "test"); - std::vector> root_tasks = frag0->BuildTask(parallel_size); - std::shared_ptr source_buffer_ = std::make_unique(BUFFER_SIZE); - - profiler.Begin(); - for(const auto& task: root_tasks) { -// pool.push(execute_task, task.get(), parallel_size - 1); - pool.push(direct_execute_task, task.get(), parallel_size - 1); - - } - sleep(5); - stop_scheduler(); -} \ No newline at end of file diff --git a/benchmark/blocking_scheduler/task.cpp b/benchmark/blocking_scheduler/task.cpp deleted file mode 100644 index 13470c8345..0000000000 --- a/benchmark/blocking_scheduler/task.cpp +++ /dev/null @@ -1,172 +0,0 @@ -// -// Created by jinhai on 23-5-11. -// - - -#include "task.h" -#include "threadutil.h" -#include -#include -#include - -namespace infinity { - -std::unordered_set NewScheduler::cpu_set{}; -std::unordered_map> NewScheduler::task_queues{}; -std::unordered_map> NewScheduler::workers{}; -std::unique_ptr NewScheduler::input_queue{}; -std::unique_ptr NewScheduler::coordinator{}; -std::vector NewScheduler::cpu_array{}; -uint64_t NewScheduler::current_cpu_id{}; - -void -NewScheduler::CoordinatorLoop(int64_t cpu_id) { - std::vector input_tasks(50); -// Task* input_task{nullptr}; - bool running{true}; - printf("start coordinator on CPU: %ld\n", cpu_id); - while(running) { - size_t task_count = NewScheduler::input_queue->DequeueBulk(input_tasks.begin(), 50); -// if(task_count > 1) { -// printf("Get tasks count: %lu\n", task_count); -// } - for(size_t idx = 0; idx < task_count; ++idx) { - Task* input_task = input_tasks[idx]; - if(__builtin_expect((input_task == nullptr), false)) { - printf("coordinator: null task\n"); - continue; - } - - switch(input_task->type()) { - case TaskType::kPipeline: { -// printf("coordinator receives PIPELINE TASK on CPU: %ld\n", cpu_id); - PipelineTask* pipeline_task = (PipelineTask*)input_task; - // Construct pipeline task and schedule it. 
- if(__builtin_expect((pipeline_task->last_worker_id_ == -1), true)) { - // Select an available cpu - current_cpu_id = current_cpu_id % cpu_array.size(); -// printf("Dispatched to CPU: %ld\n", cpu_array[current_cpu_id]); - NewScheduler::DispatchTask(cpu_array[current_cpu_id], pipeline_task); - ++current_cpu_id; - } else { - NewScheduler::DispatchTask(pipeline_task->last_worker_id_, pipeline_task); - } - break; - } - case TaskType::kTerminate: { - printf("terminate coordinator on CPU: %ld\n", cpu_id); - running = false; - break; - } - case TaskType::kDummy: { - DummyTask* dummy_task = (DummyTask*)input_task; - if(dummy_task->last_worker_id_ == -1) { - // Select an available cpu - NewScheduler::DispatchTask(current_cpu_id % cpu_array.size(), dummy_task); - ++current_cpu_id; - } else { - NewScheduler::DispatchTask(dummy_task->last_worker_id_, dummy_task); - } - break; - } - case TaskType::kInvalid: { - printf("receive invalid type of task, terminate coordinator on CPU: %ld\n", cpu_id); - running = false; - break; - } - } - } - } -} - -void -NewScheduler::WorkerLoop(BlockingQueue* task_queue, int64_t worker_id) { - Task* task{nullptr}; - bool running{true}; - printf("start worker on CPU: %ld\n", worker_id); - while(running) { - task_queue->Dequeue(task); - if(task == nullptr) { - printf("worker %ld: null task\n", worker_id); - continue; - } - switch(task->type()) { - case TaskType::kTerminate: { - printf("terminate worker on CPU: %ld\n", worker_id); - running = false; - break; - } - case TaskType::kDummy: - case TaskType::kPipeline: { - task->Run(worker_id); - break; - } - case TaskType::kInvalid: { - printf("receive invalid type of task, terminate worker on CPU: %ld\n", worker_id); - running = false; - break; - } - } - } -} - -void -NewScheduler::Init(const std::unordered_set& input_cpu_set) { - if(!cpu_set.empty()) { - std::cerr << "scheduler was initialized before" << std::endl; - return; - } - cpu_set = input_cpu_set; - - cpu_array.reserve(cpu_set.size()); - for(int64_t cpu_id: cpu_set) { - cpu_array.emplace_back(cpu_id); - - std::unique_ptr task_queue = std::make_unique(); - std::unique_ptr task_thread = std::make_unique(WorkerLoop, task_queue.get(), cpu_id); - - // Pin the thread to specific cpu - ThreadUtil::pin(*task_thread, cpu_id); - - task_queues.emplace(cpu_id, std::move(task_queue)); - workers.emplace(cpu_id, std::move(task_thread)); - } - - // Start coordinator - input_queue = std::make_unique(); - coordinator = std::make_unique(CoordinatorLoop, 0); - ThreadUtil::pin(*coordinator, 0); -} - -int64_t -NewScheduler::GetAvailableCPU() { - assert(false); - return 0; -} - -void -NewScheduler::Uninit() { - std::unique_ptr terminate_task = std::make_unique(); - input_queue->Enqueue(terminate_task.get()); - coordinator->join(); - for(int64_t cpu_id: cpu_set) { - task_queues[cpu_id]->Enqueue(terminate_task.get()); - workers[cpu_id]->join(); - } -} - -void -NewScheduler::RunTask(Task* task) { - input_queue->Enqueue(task); -} - -//void -//NewScheduler::DispatchTask(int64_t worker_id, Task* task) { -//// if(!task_queues.contains(worker_id)) { -//// printf("Can't use this CPU: %ld\n", worker_id); -//// assert(false); -//// } -// task_queues[worker_id]->Enqueue(task); -//} - -} \ No newline at end of file diff --git a/benchmark/blocking_scheduler/task.h b/benchmark/blocking_scheduler/task.h deleted file mode 100644 index ba8611efdf..0000000000 --- a/benchmark/blocking_scheduler/task.h +++ /dev/null @@ -1,242 +0,0 @@ -// -// Created by jinhai on 23-5-7. 
-// - -#pragma once - - -#include "operator.h" -#include "buffer_queue.h" -#include "task_queue.h" -#include -#include -#include -#include -#include - -namespace infinity { - -struct Task; - -class NewScheduler { -public: - static void - Init(const std::unordered_set& cpu_set); - - static void - Uninit(); - - static void - RunTask(Task* task); - - inline static void - DispatchTask(long worker_id, Task* task) { - task_queues[worker_id]->Enqueue(task); - } -private: - static void - CoordinatorLoop(long cpu_id); - - static void - WorkerLoop(BlockingQueue* task_queue, long worker_id); - - static long - GetAvailableCPU(); - -private: - static std::unordered_set cpu_set; - - static std::unordered_map> task_queues; - static std::unordered_map> workers; - - static std::unique_ptr input_queue; - static std::unique_ptr coordinator; - - static std::vector cpu_array; - static uint64_t current_cpu_id; -}; - - -#define BUFFER_SIZE 128 - -enum class TaskType { - kTerminate, - kDummy, - kPipeline, - kInvalid, -}; - -struct Task { - inline explicit - Task(TaskType type) : type_(type) {} - - virtual void - Run(long worker_id) { - // Not implemented - last_worker_id_ = worker_id; - } - - [[nodiscard]] inline TaskType - type() const { - return type_; - } - - TaskType type_{TaskType::kInvalid}; - long last_worker_id_{-1}; - bool ready_{false}; -}; - -struct TerminateTask final : public Task { - inline explicit - TerminateTask() : Task(TaskType::kTerminate) { - ready_ = true; - } -}; - -struct DummyTask final : public Task { - inline explicit - DummyTask() : Task(TaskType::kDummy) { - ready_ = true; - } - - void - Run(long worker_id) override { - last_worker_id_ = worker_id; - printf("Run dummy task by worker: %ld\n", worker_id); - sleep(1); - } -}; - -struct PipelineTask final : public Task { - inline explicit - PipelineTask() : Task(TaskType::kPipeline) {} - - inline void - Init() { - if(parents_.empty()) { - root_task_ = true; - } else { - root_task_ = false; - } - } - - inline void - AddSink(Sink* sink) { - sink_ = sink; - } - - inline void - AddSource(Source* source, bool input_queue) { - source_ = source; - if(input_queue) { - input_queue_ = std::make_unique(); - } - } - - inline void - AddOperator(Operator* op) { - operators_.emplace_back(op); - buffers_.emplace_back(std::make_unique(BUFFER_SIZE)); - } - - inline void - Run(long worker_id) override { - last_worker_id_ = worker_id; -// printf("Run pipeline task by worker: %ld\n", worker_id); - - // Read data from source buffer or input queue - if(input_queue_ == nullptr) { - std::string id_str = std::to_string(worker_id); - source_buffer_ = std::make_shared(BUFFER_SIZE); - source_buffer_->Append(id_str.c_str()); -// memcpy((void*)(source_buffer_.get()), id_str.c_str(), id_str.size()); - } else { -// printf("Get data from input queue\n"); - input_queue_->TryDequeue(source_buffer_); - } - - // process the data one by one operator and push to next operator - size_t op_count = operators_.size(); - assert(op_count > 0); - operators_[0]->Run(source_buffer_.get(), buffers_[0].get()); - for(size_t idx = 1; idx < op_count; ++idx) { - operators_[idx]->Run(buffers_[idx - 1].get(), buffers_[idx].get()); - } - - // push the data into output queue - sink_->Run(buffers_.back().get(), output_queues_); - - // put the parent task into scheduler - for(Task* parent: parents_) { -// printf("Notify parent to run\n"); - NewScheduler::RunTask(parent); - } - - if(root_task_) { -// wait_flag_.notify_one(); -// printf("Notify result\n"); - std::unique_lock lck(result_lk_); - 
completed_ = true; - result_cv_.notify_one(); - } -// sleep(1); - } - - inline void - SetChildren(std::vector> children) { - children_ = std::move(children); - for(const std::shared_ptr& child: children_) { - PipelineTask* child_pipeline = (PipelineTask*)child.get(); - child_pipeline->AddOutputQueue(input_queue_.get()); - child_pipeline->AddParent(this); - } - } - - [[nodiscard]] inline const std::vector>& - children() const { - return children_; - } - - inline void - GetResult() { -// wait_flag_.wait(true); - std::unique_lock locker(result_lk_); - result_cv_.wait(locker, [&] { - return completed_; - }); -// printf("Get result\n"); - } - -private: - inline void - AddOutputQueue(ConcurrentQueue* queue) { - output_queues_.emplace_back(queue); - } - - inline void - AddParent(Task* parent) { - parents_.emplace_back(parent); - } -private: - Sink* sink_{}; - std::vector output_queues_; - - std::vector operators_{}; - std::vector> buffers_{}; - - Source* source_{}; - std::shared_ptr source_buffer_ = nullptr; - // Wait-free queue - std::unique_ptr input_queue_{nullptr}; - - std::vector> children_{}; - std::vector parents_{}; - - bool root_task_{false}; - bool completed_{false}; - std::mutex result_lk_; - std::condition_variable result_cv_; -// std::atomic_bool wait_flag_{false}; -}; - -} diff --git a/benchmark/blocking_scheduler/task_queue.h b/benchmark/blocking_scheduler/task_queue.h deleted file mode 100644 index f3ead38212..0000000000 --- a/benchmark/blocking_scheduler/task_queue.h +++ /dev/null @@ -1,33 +0,0 @@ -// -// Created by jinhai on 23-5-9. -// - -#pragma once - -#include "blockingconcurrentqueue.h" - -namespace infinity { - -struct Task; - -struct BlockingQueue { - void - Enqueue(Task* task) { - queue_.enqueue(task); - } - - template - size_t - DequeueBulk(It iter, size_t count) { - return queue_.wait_dequeue_bulk(iter, count); - } - - void - Dequeue(Task*& task) { - queue_.wait_dequeue(task); - } - - moodycamel::BlockingConcurrentQueue queue_; -}; - -} diff --git a/benchmark/csv/CMakeLists.txt b/benchmark/csv/CMakeLists.txt deleted file mode 100644 index ee1f5eb5a4..0000000000 --- a/benchmark/csv/CMakeLists.txt +++ /dev/null @@ -1,41 +0,0 @@ - -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/csv_config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/csv_config.h) - -add_executable(csv_benchmark - csv_benchmark.cpp -) - -target_link_libraries(csv_benchmark - zsv_parser -) - -if(ENABLE_JEMALLOC) - target_link_libraries(csv_benchmark jemalloc.a) -endif() - -target_include_directories(csv_benchmark - PUBLIC "${CMAKE_SOURCE_DIR}/third_party/zsv/include") -target_include_directories(csv_benchmark - PUBLIC "${CMAKE_SOURCE_DIR}/core") - -# add_definitions(-march=native) -# add_definitions(-msse4.2 -mfma) -# add_definitions(-mavx2 -mf16c -mpopcnt) - -# execute_process(COMMAND grep -q avx2 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX2 -# OUTPUT_QUIET -# ERROR_QUIET) - -# execute_process(COMMAND grep -q avx512 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX512 -# OUTPUT_QUIET -# ERROR_QUIET) - -# if (SUPPORT_AVX2 EQUAL 0 OR SUPPORT_AVX512 EQUAL 0) -# message("Compiled by AVX2 or AVX512") -# target_compile_options(csv_benchmark PUBLIC $<$:-march=native>) -# else() -# message("Compiled by SSE") -# target_compile_options(csv_benchmark PUBLIC $<$:-msse4.2 -mfma>) -# endif() \ No newline at end of file diff --git a/benchmark/csv/csv_benchmark.cpp b/benchmark/csv/csv_benchmark.cpp deleted file mode 100644 index fcd4cd5f1c..0000000000 --- a/benchmark/csv/csv_benchmark.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// -// Created by 
jinhai on 23-8-27. -// - -#include -#include -#include -#include - -#include "csv_config.h" - -extern "C" { -#include "zsv.h" -} - -struct my_data { - zsv_parser parser; /* used to access the parsed data */ - size_t row_num; /* used to track the current row number */ -}; - -/** - * Our row handler is passed a pointer to our structure - */ -void -my_row_handler(void* ctx) { - struct my_data* data = static_cast(ctx); - - /* get a cell count */ - size_t cell_count = zsv_cell_count(data->parser); - - /* iterate through each cell in this row, to count blanks */ - size_t nonblank = 0; - for(size_t i = 0; i < cell_count; i++) { - /* use zsv_get_cell() to get our cell data */ - struct zsv_cell c = zsv_get_cell(data->parser, i); - - printf("%s, ", c.str); - /* check if the cell data length is zero */ - if(c.len > 0) - nonblank++; - } - printf("\n"); - - /* print our results for this row */ - printf("Row %zu has %zu columns of which %zu %s non-blank\n", - ++data->row_num, - cell_count, - nonblank, - nonblank == 1 ? "is" : "are"); -} - -auto -main() -> int { - - std::cout << std::filesystem::current_path() << std::endl; - - std::string filename = std::string(CSV_DATA_PATH) + "/test/flatten.csv"; - - FILE* f = fopen(filename.c_str(), "rb"); - if(!f) { - perror(filename.c_str()); - return 1; - } - - /** - * Configure our parser options. Here, all we do is specify our row handler - * and the pointer that will be passed to our row handler. There are many - * other options to control the parser behavior (CSV vs TSV vs other - * delimited; header row span etc)-- for details, see - * ../../include/zsv/api.h - */ - struct zsv_opts opts = {}; - opts.row_handler = my_row_handler; - struct my_data data = {}; - opts.ctx = &data; - opts.stream = f; - opts.delimiter = ','; - - - /** - * Create a parser - */ - data.parser = zsv_new(&opts); - - /** - * Continuously parse our input until we have no more input - */ - enum zsv_status stat; - while((stat = zsv_parse_more(data.parser)) == zsv_status_ok) { - - break; - } - - - /** - * Clean up - */ - zsv_finish(data.parser); - zsv_delete(data.parser); - - if(f != stdin) - fclose(f); - - if(stat == zsv_status_ok) { - return 0; - } - - /** - * If there was a parse error, print it - */ - if(stat != zsv_status_no_more_input) { - fprintf(stderr, "Parse error: %s\n", zsv_parse_status_desc(stat)); - return 1; - } - - return 0; -} diff --git a/benchmark/csv/csv_config.h.in b/benchmark/csv/csv_config.h.in deleted file mode 100644 index bb9e85507d..0000000000 --- a/benchmark/csv/csv_config.h.in +++ /dev/null @@ -1,8 +0,0 @@ -// -// Created by JinHai on 2022/9/5. 
-// - -#pragma once - -#define TEST_DATA_PATH "@TEST_DATA_PATH@" -#define CSV_DATA_PATH "@CSV_DATA_PATH@" \ No newline at end of file diff --git a/benchmark/embedding/CMakeLists.txt b/benchmark/embedding/CMakeLists.txt deleted file mode 100644 index 785d12ddae..0000000000 --- a/benchmark/embedding/CMakeLists.txt +++ /dev/null @@ -1,60 +0,0 @@ -add_executable(hnsw_benchmark2 - hnsw_benchmark2.cpp - helper.cpp - helper.h -) -target_include_directories(hnsw_benchmark2 PUBLIC "${CMAKE_SOURCE_DIR}/src") -target_link_libraries( - hnsw_benchmark2 - infinity_core - sql_parser - benchmark_profiler -) - -add_executable(ann_ivfflat_benchmark - ann_ivfflat_benchmark.cpp - helper.cpp - helper.h -) -target_include_directories(ann_ivfflat_benchmark PUBLIC "${CMAKE_SOURCE_DIR}/third_party/mlas") -target_include_directories(ann_ivfflat_benchmark PUBLIC "${CMAKE_SOURCE_DIR}/src") -target_link_libraries( - ann_ivfflat_benchmark - infinity_core - sql_parser - zsv_parser - benchmark_profiler - onnxruntime_mlas - newpfor - fastpfor - atomic.a - lz4.a - jma -) - -if(ENABLE_JEMALLOC) - target_link_libraries(hnsw_benchmark2 jemalloc.a) - target_link_libraries(ann_ivfflat_benchmark jemalloc.a) -endif() - -# add_definitions(-march=native) -# add_definitions(-msse4.2 -mfma) -# add_definitions(-mavx2 -mf16c -mpopcnt) - -# execute_process(COMMAND grep -q avx2 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX2 -# OUTPUT_QUIET -# ERROR_QUIET) - -# execute_process(COMMAND grep -q avx512 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX512 -# OUTPUT_QUIET -# ERROR_QUIET) - -# if (SUPPORT_AVX2 EQUAL 0 OR SUPPORT_AVX512 EQUAL 0) -# message("Compiled by AVX2 or AVX512") -# target_compile_options(ann_ivfflat_benchmark PUBLIC $<$:-march=native>) -# else() -# message("Compiled by SSE") -# target_compile_options(ann_ivfflat_benchmark PUBLIC $<$:-msse4.2 -mfma>) -# endif() diff --git a/benchmark/embedding/ann_ivfflat_benchmark.cpp b/benchmark/embedding/ann_ivfflat_benchmark.cpp deleted file mode 100644 index 46b06946b0..0000000000 --- a/benchmark/embedding/ann_ivfflat_benchmark.cpp +++ /dev/null @@ -1,341 +0,0 @@ - - -/* - * This benchmark includes: - * 1. ann_ivfflatl2 (train, add, search, save to / load from file) - * 2. 
ann_ivfflatip (train, add, search, save to / load from file) - */ - -#include "base_profiler.h" -#include "helper.h" -#include -#include - -import stl; -import index_base; -import ann_ivf_flat; -import annivfflat_index_data; - -import local_file_system; -import knn_expr; - -static const char *sift1m_train = "/benchmark_dataset/sift1M/sift_learn.fvecs"; -static const char *sift1M_base = "/benchmark_dataset/sift1M/sift_base.fvecs"; -static const char *sift1M_query = "/benchmark_dataset/sift1M/sift_query.fvecs"; -static const char *sift1M_groundtruth = "/benchmark_dataset/sift1M/sift_groundtruth.ivecs"; - -static const char *deep10M_train = "/benchmark_dataset/deep10M/deep10M_base.fvecs"; -static const char *deep10M_base = "/benchmark_dataset/deep10M/deep10M_base.fvecs"; -static const char *deep10M_query = "/benchmark_dataset/deep10M/deep10M_query.fvecs"; -static const char *deep10M_groundtruth = "/benchmark_dataset/deep10M/deep10M_groundtruth.ivecs"; - -static const char *ivfflat_index_name_prefix = "/benchmark_save_index/ivfflat_index/"; -static const char *ivfflat_index_name_suffix = "/ivfflat_index.save"; - -using namespace infinity; - -template -std::unique_ptr load_data(const std::string &filename, size_t &num, size_t &dim) { - std::ifstream in(filename, std::ios::binary); - if (!in.is_open()) { - std::cout << "open file error" << std::endl; - exit(-1); - } - int dim_; - in.read((char *)&dim_, 4); - dim = (size_t)dim_; - in.seekg(0, std::ios::end); - auto ss = in.tellg(); - num = ((size_t)ss) / (dim + 1) / 4; - auto data = std::make_unique(num * dim); - - in.seekg(0, std::ios::beg); - for (size_t i = 0; i < num; i++) { - in.seekg(4, std::ios::cur); - in.read((char *)(data.get() + i * dim), dim * 4); - } - in.close(); - return data; -} - -template -void output_recall_top_k(const unsigned top_k, - std::vector> &results, - const std::vector> &groundtruth_1, - const std::vector> &groundtruth_10, - const std::vector> &groundtruth_100) { - if (top_k != 100) { - std::cerr << "top_k can only be 100" << std::endl; - exit(-1); - } - size_t gt_cnt_1 = 0, gt_cnt_10 = 0, gt_cnt_100 = 0; - for (unsigned i = 0; i < results.size(); i++) { - for (size_t cnt = 0; cnt < 100; ++cnt) { - auto id = results[i][cnt]; - if (cnt < 1 && groundtruth_1[i].contains(id)) { - ++gt_cnt_1; - } - if (cnt < 10 && groundtruth_10[i].contains(id)) { - ++gt_cnt_10; - } - if (cnt < 100 && groundtruth_100[i].contains(id)) { - ++gt_cnt_100; - } - } - } - std::cout << "R@1: " << std::fixed << std::setprecision(3) << float(gt_cnt_1) / float(results.size() * 1) << std::endl; - std::cout << "R@10: " << std::fixed << std::setprecision(3) << float(gt_cnt_10) / float(results.size() * 10) << std::endl; - std::cout << "R@100: " << std::fixed << std::setprecision(3) << float(gt_cnt_100) / float(results.size() * 100) << std::endl; -} - -int main() { - const char *homeDir = getenv("HOME"); - std::string train_file = homeDir; - std::string base_file = homeDir; - std::string query_file = homeDir; - std::string groundtruth_file = homeDir; - std::string data_type; - size_t dimension = {}; - size_t base_counts = {}; - size_t centroids = 1000; - size_t n_probes_begin = 1; - size_t n_probes_end = 10; - size_t n_probes_step = 1; - // let user choose to test sift1M or deep10M - enum test_type { invalid = 0, sift1M = 1, deep10M = 2 }; - std::cout << "please choose to test sift1M or deep10M, input 1 or 2" << std::endl; - std::cout << "1. sift1M" << std::endl; - std::cout << "2. 
deep10M" << std::endl; - int choose = 0; - std::cin >> choose; - switch (choose) { - case test_type::sift1M: { - data_type = "sift1M"; - dimension = 128; - base_counts = 1'000'000; - train_file += sift1m_train; - base_file += sift1M_base; - query_file += sift1M_query; - groundtruth_file += sift1M_groundtruth; - break; - } - case test_type::deep10M: { - data_type = "deep10M"; - dimension = 96; - base_counts = 10'000'000; - train_file += deep10M_train; - base_file += deep10M_base; - query_file += deep10M_query; - groundtruth_file += deep10M_groundtruth; - break; - } - default: - std::cout << "invalid input, exit." << std::endl; - return -1; - } - MetricType metric = MetricType::kInvalid; - std::string metric_str; - // let user choose metric - std::cout << "please choose metric, input 1 or 2" << std::endl; - std::cout << "1. L2" << std::endl; - std::cout << "2. IP" << std::endl; - int choose_metric = 0; - std::cin >> choose_metric; - switch (choose_metric) { - case 1: { - metric = MetricType::kMetricL2; - metric_str = "L2"; - break; - } - case 2: { - metric = MetricType::kMetricInnerProduct; - metric_str = "IP"; - break; - } - default: - std::cout << "invalid input, exit." << std::endl; - return -1; - } - // let user choose centroids, n_probes_begin, n_probes_end, n_probes_step - std::cout << "please input centroids, n_probes_begin, n_probes_end, n_probes_step. input 0 to use default values" << std::endl; - size_t choose_centroids = 0, choose_n_probes_begin = 0, choose_n_probes_end = 0, choose_n_probes_step = 0; - std::cin >> choose_centroids >> choose_n_probes_begin >> choose_n_probes_end >> choose_n_probes_step; - if (choose_centroids != 0) { - centroids = choose_centroids; - } - if (choose_n_probes_begin != 0) { - n_probes_begin = choose_n_probes_begin; - } - if (choose_n_probes_end != 0) { - n_probes_end = choose_n_probes_end; - } - if (choose_n_probes_step != 0) { - n_probes_step = choose_n_probes_step; - } - std::string build_parameter = "." + data_type + "." + metric_str + ".C." + std::to_string(centroids); - std::string ivfflat_index_name = - std::string(homeDir) + std::string(ivfflat_index_name_prefix) + data_type + std::string(ivfflat_index_name_suffix) + build_parameter; - std::cout << "data_type: " << data_type << std::endl; - std::cout << "metric: " << metric_str << std::endl; - std::cout << "dimension: " << dimension << std::endl; - std::cout << "base_counts: " << base_counts << std::endl; - std::cout << "centroids: " << centroids << std::endl; - std::cout << "n_probes_begin: " << n_probes_begin << std::endl; - std::cout << "n_probes_end: " << n_probes_end << std::endl; - std::cout << "n_probes_step: " << n_probes_step << std::endl; - std::cout << "ivfflat_index_name: " << ivfflat_index_name << std::endl; - UniquePtr> ann_index_data; - - std::ifstream f(ivfflat_index_name); - if (f.good()) { - // Found index file - std::cout << "Found index file ... 
" << std::endl; - ann_index_data = AnnIVFFlatIndexData::LoadIndex(ivfflat_index_name, MakeUnique()); - } else { - BaseProfiler profiler; - { - size_t embedding_count; - std::unique_ptr input_embeddings; - { - size_t dim; - BaseProfiler profiler_in; - profiler_in.Begin(); - input_embeddings = load_data(base_file, embedding_count, dim); - profiler_in.End(); - assert(dimension == dim || !"embedding dimension wrong"); - assert(base_counts == embedding_count || !"embedding count wrong"); - std::cout << "Load base data: " << profiler_in.ElapsedToString() << std::endl; - // output embedding_count, dimension - std::cout << "embedding_count: " << embedding_count << std::endl; - std::cout << "dimension: " << dim << std::endl; - } - size_t train_count; - std::unique_ptr input_train; - f32 *input_train_ptr = nullptr; - if (train_file == base_file) { - train_count = embedding_count; - input_train_ptr = input_embeddings.get(); - std::cout << "Use base data as train data" << std::endl; - } else { - size_t dim; - BaseProfiler profiler_in; - profiler_in.Begin(); - input_train = load_data(train_file, train_count, dim); - profiler_in.End(); - input_train_ptr = input_train.get(); - assert(dimension == dim || !"embedding dimension wrong"); - std::cout << "Load train data: " << profiler_in.ElapsedToString() << std::endl; - // output embedding_count, dimension - std::cout << "train_count: " << train_count << std::endl; - std::cout << "dimension: " << dim << std::endl; - } - profiler.Begin(); - switch (metric) { - case MetricType::kMetricL2: { - ann_index_data = - AnnIVFFlatL2::CreateIndex(dimension, train_count, input_train_ptr, base_counts, input_embeddings.get(), centroids); - break; - } - case MetricType::kMetricInnerProduct: { - ann_index_data = - AnnIVFFlatIP::CreateIndex(dimension, train_count, input_train_ptr, base_counts, input_embeddings.get(), centroids); - break; - } - default: - std::cout << "invalid metric" << std::endl; - return -1; - } - profiler.End(); - std::cout << "before clearing input vector, memory cost: " << get_current_rss() / (1024 * 1024) << " MiB" << std::endl; - } - std::cout << "Build index cost: " << profiler.ElapsedToString() << " memory cost: " << get_current_rss() / (1024 * 1024) << " MiB" - << std::endl; - ann_index_data->SaveIndex(ivfflat_index_name, MakeUnique()); - } - - size_t number_of_queries; - std::unique_ptr queries; - { - size_t dim; - BaseProfiler profiler; - profiler.Begin(); - queries = load_data(query_file, number_of_queries, dim); - profiler.End(); - assert(dimension == dim || !"query does not have same dimension as train set"); - std::cout << "Load query data: " << profiler.ElapsedToString() << std::endl; - } - - size_t top_k{}; // nb of results per query in the GT - std::vector> ground_truth_sets_1, ground_truth_sets_10, - ground_truth_sets_100; // number_of_queries * top_k matrix of ground-truth nearest-neighbors - { - // load ground-truth and convert int to long - size_t nq2; - BaseProfiler profiler; - profiler.Begin(); - - auto gt_int = load_data(groundtruth_file, nq2, top_k); - assert(nq2 == number_of_queries || !"incorrect nb of ground truth entries"); - - ground_truth_sets_1.resize(number_of_queries); - ground_truth_sets_10.resize(number_of_queries); - ground_truth_sets_100.resize(number_of_queries); - for (size_t i = 0; i < number_of_queries; ++i) { - for (size_t j = 0; j < top_k; ++j) { - auto gt = gt_int[i * top_k + j]; - if (j < 1) { - ground_truth_sets_1[i].insert(gt); - } - if (j < 10) { - ground_truth_sets_10[i].insert(gt); - } - if (j < 100) { - 
ground_truth_sets_100[i].insert(gt); - } - } - } - profiler.End(); - std::cout << "Load ground truth: " << profiler.ElapsedToString() << std::endl; - } - for (size_t n_probes = n_probes_begin; n_probes <= n_probes_end; n_probes += n_probes_step) { - BaseProfiler profiler; - std::chrono::duration t_val = std::chrono::duration::zero(); - Vector> results; - auto search_f = [&]() { - AnnIVFFlat test_ivf(queries.get(), number_of_queries, top_k, dimension, EmbeddingDataType::kElemFloat); - test_ivf.Begin(); - profiler.Begin(); - auto t0 = std::chrono::high_resolution_clock::now(); - test_ivf.Search(ann_index_data.get(), 0, n_probes); - t_val += std::chrono::duration_cast>(std::chrono::high_resolution_clock::now() - t0); - profiler.End(); - test_ivf.End(); - auto ID = test_ivf.GetIDs(); - results.resize(number_of_queries); - for (size_t i = 0; i < number_of_queries; ++i) { - results[i].resize(top_k); - for (size_t j = 0; j < top_k; ++j) { - results[i][j] = ID[i * top_k + j].segment_offset_; - } - } - }; - switch (metric) { - case MetricType::kMetricL2: { - search_f.operator()>(); - break; - } - case MetricType::kMetricInnerProduct: { - search_f.operator()>(); - break; - } - default: - std::cout << "invalid metric" << std::endl; - return -1; - } - std::cout << "\nn_probes = " << n_probes << ", Spend: " << profiler.ElapsedToString() << std::endl; - std::cout << "time = " << std::fixed << std::setprecision(3) << t_val.count() << " s" << std::endl; - std::cout << "QPS = " << std::fixed << std::setprecision(3) << f64(number_of_queries) / t_val.count() << std::endl; - output_recall_top_k(top_k, results, ground_truth_sets_1, ground_truth_sets_10, ground_truth_sets_100); - } - return 0; -} diff --git a/benchmark/embedding/helper.cpp b/benchmark/embedding/helper.cpp deleted file mode 100644 index 2bbe073604..0000000000 --- a/benchmark/embedding/helper.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Created by jinhai on 23-5-2. -// - -#include "helper.h" - -#include -#include - -float* -fvecs_read(const char* fname, size_t* d_out, size_t* n_out) { - FILE* f = fopen(fname, "r"); - if(!f) { - fprintf(stderr, "could not open %s\n", fname); - perror(""); - abort(); - } - int d; - fread(&d, 1, sizeof(int), f); - assert((d > 0 && d < 1000000) || !"unreasonable dimension"); - fseek(f, 0, SEEK_SET); - struct stat st; - fstat(fileno(f), &st); - size_t sz = st.st_size; - assert(sz % ((d + 1) * 4) == 0 || !"weird file size"); - size_t n = sz / ((d + 1) * 4); - - *d_out = d; - *n_out = n; - float* x = new float[n * (d + 1)]; - fread(x, sizeof(float), n * (d + 1), f); - - // shift array to remove row headers - for(size_t i = 0; i < n; i++) - memmove(x + i * d, x + 1 + i * (d + 1), d * sizeof(*x)); - - fclose(f); - return x; -} - -int* -ivecs_read(const char* fname, size_t* d_out, size_t* n_out) { - return (int*)fvecs_read(fname, d_out, n_out); -} - -size_t -get_current_rss() { - // Only for linux, copy from hnswlib - long rss = 0L; - FILE* fp = NULL; - if((fp = fopen("/proc/self/statm", "r")) == NULL) - return (size_t)0L; /* Can't open? */ - if(fscanf(fp, "%*s%ld", &rss) != 1) { // asterisk means ignore - fclose(fp); - return (size_t)0L; /* Can't read? */ - } - fclose(fp); - return (size_t)rss * (size_t)sysconf(_SC_PAGESIZE); -} diff --git a/benchmark/embedding/helper.h b/benchmark/embedding/helper.h deleted file mode 100644 index 39f4207130..0000000000 --- a/benchmark/embedding/helper.h +++ /dev/null @@ -1,22 +0,0 @@ -// -// Created by jinhai on 23-5-2. 
-// - -#pragma once - -#include -#include -#include -#include -#include - -// Following functions are copied from FAISS - -float* -fvecs_read(const char* fname, size_t* d_out, size_t* n_out); - -int* -ivecs_read(const char* fname, size_t* d_out, size_t* n_out); - -size_t -get_current_rss(); diff --git a/benchmark/embedding/hnsw_benchmark2.cpp b/benchmark/embedding/hnsw_benchmark2.cpp deleted file mode 100644 index ec759b99ce..0000000000 --- a/benchmark/embedding/hnsw_benchmark2.cpp +++ /dev/null @@ -1,196 +0,0 @@ -#include "base_profiler.h" -#include "helper.h" -#include -#include -#include -#include - -import stl; -import hnsw_alg; -import hnsw_common; -import local_file_system; -import file_system_type; -import file_system; -import data_store; -import vec_store_type; -import dist_func_l2; -import dist_func_ip; -import compilation_config; - -using namespace infinity; - -int main() { - // String base_file = String(test_data_path()) + "/benchmark/text2image_10m/base.fvecs"; - // String query_file = String(test_data_path()) + "/benchmark/text2image_10m/query.fvecs"; - // String groundtruth_file = String(test_data_path()) + "/benchmark/text2image_10m/groundtruth.ivecs"; - - String base_file = String(test_data_path()) + "/benchmark/sift_1m/sift_base.fvecs"; - String query_file = String(test_data_path()) + "/benchmark/sift_1m/sift_query.fvecs"; - String groundtruth_file = String(test_data_path()) + "/benchmark/sift_1m/sift_groundtruth.ivecs"; - - LocalFileSystem fs; - std::string save_dir = tmp_data_path(); - - size_t dimension = 128; - size_t M = 16; - size_t ef_construction = 200; - size_t embedding_count = 1000000; - size_t test_top = 100; - const int build_thread_n = 1; - const int query_thread_n = 1; - - using LabelT = uint64_t; - - // using Hnsw = KnnHnsw, LabelT>; - // std::string save_place = save_dir + "/my_sift_plain_l2.hnsw"; - - using Hnsw = KnnHnsw, LabelT>; - std::string save_place = save_dir + "/my_sift_lvq8_l2_1.hnsw"; - - // using Hnsw = KnnHnsw, LabelT>; - // std::string save_place = save_dir + "/my_sift_plain_ip.hnsw"; - - // using Hnsw = KnnHnsw, LabelT>; - // std::string save_place = save_dir + "/my_sift_lvq8_ip.hnsw"; - - Hnsw knn_hnsw; - std::ifstream f(save_place); - if (!f.good()) { - std::cout << "Build index" << std::endl; - - size_t dim = -1; - size_t eb_cnt = -1; - float *input_embeddings = fvecs_read(base_file.c_str(), &dim, &eb_cnt); - assert(dimension == dim || !"embedding dimension isn't correct"); - assert(embedding_count == eb_cnt || !"embedding size isn't correct"); - - knn_hnsw = Hnsw::Make(embedding_count, 1 /*chunk_n*/, dimension, M, ef_construction); - - infinity::BaseProfiler profiler; - std::cout << "Begin memory cost: " << get_current_rss() << "B" << std::endl; - profiler.Begin(); - - { - std::cout << "Build thread number: " << build_thread_n << std::endl; - - auto [start_i, end_i] = knn_hnsw.StoreDataRaw(input_embeddings, embedding_count); - delete[] input_embeddings; - Atomic next_i = start_i; - std::vector threads; - for (int i = 0; i < build_thread_n; ++i) { - threads.emplace_back([&]() { - while (true) { - VertexType cur_i = next_i.fetch_add(1); - if (cur_i >= VertexType(start_i + embedding_count)) { - break; - } - knn_hnsw.Build(cur_i); - if (cur_i && cur_i % 10000 == 0) { - std::cout << "Inserted " << cur_i << " / " << embedding_count << std::endl; - } - } - }); - } - for (auto &thread : threads) { - thread.join(); - } - } - - profiler.End(); - std::cout << "Insert data cost: " << profiler.ElapsedToString() << " memory cost: " << 
get_current_rss() << "B" << std::endl; - - uint8_t file_flags = FileFlags::WRITE_FLAG | FileFlags::CREATE_FLAG; - std::unique_ptr file_handler = fs.OpenFile(save_place, file_flags, FileLockType::kWriteLock); - knn_hnsw.Save(*file_handler); - file_handler->Close(); - } else { - std::cout << "Load index from " << save_place << std::endl; - - uint8_t file_flags = FileFlags::READ_FLAG; - std::unique_ptr file_handler = fs.OpenFile(save_place, file_flags, FileLockType::kReadLock); - - knn_hnsw = Hnsw::Load(*file_handler); - std::cout << "Loaded" << std::endl; - - // std::ofstream out("dump.txt"); - // knn_hnsw.Dump(out); - // knn_hnsw.Check(); - } - return 0; - - size_t number_of_queries; - const float *queries = nullptr; - { - size_t dim = -1; - queries = const_cast(fvecs_read(query_file.c_str(), &dim, &number_of_queries)); - assert(dimension == dim || !"query does not have same dimension as train set"); - } - - size_t top_k; // nb of results per query in the GT - Vector> ground_truth_sets; // number_of_queries * top_k matrix of ground-truth nearest-neighbors - - { - // load ground-truth and convert int to long - size_t nq2; - int *gt_int = ivecs_read(groundtruth_file.c_str(), &top_k, &nq2); - assert(nq2 >= number_of_queries || !"incorrect nb of ground truth entries"); - assert(top_k >= test_top || !"dataset does not provide enough ground truth data"); - - ground_truth_sets.resize(number_of_queries); - for (size_t i = 0; i < number_of_queries; ++i) { - for (size_t j = 0; j < test_top; ++j) { - ground_truth_sets[i].insert(gt_int[i * top_k + j]); - } - } - } - - infinity::BaseProfiler profiler; - std::cout << "Start!" << std::endl; - int round = 3; - std::vector>> results(number_of_queries); - std::cout << "Query thread number: " << query_thread_n << std::endl; - for (int ef = 100; ef <= 300; ef += 25) { - knn_hnsw.SetEf(ef); - int correct = 0; - int sum_time = 0; - for (int i = 0; i < round; ++i) { - std::atomic_int idx(0); - std::vector threads; - profiler.Begin(); - for (int j = 0; j < query_thread_n; ++j) { - threads.emplace_back([&]() { - while (true) { - int cur_idx = idx.fetch_add(1); - if (cur_idx >= (int)number_of_queries) { - break; - } - const float *query = queries + cur_idx * dimension; - auto result = knn_hnsw.KnnSearchSorted(query, test_top); - results[cur_idx] = std::move(result); - } - }); - } - for (auto &thread : threads) { - thread.join(); - } - profiler.End(); - if (i == 0) { - for (size_t query_idx = 0; query_idx < number_of_queries; ++query_idx) { - for (const auto &[dist, label] : results[query_idx]) { - if (ground_truth_sets[query_idx].contains(label)) { - ++correct; - } - } - } - printf("Recall = %.4f\n", correct / float(test_top * number_of_queries)); - } - sum_time += profiler.ElapsedToMs(); - } - sum_time /= round; - printf("ef = %d, Spend: %d\n", ef, sum_time); - - std::cout << "----------------------------" << std::endl; - } - - delete[] queries; -} diff --git a/benchmark/embedding/scheduler.cpp b/benchmark/embedding/scheduler.cpp deleted file mode 100644 index 726ec01c06..0000000000 --- a/benchmark/embedding/scheduler.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// -// Created by jinhai on 23-5-5. 
-// - -#include "scheduler.h" -#include "threadutil.h" - -#include -#include - -namespace infinity { - -void -Scheduler::ExecuteLoop(TaskQueue* task_queue, int64_t worker_id) { - Task* task{nullptr}; - bool running{true}; - while(running) { - task_queue->Dequeue(task); - if(task == nullptr) { - std::cout << "Null task" << std::endl; - continue; - } - if(task->type() == TaskType::kTerminate) { - running = false; - } else { - task->run(worker_id); - } - } -} - -void -Scheduler::Init(const std::unordered_set& cpu_set) { - if(!cpu_set_.empty()) { - std::cerr << "scheduler was initialized before" << std::endl; - return; - } - cpu_set_ = cpu_set; - - for(int64_t cpu_id: cpu_set) { - std::unique_ptr thread_ptr = std::make_unique(); - std::unique_ptr> running = std::make_unique>(true); - std::unique_ptr task_queue = std::make_unique(); - std::unique_ptr task_thread = std::make_unique(ExecuteLoop, task_queue.get(), cpu_id); - - // Pin the thread to specific cpu - ThreadUtil::pin(*task_thread, cpu_id); - - task_queues_.emplace(cpu_id, std::move(task_queue)); - task_threads_.emplace(cpu_id, std::move(task_thread)); - - std::cout << "Start worker: " << cpu_id << std::endl; - } -} - -void -Scheduler::Uninit() { - std::unique_ptr terminate_task = std::make_unique(); - for(int64_t cpu_id: cpu_set_) { - task_queues_[cpu_id]->Enqueue(terminate_task.get()); - task_threads_[cpu_id]->join(); - std::cout << "Stop worker: " << cpu_id << std::endl; - } -} - -void -Scheduler::ScheduleTask(int64_t worker_id, Task* task) { - task_queues_[worker_id]->Enqueue(task); -} - -} \ No newline at end of file diff --git a/benchmark/embedding/scheduler.h b/benchmark/embedding/scheduler.h deleted file mode 100644 index 9e50f5447e..0000000000 --- a/benchmark/embedding/scheduler.h +++ /dev/null @@ -1,97 +0,0 @@ -// -// Created by jinhai on 23-5-5. 
-// - -#pragma once - -#include "mpsc_queue.h" -#include "blockingconcurrentqueue.h" - -#include -#include - -#include - -namespace infinity { - -enum class TaskType { - kTerminate, - kDummy, - kAnnFlat, - kAnnIVFSQ8, - kAnnIVFFlat, - kAnnHNSW, - kInvalid, -}; - -struct Task { - inline explicit - Task(TaskType type) : type_(type) {} - - virtual void - run(int64_t worker_id) { - // Not implemented - } - - [[nodiscard]] inline TaskType - type() const { - return type_; - } - - TaskType type_{TaskType::kInvalid}; -}; - -struct TerminateTask final : public Task { - inline explicit - TerminateTask() : Task(TaskType::kTerminate) {} -}; - -struct DummyTask final : public Task { - inline explicit - DummyTask() : Task(TaskType::kDummy) {} - - void - run(int64_t worker_id) override { - printf("Run dummy task by worker: %ld\n", (long)worker_id); - sleep(1); - } -}; - -struct TaskQueue { - void - Enqueue(Task* task) { - queue_.enqueue(task); - } - - void - Dequeue(Task*& task) { - queue_.wait_dequeue(task); - } - - moodycamel::BlockingConcurrentQueue queue_; -// MPSCQueue queue_; -}; - -class Scheduler { -public: - Scheduler() = default; - - void - Init(const std::unordered_set& cpu_set); - - void - Uninit(); - - void - ScheduleTask(int64_t worker_id, Task* task); - - static void - ExecuteLoop(TaskQueue* task_queue, int64_t worker_id); - -private: - std::unordered_set cpu_set_{}; - std::unordered_map> task_queues_{}; - std::unordered_map> task_threads_{}; -}; - -} diff --git a/benchmark/fst/CMakeLists.txt b/benchmark/fst/CMakeLists.txt deleted file mode 100644 index a14cc611af..0000000000 --- a/benchmark/fst/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -# fst benchmark -add_executable(fst - fst.cpp -) -target_include_directories(fst PUBLIC "${CMAKE_SOURCE_DIR}/src") - -target_link_libraries( - fst - infinity_core - benchmark_profiler -) - -if(ENABLE_JEMALLOC) - target_link_libraries(fst jemalloc.a) -endif() - -# add_definitions(-march=native) -# add_definitions(-msse4.2 -mfma) -# add_definitions(-mavx2 -mf16c -mpopcnt) - -# execute_process(COMMAND grep -q avx2 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX2 -# OUTPUT_QUIET -# ERROR_QUIET) - -# execute_process(COMMAND grep -q avx512 /proc/cpuinfo -# RESULT_VARIABLE SUPPORT_AVX512 -# OUTPUT_QUIET -# ERROR_QUIET) - -# if (SUPPORT_AVX2 EQUAL 0 OR SUPPORT_AVX512 EQUAL 0) -# message("Compiled by AVX2 or AVX512") -# target_compile_options(fst PUBLIC $<$:-march=native>) -# else() -# message("Compiled by SSE") -# target_compile_options(fst PUBLIC $<$:-msse4.2 -mfma>) -# endif() \ No newline at end of file diff --git a/benchmark/fst/fst.cpp b/benchmark/fst/fst.cpp deleted file mode 100644 index 231805108b..0000000000 --- a/benchmark/fst/fst.cpp +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "base_profiler.h" -#include -#include -#include -#include -#include -#include -import stl; -import third_party; -import fst; - -using namespace infinity; -namespace fs = std::filesystem; - -static BaseProfiler profiler; - -int MmapFile(const String &fp, u8 *&data_ptr, SizeT &data_len) { - data_ptr = nullptr; - data_len = 0; - long len_f = fs::file_size(fp); - if (len_f == 0) - return -1; - int f = open(fp.c_str(), O_RDONLY); - void *tmpd = mmap(NULL, len_f, PROT_READ, MAP_SHARED, f, 0); - if (tmpd == MAP_FAILED) - return -1; - close(f); - int rc = madvise(tmpd, len_f, MADV_RANDOM | MADV_DONTDUMP); - if (rc < 0) - return -1; - data_ptr = (u8 *)tmpd; - data_len = len_f; - return 0; -} - -int MunmapFile(u8 *&data_ptr, SizeT &data_len) { - if (data_ptr != nullptr) { - int rc = munmap(data_ptr, data_len); - if (rc < 0) - return -1; - data_ptr = nullptr; - data_len = 0; - } - return 0; -} - -// unescape a string, refers to https://en.cppreference.com/w/cpp/language/escape -String unescape(const String &escaped) { - std::stringstream result; - for (std::size_t i = 0; i < escaped.size(); ++i) { - if (escaped[i] == '\\' && i + 1 < escaped.size()) { - switch (escaped[i + 1]) { - case '\'': - result << '\''; // single quote - break; - case '"': - result << '"'; // double quote - break; - case '?': - result << '\?'; // question mark - break; - case '\\': - result << '\\'; // backslash - break; - case 'a': - result << '\a'; // audible bell - break; - case 'b': - result << '\b'; // backspace - break; - case 'f': - result << '\f'; // form feed - new page - break; - case 'n': - result << '\n'; // line feed - new line - break; - case 'r': - result << '\r'; // carriage return - break; - case 't': - result << '\t'; // horizontal tab - break; - case 'v': - result << '\v'; // vertical tab - break; - // Add more cases for other escape sequences as needed - default: - result << escaped[i + 1]; - break; - } - ++i; // Skip the next character as it was part of an escape sequence - } else { - result << escaped[i]; - } - } - return result.str(); -} - -int main(int argc, char *argv[]) { - CLI::App app{ - "fst 0.1\nZhichang Yu \nA command line tool for building, searching and inspecting FSTs. A simplified C++ " - "reimplementing of " - "[BurntSushi/fst](https://github.com/BurntSushi/fst) library. The FST file format should be binary compatible with the original one."}; - app.require_subcommand(1); - - CLI::App *app_map = app.add_subcommand( - "map", - "Creates an ordered map backed by a finite state transducer.\nThe input to this command should be a CSV file with exactly two columns and no " - "headers. The first column should be the key and the second column should be a value that can be interpreted as an unsigned 64 bit integer. " - "The input shall be already sorted in lexicographic order by the key. 
Otherwise this will return an error when it sees an out-of-order key."); - bool force; - String delimiter, fp_keys, fp_fst; - app_map->add_flag("-f,--force", force, "Overwrites the output if the destination file already exists."); - app_map->add_option("-d,--delimiter", delimiter, "The delimiter used in the CSV file to separate key and value in.")->default_val(","); - app_map->add_option("keys", fp_keys, "A file containing a key per line.")->required(); - app_map->add_option("fst", fp_fst, "The destination file path to write the FST.")->required(); - - CLI::App *app_range = - app.add_subcommand("range", - "Issues a range query against the given transducer.\nA range query returns all search results within a particular " - "range.\nIf neither the start or the end of the range is specified, then all entries in the transducer are shown."); - bool show_outputs; - String start, end; - app_range->add_flag("-o,--outputs", show_outputs, "When set, output values are shown as CSV data."); - app_range->add_option("-e,--end", end, "Only show results less than or equal to this."); - app_range->add_option("-s,--start", start, "Only show results greater than or equal to this."); - app_range->add_option("fst", fp_fst, "The FST to run a range query against.")->required(); - - CLI::App *app_verify = app.add_subcommand("verify", - "Performs verification on the FST to check its integrity. This works by computing a checksum of the " - "FST's underlying data and comparing it to an expected checksum. If the checksums do not match, then " - "it's likely that the FST is corrupt in some fashion and must be re-generated.\n"); - app_verify->add_option("fst", fp_fst, "The FST to verify.")->required(); - - CLI::App *app_bench = app.add_subcommand("benchmark", "Query benchmark with given keys against the given transducer."); - app_bench->add_option("-d,--delimiter", delimiter, "The delimiter used in the CSV file to separate key and value in.")->default_val(","); - app_bench->add_option("keys", fp_keys, "A file containing a key per line.")->required(); - app_bench->add_option("fst", fp_fst, "The FST to run query benchmark against.")->required(); - - try { - app.parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app.exit(e); - } - if (app_map->parsed()) { - profiler.Begin(); - std::filesystem::path output_path(fp_fst); - output_path = std::filesystem::absolute(output_path); - std::filesystem::path output_dir = output_path.parent_path(); - std::filesystem::create_directories(output_dir); - if (std::filesystem::exists(output_path) && !force) { - return app.exit(CLI::FileError("Output file already exists. 
Use --force to overwrite.")); - } - std::ofstream ofs(output_path.c_str(), std::ios::binary | std::ios::trunc); - if (!ofs.is_open()) { - return app.exit(CLI::FileError("Failed to open output file: " + output_path.string())); - } - OstreamWriter wtr(ofs); - FstBuilder builder(wtr); - - std::ifstream ifs(fp_keys); - String line; - delimiter = unescape(delimiter); - SizeT del_len = delimiter.length(); - while (std::getline(ifs, line)) { - auto pos = line.find(delimiter); - if (pos == String::npos) { - return app.exit( - CLI::FileError("Delimiter [" + delimiter + "]" + FormatBytes((u8 *)delimiter.c_str(), del_len) + " not found in line: " + line)); - } - String key = line.substr(0, pos); - String value = line.substr(pos + del_len); - builder.Insert((u8 *)key.c_str(), key.length(), std::stoull(value)); - } - builder.Finish(); - ofs.close(); - profiler.End(); - std::cout << "FST written to " << output_path << std::endl; - std::cout << "time cost: " << profiler.Elapsed() / 1000000 << " ms" << std::endl; - } else if (app_range->parsed()) { - profiler.Begin(); - u8 *data_ptr = nullptr; - SizeT data_len = 0; - if (MmapFile(fp_fst, data_ptr, data_len) < 0) { - return app.exit(CLI::FileError("Failed to mmap file: " + fp_fst)); - } - Fst f(data_ptr, data_len); - Bound min, max; - if (!start.empty()) { - min = Bound(Bound::kIncluded, (u8 *)start.c_str(), start.length()); - } - if (!end.empty()) { - max = Bound(Bound::kIncluded, (u8 *)end.c_str(), end.length()); - } - FstStream it(f, min, max); - Vector key; - u64 val; - while (it.Next(key, val)) { - String str_key(key.data(), key.data() + key.size()); - if (show_outputs) { - std::cout << str_key << "," << val << std::endl; - } else { - std::cout << str_key << std::endl; - } - } - MunmapFile(data_ptr, data_len); - profiler.End(); - std::cout << "time cost: " << profiler.Elapsed() / 1000000 << " ms" << std::endl; - } else if (app_verify->parsed()) { - profiler.Begin(); - u8 *data_ptr = nullptr; - SizeT data_len = 0; - if (MmapFile(fp_fst, data_ptr, data_len) < 0) { - return app.exit(CLI::FileError("Failed to mmap file: " + fp_fst)); - } - Fst f(data_ptr, data_len); - f.Verify(); - MunmapFile(data_ptr, data_len); - std::cout << "FST checksum is correct: " << fp_fst << std::endl; - profiler.End(); - std::cout << "time cost: " << profiler.Elapsed() / 1000000 << " ms" << std::endl; - } else if (app_bench->parsed()) { - Vector> queries; - std::ifstream ifs(fp_keys); - String line; - delimiter = unescape(delimiter); - SizeT del_len = delimiter.length(); - while (std::getline(ifs, line)) { - auto pos = line.find(delimiter); - if (pos == String::npos) { - return app.exit( - CLI::FileError("Delimiter [" + delimiter + "]" + FormatBytes((u8 *)delimiter.c_str(), del_len) + " not found in line: " + line)); - } - String key = line.substr(0, pos); - String value = line.substr(pos + del_len); - queries.emplace_back(key, std::stoull(value)); - } - - profiler.Begin(); - u8 *data_ptr = nullptr; - SizeT data_len = 0; - if (MmapFile(fp_fst, data_ptr, data_len) < 0) { - return app.exit(CLI::FileError("Failed to mmap file: " + fp_fst)); - } - Fst f(data_ptr, data_len); - SizeT cnt = 0; - bool running = true; - while (running) { - u64 res; - for (auto &[key, val] : queries) { - bool found = f.Get((u8 *)key.c_str(), key.length(), res); - if (found) { - if (res != val) { - // -1 means expecting missing - String exp = val == SizeT(-1) ? 
"missing" : std::to_string(val); - return app.exit(CLI::FileError("Value of key " + key + " mismatch, expect " + exp + ", got " + std::to_string(res))); - } - } else { - if (val != SizeT(-1)) { - return app.exit(CLI::FileError("Value of key " + key + " mismatch, expect " + std::to_string(val) + ", got missing")); - } - } - cnt++; - if ((cnt & 0xFFFF) == 0) { - if (profiler.Elapsed() >= 60LL * 1000000000LL) { - // ensure benchmark last at least 60 seconds - running = false; - break; - } - } - } - } - MunmapFile(data_ptr, data_len); - profiler.End(); - std::cout << "query count: " << cnt << std::endl; - std::cout << "time cost: " << profiler.Elapsed() / 1000000 << " ms" << std::endl; - std::cout << "qps: " << cnt / (profiler.Elapsed() / 1000000000) << std::endl; - } else { - return app.exit(CLI::RequiresError("fst", "a subcommand of [map, range, verify]")); - } - - return 0; -} \ No newline at end of file diff --git a/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp b/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp index 4f102f724c..be95f0f72f 100644 --- a/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp +++ b/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp @@ -83,19 +83,19 @@ SharedPtr CreateDbAndTable(const String &db_name, const String &table_ { String col1_name = "id"; auto col1_type = std::make_shared(LogicalType::kVarchar); - auto col1_def = new ColumnDef(0, col1_type, std::move(col1_name), std::unordered_set()); + auto col1_def = new ColumnDef(0, col1_type, std::move(col1_name), std::set()); column_defs.push_back(col1_def); } { String col2_name = "title"; auto col2_type = std::make_shared(LogicalType::kVarchar); - auto col2_def = new ColumnDef(0, col2_type, std::move(col2_name), std::unordered_set()); + auto col2_def = new ColumnDef(0, col2_type, std::move(col2_name), std::set()); column_defs.push_back(col2_def); } { String col3_name = "text"; auto col3_type = std::make_shared(LogicalType::kVarchar); - auto col3_def = new ColumnDef(0, col3_type, std::move(col3_name), std::unordered_set()); + auto col3_def = new ColumnDef(0, col3_type, std::move(col3_name), std::set()); column_defs.push_back(col3_def); } diff --git a/benchmark/local_infinity/infinity_benchmark.cpp b/benchmark/local_infinity/infinity_benchmark.cpp index 18f15f009c..2055675b0e 100644 --- a/benchmark/local_infinity/infinity_benchmark.cpp +++ b/benchmark/local_infinity/infinity_benchmark.cpp @@ -136,12 +136,12 @@ int main() { SharedPtr col_type = MakeShared(LogicalType::kInteger); String col_name_1 = "col1"; - auto col_def_1 = new ColumnDef(0, col_type, col_name_1, HashSet()); + auto col_def_1 = new ColumnDef(0, col_type, col_name_1, std::set()); column_defs.emplace_back(col_def_1); col_type = MakeShared(LogicalType::kInteger); String col_name_2 = "col2"; - auto col_def_2 = new ColumnDef(1, col_type, col_name_2, HashSet()); + auto col_def_2 = new ColumnDef(1, col_type, col_name_2, std::set()); column_defs.emplace_back(col_def_2); { // Init Table @@ -183,12 +183,12 @@ int main() { SharedPtr col_type = MakeShared(LogicalType::kInteger); String col_name_1 = "col1"; - auto col_def_1 = new ColumnDef(0, col_type, col_name_1, HashSet()); + auto col_def_1 = new ColumnDef(0, col_type, col_name_1, std::set()); column_definitions.emplace_back(col_def_1); col_type = MakeShared(LogicalType::kInteger); String col_name_2 = "col2"; - auto col_def_2 = new ColumnDef(1, col_type, col_name_2, HashSet()); + auto col_def_2 = new ColumnDef(1, col_type, col_name_2, std::set()); 
column_definitions.emplace_back(col_def_2); // auto [database, status] = infinity->GetDatabase("default_db"); @@ -297,10 +297,10 @@ int main() { Vector column_definitions; column_definitions.reserve(column_count); - auto col_def_1 = new ColumnDef(0, col_type, col_name_1, HashSet()); + auto col_def_1 = new ColumnDef(0, col_type, col_name_1, std::set()); column_definitions.emplace_back(col_def_1); - auto col_def_2 = new ColumnDef(1, col_type, col_name_2, HashSet()); + auto col_def_2 = new ColumnDef(1, col_type, col_name_2, std::set()); column_definitions.emplace_back(col_def_2); SharedPtr infinity = Infinity::LocalConnect(); @@ -356,7 +356,7 @@ int main() { std::shared_ptr col1_type = std::make_shared(LogicalType::kEmbedding, std::make_shared(EmbeddingDataType::kElemFloat, 128)); std::string col1_name = "col1"; - auto col1_def = std::make_unique(0, col1_type, col1_name, std::unordered_set()); + auto col1_def = std::make_unique(0, col1_type, col1_name, std::set()); column_defs.emplace_back(col1_def.release()); std::string db_name = "default_db"; diff --git a/benchmark/local_infinity/knn/knn_import_benchmark.cpp b/benchmark/local_infinity/knn/knn_import_benchmark.cpp index fd9c45c541..e695628ca2 100644 --- a/benchmark/local_infinity/knn/knn_import_benchmark.cpp +++ b/benchmark/local_infinity/knn/knn_import_benchmark.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include import compilation_config; import internal_types; @@ -96,7 +96,7 @@ int main(int argc, char *argv[]) { std::cout << "Import from: " << base_path << std::endl; std::string col1_name = "col1"; - auto col1_def = std::make_unique(0, col1_type, col1_name, std::unordered_set()); + auto col1_def = std::make_unique(0, col1_type, col1_name, std::set()); column_defs.emplace_back(col1_def.release()); std::string db_name = "default_db"; diff --git a/benchmark/toml/CMakeLists.txt b/benchmark/toml/CMakeLists.txt deleted file mode 100644 index fc5ec7b9ac..0000000000 --- a/benchmark/toml/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ - -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/toml_config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/toml_config.h) - -add_executable(toml_benchmark - toml_benchmark.cpp -) - -#target_link_libraries(csv_benchmark -# zsv_parser -#) - -target_include_directories(toml_benchmark - PUBLIC "${CMAKE_SOURCE_DIR}/third_party/tomlplusplus/") -target_include_directories(toml_benchmark - PUBLIC "${CMAKE_SOURCE_DIR}/src") \ No newline at end of file diff --git a/benchmark/toml/toml_benchmark.cpp b/benchmark/toml/toml_benchmark.cpp deleted file mode 100644 index de9287df90..0000000000 --- a/benchmark/toml/toml_benchmark.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Created by jinhai on 23-8-27. 
-// - -#include -#include -#include -#include "toml.hpp" -#include "toml_config.h" - -auto -main() -> int { - - std::cout << std::filesystem::current_path() << std::endl; - - std::string filename = std::string(TEST_DATA_PATH) + "/config/infinity_conf.toml"; - - auto config = toml::parse_file(filename); - - auto general_config = config["general"]; - std::cout << general_config["version"].value_or("") << std::endl; - std::cout << general_config["timezone"].value_or("") << std::endl; - std::cout << general_config["pg_port"] << std::endl; - std::cout << general_config["http_port"] << std::endl; - std::cout << general_config["grpc_port"] << std::endl; - - auto log_config = config["log"]; - std::cout << log_config["log_dir"].value_or("") << std::endl; - std::cout << log_config["log_to_stdout"] << std::endl; - std::cout << log_config["max_log_file_size"].value_or("") << std::endl; - std::cout << log_config["log_level"].value_or("") << std::endl; - - - auto storage_config = config["storage"]; - std::cout << storage_config["data_dir"].value_or("") << std::endl; - std::cout << storage_config["wal_dir"].value_or("") << std::endl; - - auto buffer_config = config["buffer"]; - std::cout << buffer_config["buffer_pool_size"].value_or("") << std::endl; - std::cout << buffer_config["temp_dir"].value_or("") << std::endl; - return 0; -} diff --git a/benchmark/toml/toml_config.h.in b/benchmark/toml/toml_config.h.in deleted file mode 100644 index bb9e85507d..0000000000 --- a/benchmark/toml/toml_config.h.in +++ /dev/null @@ -1,8 +0,0 @@ -// -// Created by JinHai on 2022/9/5. -// - -#pragma once - -#define TEST_DATA_PATH "@TEST_DATA_PATH@" -#define CSV_DATA_PATH "@CSV_DATA_PATH@" \ No newline at end of file diff --git a/benchmark/wal/CMakeLists.txt b/benchmark/wal/CMakeLists.txt deleted file mode 100644 index 9413267bf4..0000000000 --- a/benchmark/wal/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ - -add_executable(asio_wal - asio_wal.cpp -) - -target_link_libraries(asio_wal - uring.a - spdlog -) - -target_include_directories(asio_wal - PUBLIC "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/third_party/third_party/spdlog/include") - -target_compile_options(asio_wal PUBLIC -DBOOST_ASIO_HAS_FILE -DBOOST_ASIO_HAS_IO_URING) diff --git a/benchmark/wal/asio_wal.cpp b/benchmark/wal/asio_wal.cpp deleted file mode 100644 index 8911c59d5a..0000000000 --- a/benchmark/wal/asio_wal.cpp +++ /dev/null @@ -1,441 +0,0 @@ -// POC of asio usage on file IO and timer - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "crc.hpp" - -using namespace std; - -std::shared_ptr logger; - -void -initializeLogger() { - spdlog::sink_ptr stdoutSink = - std::make_shared(); - logger = std::make_shared("console", stdoutSink); - spdlog::register_logger(logger); -} - -enum class WALType : uint8_t { - INVALID = 0, - // ----------------------------- - // Data - // ----------------------------- - INSERT_TUPLE = 26, - DELETE_TUPLE = 27, - UPDATE_TUPLE = 28, - // ----------------------------- - // Flush - // ----------------------------- - CHECKPOINT = 99, - WAL_FLUSH = 100 -}; - -struct WalEntry { - int64_t lsn; // each entry's lsn(Log Sequence Number) is strictly - // increasing by one. - int32_t size; // size of payload, excluding the header, round to multi - // of 4. There's 4 bytes pad just after the payload storing - // the same value to assist backward iterating. - uint32_t - checksum; // crc32 of the entry, including the header and the payload. 
- // User shall populate it before writing to wal. - WALType type; - int32_t pad: 24; - union { - int64_t txn_id; - int64_t max_lsn; - }; - - virtual ~WalEntry() = default; - virtual int32_t - GetSize() = 0; // size of the entry, excluding the 4 bytes pad. -}; - -struct WALInsertTuple : WalEntry { - int16_t columnId; - int64_t row_start; - int64_t len; - char data[]; - virtual int32_t - GetSize() { return sizeof(WALInsertTuple) + size; } -}; - -struct WALDeleteTuple : WalEntry { - int16_t columnId; - int64_t row_start; - int64_t count; - virtual int32_t - GetSize() { return sizeof(WALDeleteTuple); } -}; - -struct WALUpdateTuple : WalEntry { - int16_t columnId; - int64_t row_start; - int64_t len; - char data[]; - virtual int32_t - GetSize() { return sizeof(WALUpdateTuple) + size; } -}; - -struct CheckpointEntry : WalEntry { - virtual int32_t - GetSize() { return sizeof(CheckpointEntry); } -}; - -class Txn { -public: - int64_t txn_id; - int64_t begin_ts; - int64_t commit_ts; - int64_t lsn; - -public: - void - Commit() { - commit_ts = std::chrono::steady_clock::now().time_since_epoch().count(); - }; -}; - -class SeqGenerator { -public: - // Begin with 1 to avoid distinguish uninitialized value and the minimal - // valid value. - SeqGenerator(int64_t begin = 1) : next_seq_(begin) {} - int64_t - Generate() { return next_seq_.fetch_add(1); } - int64_t - GetLast() { return next_seq_.load() - 1; } - -private: - atomic next_seq_; -}; - -class TxnManager { -public: - static TxnManager* - GetInstance() { - if(instance_ == nullptr) { - instance_ = new TxnManager(); - } - return instance_; - } - void - AddActiveTxn(std::shared_ptr txn) { - std::lock_guard guard(mutex_); - active_trans_[txn->txn_id] = txn; - } - int64_t - AllocTxnId() { return txn_id_gen_.Generate(); } - void - CommitTxn(int64_t txn_id, int64_t lsn) { - std::shared_ptr txn = nullptr; - { - std::lock_guard guard(mutex_); - auto ent = active_trans_.find(txn_id); - if(ent != active_trans_.end()) { - txn = ent->second; - } - } - active_trans_.erase(txn_id); - txn->lsn = lsn; - txn->Commit(); - logger->info("TxnManager::Commit done for transaction {}", txn_id); - } - -private: - static TxnManager* instance_; - std::mutex mutex_; - std::map> active_trans_; - SeqGenerator txn_id_gen_; -}; - -class WalManager { -public: - WalManager(boost::asio::io_context& ioc, const std::string& wal_path) - : running_(true), - seq_(0), - ioc_(ioc), - stream_file_( - boost::asio::stream_file(ioc, wal_path, - boost::asio::stream_file::read_write | - boost::asio::stream_file::append | - boost::asio::stream_file::create)) { - // schedule an immediate Flush() - ioc.post([this] { Flush(); }); - } - - void - Stop() { running_.store(false); } - - // Session request to persist an entry. Assuming txn_id of the entry has - // been initialized. - void - WriteEntry(std::shared_ptr entry) { - if(running_.load()) { - mutex_.lock(); - que_.push(entry); - mutex_.unlock(); - } - } - - // Flush is scheduled regularly. It collects a batch of transactions, sync - // wal and do parallel committing. Each sync cost ~1s. Each checkpoint cost - // ~10s. So it's necessary to sync for a batch of transactions, and to - // checkpoint for a batch of sync. 
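    // In outline, the group commit performed by Flush() below is:
    //
    //   { lock; que_.swap(que2_); }                        // take the pending batch
    //   for each entry in que2_: assign lsn, write entry plus 4-byte size pad, move to que3_
    //   stream_file_.sync_all();                           // one sync for the whole batch
    //   for each entry in que3_: TxnManager::CommitTxn()   // commit in WAL order for consistent visibility
    //   reschedule Flush(): immediately if anything was written, otherwise via a 1s timer
    //
    // (Outline only; the actual implementation follows.)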
- void - Flush() { - int seq = seq_.fetch_add(1); - logger->info("WalManager::Flush {} enter", seq); - boost::system::error_code ec; - int written = 0; - int32_t size_pad = 0; - int64_t max_lsn = lsn_gen_.GetLast(); - if(!running_.load()) { - goto QUIT; - } - mutex_.lock(); - que_.swap(que2_); - mutex_.unlock(); - while(!que2_.empty()) { - std::shared_ptr entry = que2_.front(); - size_pad = entry->GetSize(); - entry->lsn = lsn_gen_.Generate(); - max_lsn = entry->lsn; - logger->info( - "WalManager::Flush {} begin writing wal for transaction {}", - seq, entry->txn_id); - boost::asio::write( - stream_file_, - boost::asio::buffer(entry.get(), entry->GetSize()), ec); - boost::asio::write(stream_file_, boost::asio::buffer(&size_pad, 4), - ec); - if(ec.failed()) { - logger->error( - "WalManager::Flush {} failed to write wal for transaction " - "{}, async_write error: {}", - seq, entry->txn_id, ec.to_string()); - } else { - logger->info( - "WalManager::Flush {} done writing wal for transaction {}", - seq, entry->txn_id); - } - que2_.pop(); - que3_.push(entry); - written++; - } - if(written > 0) { - logger->info( - "WalManager::Flush {} begin syncing wal for {} transactions", - seq, written); - stream_file_.sync_all(); - logger->info( - "WalManager::Flush {} done syncing wal for {} transactions", - seq, written); - while(!que3_.empty()) { - std::shared_ptr entry = que3_.front(); - // Commit sequently so they get visible in the same order with - // wal. - TxnManager::GetInstance()->CommitTxn(entry->txn_id, entry->lsn); - que3_.pop(); - } - pending_checkpoint_ += written; - } - // Fuzzy checkpoint for every 10 transactions or 20s. - if(pending_checkpoint_ >= 10 || - (checkpoint_ts_ > 0 && - std::chrono::steady_clock::now().time_since_epoch().count() - - checkpoint_ts_ >= - 20000000000)) { - ioc_.post([this, max_lsn] { Checkpoint(max_lsn); }); - pending_checkpoint_ = 0; - } -QUIT: - // Need to ensure all above async tasks be done at this point. However - // it's false. asio requires to wrap the remained code into a function, - // and call it from the callback. - if(written == 0) { - // async_wait is an initiating function for an @ref - // asynchronous_operation, and always returns immediately. Wait - // asynchronously for the sleep duration since no work to do - logger->info("WalManager::Flush {} schedule a delayed run", seq); - auto timer = std::make_shared(ioc_); - timer->expires_from_now(std::chrono::milliseconds(1000)); - timer->async_wait([this, timer, seq](boost::system::error_code ec) { - if(ec == boost::asio::error::operation_aborted) { - logger->error( - "WalManager::Flush {} async_wait has been cancelled.", - seq); - } else if(ec.failed()) { - logger->error( - "WalManager::Flush {} failed to async_wait. error: {}", - seq, ec.to_string()); - } else { - Flush(); - } - }); - } else { - logger->info("WalManager::Flush {} schedule an immediate run", seq); - ioc_.post([this] { Flush(); }); - } - logger->info("WalManager::Flush {} quit", seq); - } - - // Do checkpoint for transactions which's lsn no larger than the given one. - void - Checkpoint(int64_t lsn) { - // Ensure that at most one instance of a particular async task is - // running at any given time. - if(!checkpoint_mutex_.try_lock()) { - return; - } - // Checkponit is heavy and infrequent operation. 
- logger->info("WalManager::Checkpoint enter for transactions' lsn <= {}", - lsn); - std::this_thread::sleep_for(std::chrono::seconds(10)); - logger->info("WalManager::Checkpoint quit", lsn); - checkpoint_ts_ = - std::chrono::steady_clock::now().time_since_epoch().count(); - checkpoint_mutex_.unlock(); - } - -private: - // Concurrent writing WAL is disallowed. So put all WAL writing into a queue - // and do serial writing. - atomic running_; - atomic seq_; - boost::asio::io_context& ioc_; - mutex mutex_; - queue> que_, que2_, que3_; - boost::asio::stream_file stream_file_; - SeqGenerator lsn_gen_; - int pending_checkpoint_; - int64_t checkpoint_ts_; - mutex checkpoint_mutex_; -}; - -TxnManager* TxnManager::instance_ = nullptr; -std::shared_ptr wal_manager = nullptr; - -class Session : public std::enable_shared_from_this { -public: - Session(size_t seq) : seq_(seq) {} - - void - Start() { - bool expected = false; - bool changed = running_.compare_exchange_strong(expected, true); - if(!changed) - return; - std::unique_ptr th( - new std::thread([this] { Generating(); })); - t_ = std::move(th); - } - - void - Stop() { - bool expected = true; - bool changed = running_.compare_exchange_strong(expected, false); - if(!changed) - return; - t_->join(); - } - - void - Generating() { - while(running_) { - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - if(wal_manager != nullptr) { - auto txn = std::make_shared(); - txn->txn_id = TxnManager::GetInstance()->AllocTxnId(); - txn->begin_ts = - std::chrono::steady_clock::now().time_since_epoch().count(); - txn->commit_ts = 0; - TxnManager::GetInstance()->AddActiveTxn(txn); - logger->info("session {} generated transaction {}", seq_, - txn->txn_id); - auto ent = std::make_shared(); - ent->type = WALType::INSERT_TUPLE; - ent->txn_id = txn->txn_id; - ent->checksum = 0; - // CRC32IEEE is equivalent to boost::crc_32_type on - // little-endian machine. - ent->checksum = infinity::CRC32IEEE::makeCRC( - reinterpret_cast(ent.get()), - ent->GetSize()); - wal_manager->WriteEntry(ent); - } - } - } - -private: - size_t seq_; - std::unique_ptr t_; - atomic running_; -}; - -int -main(int argc, char* argv[]) { - if(argc != 4) { - std::cout << "Usage: " << argv[0] - << " " << std::endl; - return 1; - } - - std::size_t sessions = std::atoi(argv[1]); - std::size_t ioc_threads = std::atoi(argv[2]); - std::string wal_path(argv[3]); - - initializeLogger(); - - boost::asio::io_context ioc; - wal_manager = std::make_shared(ioc, wal_path); - - std::vector> m_threads; - // All threads from the pool will be used to call the corresponding - // asynchronous operation completion callbacks. - for (int i = 0; i < (int)ioc_threads; i++) { - std::unique_ptr th(new std::thread([&ioc]() { - while(!ioc.stopped()) { - // Process asynchronous operations till all are completed. - ioc.run(); - // Sleep for a short duration to avoid busy waiting. - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - })); - m_threads.push_back(std::move(th)); - } - - std::vector sesses; - for (int i = 0; i != (int)sessions; i++) { - Session* sess = new Session(i); - sesses.emplace_back(sess); - sess->Start(); - } - - std::this_thread::sleep_for(std::chrono::seconds(30)); - - // Stop generating transactions. This allows the I/O threads to exit the - // event loop when there are no more pending asynchronous operations. - for (int i = 0; i != (int)sessions; ++i) { - sesses[i]->Stop(); - delete sesses[i]; - } - // Waiting for the I/O threads to exit. 
- for (int i = 0; i < (int)ioc_threads; i++) { - m_threads[i]->join(); - } - - return 0; -} diff --git a/src/common/stl.cppm b/src/common/stl.cppm index a732c804a2..2beb553f07 100644 --- a/src/common/stl.cppm +++ b/src/common/stl.cppm @@ -27,7 +27,7 @@ module; #include #include #include -#include +#include #include #include #include @@ -55,8 +55,9 @@ export module stl; export namespace std { - using std::experimental::source_location; -// using std::stringstream; + using std::source_location; + + // using std::stringstream; using std::forward; using std::move; using std::exchange; @@ -92,7 +93,6 @@ export namespace std { using std::upper_bound; using std::condition_variable; - using std::condition_variable_any; using std::lock_guard; using std::memory_order; using std::memory_order_acq_rel; @@ -242,6 +242,13 @@ export namespace std { using std::time_t; using std::asctime; using std::localtime; + + using std::stof; + using std::stod; + + using std::construct_at; + + using std::set; } // namespace std namespace infinity { diff --git a/src/executor/operator/physical_aggregate.cpp b/src/executor/operator/physical_aggregate.cpp index 6d5d25bacc..2e7c81130c 100644 --- a/src/executor/operator/physical_aggregate.cpp +++ b/src/executor/operator/physical_aggregate.cpp @@ -81,7 +81,7 @@ bool PhysicalAggregate::Execute(QueryContext *query_context, OperatorState *oper SharedPtr col_def = MakeShared(idx, MakeShared(expr->Type()), expr->Name(), - HashSet()); + std::set()); groupby_columns.emplace_back(col_def); types.emplace_back(expr->Type()); ++ idx; @@ -116,7 +116,7 @@ bool PhysicalAggregate::Execute(QueryContext *query_context, OperatorState *oper SharedPtr col_type = input_table_->GetColumnTypeById(idx); String col_name = input_table_->GetColumnNameById(idx); - SharedPtr col_def = MakeShared(idx, col_type, col_name, HashSet()); + SharedPtr col_def = MakeShared(idx, col_type, col_name, std::set()); columns.emplace_back(col_def); } @@ -152,7 +152,7 @@ bool PhysicalAggregate::Execute(QueryContext *query_context, OperatorState *oper SharedPtr col_def = MakeShared(idx, data_type, expr->Name(), - HashSet()); + std::set()); aggregate_columns.emplace_back(col_def); // for output block @@ -621,7 +621,7 @@ bool PhysicalAggregate::SimpleAggregateExecute(const Vector SharedPtr output_type = MakeShared(expr->Type()); // column definition - SharedPtr col_def = MakeShared(idx, output_type, expr->Name(), HashSet()); + SharedPtr col_def = MakeShared(idx, output_type, expr->Name(), std::set()); aggregate_columns.emplace_back(col_def); // for output block diff --git a/src/executor/operator/physical_create_index_prepare.cpp b/src/executor/operator/physical_create_index_prepare.cpp index 65b369173c..46a01f05ee 100644 --- a/src/executor/operator/physical_create_index_prepare.cpp +++ b/src/executor/operator/physical_create_index_prepare.cpp @@ -15,6 +15,7 @@ module; #include +#include module physical_create_index_prepare; diff --git a/src/executor/operator/physical_drop_index.cpp b/src/executor/operator/physical_drop_index.cpp index 33b44cebd0..5391bc32d6 100644 --- a/src/executor/operator/physical_drop_index.cpp +++ b/src/executor/operator/physical_drop_index.cpp @@ -41,7 +41,7 @@ bool PhysicalDropIndex::Execute(QueryContext *query_context, OperatorState *oper // Generate the result Vector> column_defs = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; auto result_table_def_ptr = MakeShared(MakeShared("default_db"), 
MakeShared("Tables"), column_defs); output_ = MakeShared(result_table_def_ptr, TableType::kDataTable); diff --git a/src/executor/operator/physical_drop_schema.cpp b/src/executor/operator/physical_drop_schema.cpp index dd954e30c9..2bad81fafb 100644 --- a/src/executor/operator/physical_drop_schema.cpp +++ b/src/executor/operator/physical_drop_schema.cpp @@ -42,7 +42,7 @@ bool PhysicalDropSchema::Execute(QueryContext *query_context, OperatorState *ope // Generate the result Vector> column_defs = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; auto result_table_def_ptr = MakeShared(MakeShared("default_db"), MakeShared("Tables"), column_defs); output_ = MakeShared(result_table_def_ptr, TableType::kDataTable); diff --git a/src/executor/operator/physical_drop_table.cpp b/src/executor/operator/physical_drop_table.cpp index 5a7745c42c..ff3367ae4b 100644 --- a/src/executor/operator/physical_drop_table.cpp +++ b/src/executor/operator/physical_drop_table.cpp @@ -44,7 +44,7 @@ bool PhysicalDropTable::Execute(QueryContext *query_context, OperatorState *oper // Generate the result Vector> column_defs = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; auto result_table_def_ptr = MakeShared(MakeShared("default_db"), MakeShared("Tables"), column_defs); output_ = MakeShared(result_table_def_ptr, TableType::kDataTable); diff --git a/src/executor/operator/physical_show.cpp b/src/executor/operator/physical_show.cpp index b9b4ccbf35..c7bb031ddf 100644 --- a/src/executor/operator/physical_show.cpp +++ b/src/executor/operator/physical_show.cpp @@ -1125,17 +1125,17 @@ void PhysicalShow::ExecuteShowProfiles(QueryContext *query_context, ShowOperator auto varchar_type = MakeShared(LogicalType::kVarchar); Vector> column_defs = { - MakeShared(0, varchar_type, "profile_no", HashSet()), - MakeShared(1, varchar_type, "parser", HashSet()), - MakeShared(2, varchar_type, "logical_plan", HashSet()), - MakeShared(3, varchar_type, "optimizer", HashSet()), - MakeShared(4, varchar_type, "physical_plan", HashSet()), - MakeShared(5, varchar_type, "pipeline_build", HashSet()), - MakeShared(6, varchar_type, "task_build", HashSet()), - MakeShared(7, varchar_type, "execution", HashSet()), - MakeShared(8, varchar_type, "commit", HashSet()), - MakeShared(9, varchar_type, "rollback", HashSet()), - MakeShared(10, varchar_type, "total_cost", HashSet()), + MakeShared(0, varchar_type, "profile_no", std::set()), + MakeShared(1, varchar_type, "parser", std::set()), + MakeShared(2, varchar_type, "logical_plan", std::set()), + MakeShared(3, varchar_type, "optimizer", std::set()), + MakeShared(4, varchar_type, "physical_plan", std::set()), + MakeShared(5, varchar_type, "pipeline_build", std::set()), + MakeShared(6, varchar_type, "task_build", std::set()), + MakeShared(7, varchar_type, "execution", std::set()), + MakeShared(8, varchar_type, "commit", std::set()), + MakeShared(9, varchar_type, "rollback", std::set()), + MakeShared(10, varchar_type, "total_cost", std::set()), }; auto catalog = txn->GetCatalog(); @@ -1219,10 +1219,10 @@ void PhysicalShow::ExecuteShowColumns(QueryContext *query_context, ShowOperatorS auto varchar_type = MakeShared(LogicalType::kVarchar); Vector> column_defs = { - MakeShared(0, varchar_type, "column_name", HashSet()), - MakeShared(1, varchar_type, "column_type", HashSet()), - MakeShared(2, varchar_type, "constraint", HashSet()), - 
MakeShared(3, varchar_type, "default", HashSet()), + MakeShared(0, varchar_type, "column_name", std::set()), + MakeShared(1, varchar_type, "column_type", std::set()), + MakeShared(2, varchar_type, "constraint", std::set()), + MakeShared(3, varchar_type, "default", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("Views"), column_defs); @@ -1815,9 +1815,9 @@ void PhysicalShow::ExecuteShowConfigs(QueryContext *query_context, ShowOperatorS auto varchar_type = MakeShared(LogicalType::kVarchar); Vector> column_defs = { - MakeShared(0, varchar_type, "config_name", HashSet()), - MakeShared(1, varchar_type, "value", HashSet()), - MakeShared(2, varchar_type, "description", HashSet()), + MakeShared(0, varchar_type, "config_name", std::set()), + MakeShared(1, varchar_type, "value", std::set()), + MakeShared(2, varchar_type, "description", std::set()), }; Config *global_config = query_context->global_config(); @@ -2445,13 +2445,13 @@ void PhysicalShow::ExecuteShowIndexes(QueryContext *query_context, ShowOperatorS auto varchar_type = MakeShared(LogicalType::kVarchar); auto bigint_type = MakeShared(LogicalType::kBigInt); - Vector> column_defs = {MakeShared(0, varchar_type, "index_name", HashSet()), - MakeShared(1, varchar_type, "method_type", HashSet()), - MakeShared(2, bigint_type, "column_id", HashSet()), - MakeShared(3, varchar_type, "column_name", HashSet()), - MakeShared(4, varchar_type, "path", HashSet()), - MakeShared(5, varchar_type, "index_segment", HashSet()), - MakeShared(6, varchar_type, "other_parameters", HashSet())}; + Vector> column_defs = {MakeShared(0, varchar_type, "index_name", std::set()), + MakeShared(1, varchar_type, "method_type", std::set()), + MakeShared(2, bigint_type, "column_id", std::set()), + MakeShared(3, varchar_type, "column_name", std::set()), + MakeShared(4, varchar_type, "path", std::set()), + MakeShared(5, varchar_type, "index_segment", std::set()), + MakeShared(6, varchar_type, "other_parameters", std::set())}; auto table_def = TableDef::Make(MakeShared("default_db"), MakeShared("Views"), column_defs); @@ -2560,8 +2560,8 @@ void PhysicalShow::ExecuteShowViewDetail(QueryContext *query_context, const SharedPtr> &view_column_names) { SharedPtr varchar_type = MakeShared(LogicalType::kVarchar); Vector> output_column_defs = { - MakeShared(0, varchar_type, "column_name", HashSet()), - MakeShared(1, varchar_type, "column_type", HashSet()), + MakeShared(0, varchar_type, "column_name", std::set()), + MakeShared(1, varchar_type, "column_type", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("Views"), output_column_defs); @@ -2610,7 +2610,7 @@ void PhysicalShow::ExecuteShowSessionVariable(QueryContext *query_context, ShowO switch (session_var) { case SessionVariable::kQueryCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2629,7 +2629,7 @@ void PhysicalShow::ExecuteShowSessionVariable(QueryContext *query_context, ShowO } case SessionVariable::kTotalCommitCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2648,7 +2648,7 @@ void PhysicalShow::ExecuteShowSessionVariable(QueryContext 
*query_context, ShowO } case SessionVariable::kTotalRollbackCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2667,7 +2667,7 @@ void PhysicalShow::ExecuteShowSessionVariable(QueryContext *query_context, ShowO } case SessionVariable::kConnectedTime: { Vector> output_column_defs = { - MakeShared(0, varchar_type, "value", HashSet()), + MakeShared(0, varchar_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2686,7 +2686,7 @@ void PhysicalShow::ExecuteShowSessionVariable(QueryContext *query_context, ShowO } case SessionVariable::kEnableProfile: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2719,9 +2719,9 @@ void PhysicalShow::ExecuteShowSessionVariables(QueryContext *query_context, Show auto varchar_type = MakeShared(LogicalType::kVarchar); Vector> column_defs = { - MakeShared(0, varchar_type, "variable_name", HashSet()), - MakeShared(1, varchar_type, "value", HashSet()), - MakeShared(2, varchar_type, "description", HashSet()), + MakeShared(0, varchar_type, "variable_name", std::set()), + MakeShared(1, varchar_type, "value", std::set()), + MakeShared(2, varchar_type, "description", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("configs"), column_defs); @@ -2872,7 +2872,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp switch (global_var) { case GlobalVariable::kQueryCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2890,7 +2890,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kSessionCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2911,7 +2911,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kBufferPoolUsage: { Vector> output_column_defs = { - MakeShared(0, varchar_type, "value", HashSet()), + MakeShared(0, varchar_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2933,7 +2933,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kSchedulePolicy: { Vector> output_column_defs = { - MakeShared(0, varchar_type, "value", HashSet()), + MakeShared(0, varchar_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2952,7 +2952,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kDeltaLogCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, 
"value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2972,7 +2972,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kNextTxnID: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -2992,7 +2992,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kBufferedObjectCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3012,7 +3012,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kUnusedBufferObjectCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3032,7 +3032,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kActiveTxnCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3052,7 +3052,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kCurrentTs: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3072,7 +3072,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kTotalCommitCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3092,7 +3092,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kTotalRollbackCount: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3112,7 +3112,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kActiveWALFilename: { Vector> output_column_defs = { - MakeShared(0, varchar_type, "value", HashSet()), + MakeShared(0, varchar_type, "value", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3132,7 +3132,7 @@ void PhysicalShow::ExecuteShowGlobalVariable(QueryContext *query_context, ShowOp } case GlobalVariable::kProfileRecordCapacity: { Vector> output_column_defs = { - MakeShared(0, integer_type, "value", HashSet()), + MakeShared(0, integer_type, "value", 
std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); @@ -3166,9 +3166,9 @@ void PhysicalShow::ExecuteShowGlobalVariables(QueryContext *query_context, ShowO auto varchar_type = MakeShared(LogicalType::kVarchar); Vector> column_defs = { - MakeShared(0, varchar_type, "variable_name", HashSet()), - MakeShared(1, varchar_type, "value", HashSet()), - MakeShared(2, varchar_type, "description", HashSet()), + MakeShared(0, varchar_type, "variable_name", std::set()), + MakeShared(1, varchar_type, "value", std::set()), + MakeShared(2, varchar_type, "description", std::set()), }; SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("configs"), column_defs); @@ -3627,7 +3627,7 @@ void PhysicalShow::ExecuteShowConfig(QueryContext *query_context, ShowOperatorSt // void PhysicalShow::ExecuteShowVar(QueryContext *query_context, ShowOperatorState *show_operator_state) { // SharedPtr varchar_type = MakeShared(LogicalType::kVarchar); // Vector> output_column_defs = { -// MakeShared(0, varchar_type, "value", HashSet()), +// MakeShared(0, varchar_type, "value", std::set()), // }; // // SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("variables"), output_column_defs); diff --git a/src/executor/operator/physical_sink.cpp b/src/executor/operator/physical_sink.cpp index 43d71788f2..a2b4fa27ba 100644 --- a/src/executor/operator/physical_sink.cpp +++ b/src/executor/operator/physical_sink.cpp @@ -207,7 +207,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -217,7 +217,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -227,7 +227,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -237,7 +237,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -247,7 +247,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -257,7 +257,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, 
MakeShared(LogicalType::kInteger), "OK", HashSet()), + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set()), }; } break; @@ -268,7 +268,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -278,7 +278,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -288,7 +288,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -298,7 +298,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -308,7 +308,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -318,7 +318,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si result_sink_state->status_ = std::move(output_state->status_); } else { result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet())}; + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set())}; } break; } @@ -329,7 +329,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si break; } result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet()), + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set()), }; break; } @@ -340,7 +340,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si break; } result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet()), + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set()), }; break; } @@ -351,7 +351,7 @@ void PhysicalSink::FillSinkStateFromLastOperatorState(ResultSinkState *result_si break; } result_sink_state->result_def_ = { - MakeShared(0, MakeShared(LogicalType::kInteger), "OK", HashSet()), + MakeShared(0, MakeShared(LogicalType::kInteger), "OK", std::set()), }; break; } diff --git a/src/network/http_server.cpp b/src/network/http_server.cpp index 903532280e..865879bde9 100644 --- a/src/network/http_server.cpp +++ b/src/network/http_server.cpp @@ -339,7 +339,7 @@ class CreateTableHandler final : public HttpRequestHandler { } if (column_type) { - HashSet constraints; + std::set constraints; for (auto &constraint_json : 
field_element["constraints"]) { String constraint = constraint_json; ToLower(constraint); diff --git a/src/network/infinity_thrift_service.cpp b/src/network/infinity_thrift_service.cpp index 7fa99f6b44..f937881f2e 100644 --- a/src/network/infinity_thrift_service.cpp +++ b/src/network/infinity_thrift_service.cpp @@ -1344,7 +1344,7 @@ Tuple InfinityThriftService::GetColumnDefFromProto(const in return {nullptr, Status::InvalidDataType()}; } - auto constraints = HashSet(); + auto constraints = std::set(); for (auto constraint : column_def.constraints) { auto type = GetConstraintTypeFromProto(constraint); diff --git a/src/parser/definition/column_def.cpp b/src/parser/definition/column_def.cpp index 96a7ea054e..23d1c839bf 100644 --- a/src/parser/definition/column_def.cpp +++ b/src/parser/definition/column_def.cpp @@ -55,7 +55,7 @@ ConstraintType StringToConstraintType(const std::string &type) { ColumnDef::ColumnDef(int64_t id, std::shared_ptr column_type, std::string column_name, - std::unordered_set constraints, + std::set constraints, std::shared_ptr default_expr) : TableElement(TableElementType::kColumn), id_(id), column_type_(std::move(column_type)), name_(std::move(column_name)), constraints_(std::move(constraints)), default_expr_(std::move(default_expr)) { diff --git a/src/parser/definition/column_def.h b/src/parser/definition/column_def.h index dd40371608..81bfb5552c 100644 --- a/src/parser/definition/column_def.h +++ b/src/parser/definition/column_def.h @@ -22,7 +22,7 @@ #include #include -#include +#include #include namespace infinity { @@ -80,7 +80,7 @@ class ColumnDef : public TableElement { ColumnDef(int64_t id, std::shared_ptr column_type, std::string column_name, - std::unordered_set constraints, + std::set constraints, std::shared_ptr default_expr = nullptr); ColumnDef(LogicalType logical_type, const std::shared_ptr &type_info_ptr, std::shared_ptr default_expr = nullptr); @@ -106,7 +106,7 @@ class ColumnDef : public TableElement { int64_t id_{-1}; const std::shared_ptr column_type_{}; std::string name_{}; - std::unordered_set constraints_{}; + std::set constraints_{}; std::shared_ptr default_expr_{nullptr}; bool build_bloom_filter_{}; }; diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp index 5d0e69c46d..179aea2cec 100644 --- a/src/parser/parser.cpp +++ b/src/parser/parser.cpp @@ -3784,7 +3784,7 @@ YYLTYPE yylloc = yyloc_default; case 90: /* column_constraints: column_constraint */ #line 814 "parser.y" { - (yyval.column_constraints_t) = new std::unordered_set(); + (yyval.column_constraints_t) = new std::set(); (yyval.column_constraints_t)->insert((yyvsp[0].column_constraint_t)); } #line 3791 "parser.cpp" diff --git a/src/parser/parser.h b/src/parser/parser.h index 327b612ca4..3658637fe9 100644 --- a/src/parser/parser.h +++ b/src/parser/parser.h @@ -335,7 +335,7 @@ union SQLSTYPE infinity::ColumnDef* table_column_t; infinity::ColumnType column_type_t; infinity::ConstraintType column_constraint_t; - std::unordered_set* column_constraints_t; + std::set* column_constraints_t; std::vector* identifier_array_t; infinity::TableConstraint* table_constraint_t; diff --git a/src/parser/parser.y b/src/parser/parser.y index 63c9f4806d..33229536d4 100644 --- a/src/parser/parser.y +++ b/src/parser/parser.y @@ -131,7 +131,7 @@ struct SQL_LTYPE { infinity::ColumnDef* table_column_t; infinity::ColumnType column_type_t; infinity::ConstraintType column_constraint_t; - std::unordered_set* column_constraints_t; + std::set* column_constraints_t; std::vector* identifier_array_t; 
infinity::TableConstraint* table_constraint_t; @@ -812,7 +812,7 @@ opt_decimal_specification : '(' INTVAL ',' INTVAL ')' { $$ = new std::pair(); + $$ = new std::set(); $$->insert($1); } | column_constraints column_constraint { diff --git a/src/planner/optimizer/secondary_index_scan/filter_expression_push_down_helper.cpp b/src/planner/optimizer/secondary_index_scan/filter_expression_push_down_helper.cpp index 22b277b88b..c62dfa2ee9 100644 --- a/src/planner/optimizer/secondary_index_scan/filter_expression_push_down_helper.cpp +++ b/src/planner/optimizer/secondary_index_scan/filter_expression_push_down_helper.cpp @@ -16,6 +16,7 @@ module; #include #include + module filter_expression_push_down_helper; import stl; diff --git a/src/scheduler/fragment_context.cpp b/src/scheduler/fragment_context.cpp index 440d36d8dc..ca58316c51 100644 --- a/src/scheduler/fragment_context.cpp +++ b/src/scheduler/fragment_context.cpp @@ -1330,7 +1330,7 @@ SharedPtr SerialMaterializedFragmentCtx::GetResultInternal() { column_defs.emplace_back(MakeShared(col_idx, materialize_sink_state->column_types_->at(col_idx), materialize_sink_state->column_names_->at(col_idx), - HashSet())); + std::set())); } SharedPtr result_table = DataTable::MakeResultTable(column_defs); @@ -1400,7 +1400,7 @@ SharedPtr ParallelMaterializedFragmentCtx::GetResultInternal() { column_defs.emplace_back(MakeShared(col_idx, first_materialize_sink_state->column_types_->at(col_idx), first_materialize_sink_state->column_names_->at(col_idx), - HashSet())); + std::set())); } for (const auto &task : tasks_) { @@ -1447,7 +1447,7 @@ SharedPtr ParallelStreamFragmentCtx::GetResultInternal() { column_defs.emplace_back(MakeShared(col_idx, first_materialize_sink_state->column_types_->at(col_idx), first_materialize_sink_state->column_names_->at(col_idx), - HashSet())); + std::set())); } for (const auto &task : tasks_) { diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index cc3f1012a5..18c00ae557 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -110,8 +110,8 @@ SharedPtr DataTable::MakeEmptyResultTable() { SharedPtr DataTable::MakeSummaryResultTable(u64 count, u64 sum) { Vector> column_defs; - column_defs.emplace_back(MakeShared(0, std::make_shared(LogicalType::kBigInt, nullptr), "count", HashSet())); - column_defs.emplace_back(MakeShared(1, std::make_shared(LogicalType::kBigInt, nullptr), "sum", HashSet())); + column_defs.emplace_back(MakeShared(0, std::make_shared(LogicalType::kBigInt, nullptr), "count", std::set())); + column_defs.emplace_back(MakeShared(1, std::make_shared(LogicalType::kBigInt, nullptr), "sum", std::set())); SharedPtr result_table_def_ptr = MakeShared(nullptr, nullptr, column_defs); SharedPtr result_table = Make(result_table_def_ptr, TableType::kResult); diff --git a/src/storage/definition/table_def.cpp b/src/storage/definition/table_def.cpp index f012537822..dd004923ef 100644 --- a/src/storage/definition/table_def.cpp +++ b/src/storage/definition/table_def.cpp @@ -132,7 +132,7 @@ SharedPtr TableDef::ReadAdv(char *&ptr, i32 maxbytes) { SharedPtr column_type = DataType::ReadAdv(ptr, maxbytes); String column_name = ReadBufAdv(ptr); i32 constraints_size = ReadBufAdv(ptr); - HashSet constraints; + std::set constraints; for (i32 j = 0; j < constraints_size; j++) { ConstraintType ct = ReadBufAdv(ptr); constraints.insert(ct); diff --git a/src/storage/meta/entry/table_entry.cpp b/src/storage/meta/entry/table_entry.cpp index 7b4d823018..cddda6eb90 100644 --- a/src/storage/meta/entry/table_entry.cpp +++ 
b/src/storage/meta/entry/table_entry.cpp @@ -1037,7 +1037,7 @@ UniquePtr TableEntry::Deserialize(const nlohmann::json &table_entry_ i64 column_id = column_def_json["column_id"]; String column_name = column_def_json["column_name"]; - HashSet constraints; + std::set constraints; if (column_def_json.contains("constraints")) { for (const auto &column_constraint : column_def_json["constraints"]) { ConstraintType constraint = column_constraint; diff --git a/src/storage/wal/catalog_delta_entry.cpp b/src/storage/wal/catalog_delta_entry.cpp index 5c33542aff..c14f71af2d 100644 --- a/src/storage/wal/catalog_delta_entry.cpp +++ b/src/storage/wal/catalog_delta_entry.cpp @@ -327,7 +327,7 @@ UniquePtr AddTableEntryOp::ReadAdv(char *&ptr, char *ptr_end) { SharedPtr column_type = DataType::ReadAdv(ptr, max_bytes); String column_name = ReadBufAdv(ptr); i32 constraints_size = ReadBufAdv(ptr); - HashSet constraints; + std::set constraints; for (i32 j = 0; j < constraints_size; j++) { ConstraintType ct = ReadBufAdv(ptr); constraints.insert(ct); diff --git a/src/unit_test/executor/expression/new_expression_evaluator.cpp b/src/unit_test/executor/expression/new_expression_evaluator.cpp index af38c4c97e..a141a611b5 100644 --- a/src/unit_test/executor/expression/new_expression_evaluator.cpp +++ b/src/unit_test/executor/expression/new_expression_evaluator.cpp @@ -85,7 +85,7 @@ TEST_F(ExpressionEvaluatorTest, add_bigint_constant_1) { ExpressionEvaluator expr_evaluator; SharedPtr data_type = MakeShared(LogicalType::kBigInt); - SharedPtr col_def = MakeShared(0, data_type, "c1", HashSet()); + SharedPtr col_def = MakeShared(0, data_type, "c1", std::set()); SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("t1"), {col_def}); SharedPtr input_table = DataTable::Make(table_def, TableType::kDataTable); @@ -184,7 +184,7 @@ TEST_F(ExpressionEvaluatorTest, subtract_constant_8192_bigint) { ExpressionEvaluator expr_evaluator; - SharedPtr col_def = MakeShared(0, MakeShared(DataType(LogicalType::kBigInt)), "c1", HashSet()); + SharedPtr col_def = MakeShared(0, MakeShared(DataType(LogicalType::kBigInt)), "c1", std::set()); SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("t1"), {col_def}); SharedPtr input_table = DataTable::Make(table_def, TableType::kDataTable); diff --git a/src/unit_test/executor/operator/physical_sort.cpp b/src/unit_test/executor/operator/physical_sort.cpp index 3035d78071..20d05da5a0 100644 --- a/src/unit_test/executor/operator/physical_sort.cpp +++ b/src/unit_test/executor/operator/physical_sort.cpp @@ -50,7 +50,7 @@ TEST_F(PhysicalSortTest, test1) { SharedPtr col_def = MakeShared(0, col_type, col_name, - HashSet()); + std::set()); columns.emplace_back(col_def); col_type = MakeShared(LogicalType::kBigInt); @@ -59,7 +59,7 @@ TEST_F(PhysicalSortTest, test1) { col_def = MakeShared(1, col_type, col_name, - HashSet()); + std::set()); columns.emplace_back(col_def); SharedPtr table_def = TableDef::Make(MakeShared("default_db"), diff --git a/src/unit_test/main/infinity.cpp b/src/unit_test/main/infinity.cpp index f8c7d81787..fc261a99fb 100644 --- a/src/unit_test/main/infinity.cpp +++ b/src/unit_test/main/infinity.cpp @@ -107,12 +107,12 @@ TEST_F(InfinityTest, test1) { SharedPtr col_type = MakeShared(LogicalType::kBoolean); String col_name = "col1"; - auto col_def = new ColumnDef(0, col_type, col_name, HashSet()); + auto col_def = new ColumnDef(0, col_type, col_name, std::set()); column_defs.emplace_back(col_def); col_type = MakeShared(LogicalType::kBigInt); col_name = 
"col2"; - col_def = new ColumnDef(1, col_type, col_name, HashSet()); + col_def = new ColumnDef(1, col_type, col_name, std::set()); column_defs.emplace_back(col_def); result = infinity->CreateTable("default_db", "table1", column_defs, Vector(), create_table_opts); @@ -151,12 +151,12 @@ TEST_F(InfinityTest, test1) { SharedPtr col_type = MakeShared(LogicalType::kBigInt); String col1_name = "col1"; - auto col_def = new ColumnDef(0, col_type, col1_name, HashSet()); + auto col_def = new ColumnDef(0, col_type, col1_name, std::set()); column_defs.emplace_back(col_def); col_type = MakeShared(LogicalType::kSmallInt); String col2_name = "col2"; - col_def = new ColumnDef(1, col_type, col2_name, HashSet()); + col_def = new ColumnDef(1, col_type, col2_name, std::set()); column_defs.emplace_back(col_def); result = infinity->CreateTable("default_db", "table1", column_defs, Vector(), create_table_opts); diff --git a/src/unit_test/main/table.cpp b/src/unit_test/main/table.cpp index b74ac148bf..e94e876415 100644 --- a/src/unit_test/main/table.cpp +++ b/src/unit_test/main/table.cpp @@ -59,12 +59,12 @@ TEST_F(InfinityTableTest, test1) { std::shared_ptr col1_type = std::make_shared(LogicalType::kEmbedding, std::make_shared(EmbeddingDataType::kElemFloat, 128)); String col1_name = "col1"; - auto col1_def = std::make_unique(0, col1_type, col1_name, HashSet()); + auto col1_def = std::make_unique(0, col1_type, col1_name, std::set()); column_defs.emplace_back(col1_def.release()); std::shared_ptr col2_type = std::make_shared(LogicalType::kBigInt); String col2_name = "col2"; - auto col2_def = std::make_unique(0, col2_type, col2_name, HashSet()); + auto col2_def = std::make_unique(0, col2_type, col2_name, std::set()); column_defs.emplace_back(col2_def.release()); std::string db_name = "db1"; diff --git a/src/unit_test/storage/bg_task/cleanup_task.cpp b/src/unit_test/storage/bg_task/cleanup_task.cpp index 4cd8bb9893..9c45890310 100644 --- a/src/unit_test/storage/bg_task/cleanup_task.cpp +++ b/src/unit_test/storage/bg_task/cleanup_task.cpp @@ -178,7 +178,7 @@ TEST_F(CleanupTaskTest, test_delete_table_simple) { Vector> column_defs; { - HashSet constraints; + std::set constraints; ColumnID column_id = 0; column_defs.push_back(MakeShared(column_id++, MakeShared(DataType(LogicalType::kInteger)), "col1", constraints)); } @@ -228,7 +228,7 @@ TEST_F(CleanupTaskTest, test_delete_table_complex) { Vector> column_defs; { - HashSet constraints; + std::set constraints; ColumnID column_id = 0; column_defs.push_back(MakeShared(column_id++, MakeShared(DataType(LogicalType::kInteger)), "col1", constraints)); } @@ -303,7 +303,7 @@ TEST_F(CleanupTaskTest, test_compact_and_cleanup) { Vector> column_defs; { - HashSet constraints; + std::set constraints; ColumnID column_id = 0; column_defs.push_back(MakeShared(column_id++, MakeShared(DataType(LogicalType::kInteger)), "col1", constraints)); } @@ -390,7 +390,7 @@ TEST_F(CleanupTaskTest, test_with_index_compact_and_cleanup) { Vector> column_defs; { - HashSet constraints; + std::set constraints; ColumnID column_id = 0; column_defs.push_back(MakeShared(column_id++, MakeShared(DataType(LogicalType::kInteger)), *column_name, constraints)); } diff --git a/src/unit_test/storage/bg_task/compact_segments_task.cpp b/src/unit_test/storage/bg_task/compact_segments_task.cpp index 25c9faae23..210e7c0ad0 100644 --- a/src/unit_test/storage/bg_task/compact_segments_task.cpp +++ b/src/unit_test/storage/bg_task/compact_segments_task.cpp @@ -114,7 +114,7 @@ TEST_F(CompactTaskTest, compact_to_single_segment) { { i64 
column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -174,7 +174,7 @@ TEST_F(CompactTaskTest, compact_to_two_segment) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -238,7 +238,7 @@ TEST_F(CompactTaskTest, compact_with_delete) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -324,7 +324,7 @@ TEST_F(CompactTaskTest, delete_in_compact_process) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -439,7 +439,7 @@ TEST_F(CompactTaskTest, uncommit_delete_in_compact_process) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -591,7 +591,7 @@ TEST_F(CompactTaskTest, compact_not_exist_table) { { Vector> columns; i64 column_id = 0; - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); tbl1_def = MakeShared(MakeShared("default_db"), MakeShared(table_name), columns); diff --git a/src/unit_test/storage/binary_fuse_filter/segment_sealing.cpp b/src/unit_test/storage/binary_fuse_filter/segment_sealing.cpp index 817de3205b..5ffe0278da 100644 --- a/src/unit_test/storage/binary_fuse_filter/segment_sealing.cpp +++ b/src/unit_test/storage/binary_fuse_filter/segment_sealing.cpp @@ -102,7 +102,7 @@ TEST_F(SealingTaskTest, append_unsealed_segment_sealed) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); diff --git a/src/unit_test/storage/buffer/buffer_obj.cpp b/src/unit_test/storage/buffer/buffer_obj.cpp index 6902d65eeb..c04d2ffc99 100644 --- a/src/unit_test/storage/buffer/buffer_obj.cpp +++ b/src/unit_test/storage/buffer/buffer_obj.cpp @@ -535,7 +535,7 @@ TEST_F(BufferObjTest, test_hnsw_index_buffer_obj_shutdown) { Vector> column_defs; { { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); i64 column_id = 0; auto embedding_info = MakeShared(EmbeddingDataType::kElemFloat, 128); @@ -691,7 +691,7 @@ TEST_F(BufferObjTest, test_big_with_gc_and_cleanup) { Vector> column_defs; { - HashSet constraints; + std::set constraints; ColumnID column_id = 0; column_defs.push_back(MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), *column_name, constraints)); } @@ -772,7 +772,7 @@ TEST_F(BufferObjTest, test_multiple_threads_read) { Vector> column_defs; { - HashSet constraints; + std::set constraints; ColumnID column_id = 0; column_defs.push_back(MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), *column_name, constraints)); } diff 
--git a/src/unit_test/storage/definition/column_def.cpp b/src/unit_test/storage/definition/column_def.cpp index 0981718a20..d50cdf532f 100644 --- a/src/unit_test/storage/definition/column_def.cpp +++ b/src/unit_test/storage/definition/column_def.cpp @@ -32,7 +32,7 @@ class ColumnDefTest : public BaseTest {}; TEST_F(ColumnDefTest, test1) { using namespace infinity; - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(0, MakeShared(DataType(LogicalType::kTinyInt)), "c1", constraints); @@ -45,7 +45,7 @@ TEST_F(ColumnDefTest, test1) { TEST_F(ColumnDefTest, test2) { using namespace infinity; - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(3, MakeShared(LogicalType::kVarchar), "c2", constraints); EXPECT_EQ(*column_def_ptr->type(), DataType(LogicalType::kVarchar)); diff --git a/src/unit_test/storage/definition/table.cpp b/src/unit_test/storage/definition/table.cpp index 2d43ad1cab..b5d38c1091 100644 --- a/src/unit_test/storage/definition/table.cpp +++ b/src/unit_test/storage/definition/table.cpp @@ -49,14 +49,14 @@ TEST_F(TableTest, test1) { SharedPtr col_type = MakeShared(LogicalType::kBoolean); column_types.emplace_back(col_type); String col_name = "col1"; - auto col_def = MakeShared(0, col_type, col_name, HashSet()); + auto col_def = MakeShared(0, col_type, col_name, std::set()); columns.emplace_back(col_def); col_type = MakeShared(LogicalType::kBigInt); column_types.emplace_back(col_type); col_name = "col2"; - col_def = MakeShared(1, col_type, col_name, HashSet()); + col_def = MakeShared(1, col_type, col_name, std::set()); columns.emplace_back(col_def); SharedPtr table_def = TableDef::Make(MakeShared("default_db"), MakeShared("order_by_table"), columns); diff --git a/src/unit_test/storage/definition/table_def.cpp b/src/unit_test/storage/definition/table_def.cpp index 6e5be950bb..3334af1f85 100644 --- a/src/unit_test/storage/definition/table_def.cpp +++ b/src/unit_test/storage/definition/table_def.cpp @@ -37,14 +37,14 @@ TEST_F(TableDefTest, test1) { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "c1", constraints); columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kVarchar)), "c2", constraints); columns.emplace_back(column_def_ptr); @@ -65,14 +65,14 @@ TEST_F(TableDefTest, ReadWrite) { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "c1", constraints); columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(LogicalType::kVarchar), "c2", constraints); diff --git a/src/unit_test/storage/invertedindex/search/query_match.cpp b/src/unit_test/storage/invertedindex/search/query_match.cpp index ebbbb18e3b..78994edcc0 100644 --- a/src/unit_test/storage/invertedindex/search/query_match.cpp +++ 
b/src/unit_test/storage/invertedindex/search/query_match.cpp @@ -139,19 +139,19 @@ void QueryMatchTest::CreateDBAndTable(const String& db_name, const String& table { String col1_name = "id"; auto col1_type = MakeShared(LogicalType::kVarchar); - auto col1_def = MakeShared(0, col1_type, std::move(col1_name), std::unordered_set()); + auto col1_def = MakeShared(0, col1_type, std::move(col1_name), std::set()); column_defs.push_back(col1_def); } { String col2_name = "title"; auto col2_type = MakeShared(LogicalType::kVarchar); - auto col2_def = MakeShared(1, col2_type, std::move(col2_name), std::unordered_set()); + auto col2_def = MakeShared(1, col2_type, std::move(col2_name), std::set()); column_defs.push_back(col2_def); } { String col3_name = "text"; auto col3_type = MakeShared(LogicalType::kVarchar); - auto col3_def = MakeShared(2, col3_type, std::move(col3_name), std::unordered_set()); + auto col3_def = MakeShared(2, col3_type, std::move(col3_name), std::set()); column_defs.push_back(col3_def); } auto table_def = TableDef::Make(MakeShared(db_name), MakeShared(table_name), std::move(column_defs)); diff --git a/src/unit_test/storage/knnindex/merge_optimize/test_optimize.cpp b/src/unit_test/storage/knnindex/merge_optimize/test_optimize.cpp index 494f186415..f27bab3b67 100644 --- a/src/unit_test/storage/knnindex/merge_optimize/test_optimize.cpp +++ b/src/unit_test/storage/knnindex/merge_optimize/test_optimize.cpp @@ -100,12 +100,12 @@ TEST_F(OptimizeKnnTest, test1) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = std::make_shared(0, std::make_shared(LogicalType::kEmbedding, EmbeddingInfo::Make(EmbeddingDataType::kElemFloat, 4)), "col2", - std::unordered_set{}); + std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); @@ -231,7 +231,7 @@ TEST_F(OptimizeKnnTest, test_secondary_index_optimize) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1}); auto index_name = std::make_shared("idx1"); diff --git a/src/unit_test/storage/meta/entry/table_collection_entry.cpp b/src/unit_test/storage/meta/entry/table_collection_entry.cpp index b49c39a3ea..a669d91eb5 100644 --- a/src/unit_test/storage/meta/entry/table_collection_entry.cpp +++ b/src/unit_test/storage/meta/entry/table_collection_entry.cpp @@ -76,7 +76,7 @@ TEST_F(TableEntryTest, test1) { Vector> columns; i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -84,7 +84,7 @@ TEST_F(TableEntryTest, test1) { columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), "big_int_col", constraints); @@ -129,7 +129,7 @@ TEST_F(TableEntryTest, test2) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); 
constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -137,14 +137,14 @@ TEST_F(TableEntryTest, test2) { columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), "big_int_col", constraints); columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kDouble)), "double_col", constraints); columns.emplace_back(column_def_ptr); diff --git a/src/unit_test/storage/txn/conflict_check.cpp b/src/unit_test/storage/txn/conflict_check.cpp index bdb6700024..ce102b21a0 100644 --- a/src/unit_test/storage/txn/conflict_check.cpp +++ b/src/unit_test/storage/txn/conflict_check.cpp @@ -124,7 +124,7 @@ TEST_F(ConflictCheckTest, conflict_check_delete) { auto db_name = std::make_shared("default_db"); auto table_name = std::make_shared("table1"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto table_def = TableDef::Make(db_name, table_name, {column_def1}); SizeT row_cnt = 10; diff --git a/src/unit_test/storage/txn/table_txn.cpp b/src/unit_test/storage/txn/table_txn.cpp index db5a87cf60..bf908a865d 100644 --- a/src/unit_test/storage/txn/table_txn.cpp +++ b/src/unit_test/storage/txn/table_txn.cpp @@ -64,7 +64,7 @@ UniquePtr MockTableDesc() { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -72,14 +72,14 @@ UniquePtr MockTableDesc() { columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), "big_int_col", constraints); columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kDouble)), "double_col", constraints); columns.emplace_back(column_def_ptr); diff --git a/src/unit_test/storage/wal/catalog_delta_entry.cpp b/src/unit_test/storage/wal/catalog_delta_entry.cpp index 49dcf76423..b131838be6 100644 --- a/src/unit_test/storage/wal/catalog_delta_entry.cpp +++ b/src/unit_test/storage/wal/catalog_delta_entry.cpp @@ -44,9 +44,9 @@ TEST_F(CatalogDeltaEntryTest, test_DeltaOpEntry) { Vector> column_defs{}; { auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); column_defs.push_back(column_def1); column_defs.push_back(column_def2); } diff --git a/src/unit_test/storage/wal/catalog_delta_replay.cpp b/src/unit_test/storage/wal/catalog_delta_replay.cpp index dedde09b0e..b77bcf2db2 100644 --- a/src/unit_test/storage/wal/catalog_delta_replay.cpp +++ b/src/unit_test/storage/wal/catalog_delta_replay.cpp @@ -183,9 +183,9 @@ TEST_F(CatalogDeltaReplayTest, 
replay_table_entry) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name1 = std::make_shared("tb1"); auto table_name2 = std::make_shared("tb2"); auto table_name3 = std::make_shared("tb3"); @@ -260,9 +260,9 @@ TEST_F(CatalogDeltaReplayTest, replay_import) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); @@ -366,9 +366,9 @@ TEST_F(CatalogDeltaReplayTest, replay_append) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); @@ -451,9 +451,9 @@ TEST_F(CatalogDeltaReplayTest, replay_delete) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); @@ -525,9 +525,9 @@ TEST_F(CatalogDeltaReplayTest, replay_with_full_checkpoint) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_name_committed = std::make_shared("tb_committed"); auto table_name_uncommitted = std::make_shared("tb_uncommitted"); @@ -711,7 +711,7 @@ TEST_F(CatalogDeltaReplayTest, replay_compact_to_single_rollback) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -767,9 +767,9 @@ 
TEST_F(CatalogDeltaReplayTest, replay_table_single_index) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); @@ -923,9 +923,9 @@ TEST_F(CatalogDeltaReplayTest, replay_table_single_index_named_db) { auto db_name = std::make_shared("db1"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); @@ -1093,7 +1093,7 @@ TEST_F(CatalogDeltaReplayTest, replay_table_single_index_and_compact) { auto table_name = std::make_shared("db1"); // auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); - HashSet constraints; + std::set constraints; auto column_def1 = MakeShared(0, MakeShared(DataType(LogicalType::kTinyInt)), "col1", constraints); auto table_def = TableDef::Make(db_name, table_name, {column_def1}); diff --git a/src/unit_test/storage/wal/checkpoint.cpp b/src/unit_test/storage/wal/checkpoint.cpp index 061e88b418..10b8d88915 100644 --- a/src/unit_test/storage/wal/checkpoint.cpp +++ b/src/unit_test/storage/wal/checkpoint.cpp @@ -167,7 +167,7 @@ TEST_F(CheckpointTest, test_cleanup_and_checkpoint) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -240,7 +240,7 @@ TEST_F(CheckpointTest, test_index_replay_with_full_and_delta_checkpoint1) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), *column_name, constraints); columns.emplace_back(column_def_ptr); @@ -354,7 +354,7 @@ TEST_F(CheckpointTest, test_index_replay_with_full_and_delta_checkpoint2) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kVarchar)), *column_name, constraints); columns.emplace_back(column_def_ptr); diff --git a/src/unit_test/storage/wal/repeat_replay.cpp b/src/unit_test/storage/wal/repeat_replay.cpp index 4db77783ec..e48a1da381 100644 --- a/src/unit_test/storage/wal/repeat_replay.cpp +++ b/src/unit_test/storage/wal/repeat_replay.cpp @@ -63,9 +63,9 @@ TEST_F(RepeatReplayTest, append) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + 
std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); @@ -182,9 +182,9 @@ TEST_F(RepeatReplayTest, import) { auto db_name = std::make_shared("default_db"); auto column_def1 = - std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kInteger), "col1", std::set()); auto column_def2 = - std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::unordered_set{}); + std::make_shared(0, std::make_shared(LogicalType::kVarchar), "col2", std::set()); auto table_name = std::make_shared("tb1"); auto table_def = TableDef::Make(db_name, table_name, {column_def1, column_def2}); diff --git a/src/unit_test/storage/wal/wal_entry.cpp b/src/unit_test/storage/wal/wal_entry.cpp index f43f1cd09c..512f0b9fbc 100644 --- a/src/unit_test/storage/wal/wal_entry.cpp +++ b/src/unit_test/storage/wal/wal_entry.cpp @@ -48,7 +48,7 @@ SharedPtr MockTableDesc2() { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -56,14 +56,14 @@ SharedPtr MockTableDesc2() { columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), "big_int_col", constraints); columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kDouble)), "double_col", constraints); columns.emplace_back(column_def_ptr); diff --git a/src/unit_test/storage/wal/wal_replay.cpp b/src/unit_test/storage/wal/wal_replay.cpp index 0bc48b8e8f..2ff9fe0ee5 100644 --- a/src/unit_test/storage/wal/wal_replay.cpp +++ b/src/unit_test/storage/wal/wal_replay.cpp @@ -178,7 +178,7 @@ TEST_F(WalReplayTest, wal_replay_tables) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -186,14 +186,14 @@ TEST_F(WalReplayTest, wal_replay_tables) { columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), "big_int_col", constraints); columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kDouble)), "double_col", constraints); columns.emplace_back(column_def_ptr); @@ -302,7 +302,7 @@ TEST_F(WalReplayTest, wal_replay_append) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -310,14 +310,14 @@ TEST_F(WalReplayTest, wal_replay_append) { columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), "big_int_col", constraints); 
columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kDouble)), "double_col", constraints); @@ -404,7 +404,7 @@ TEST_F(WalReplayTest, wal_replay_append) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -483,7 +483,7 @@ TEST_F(WalReplayTest, wal_replay_import) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kUnique); constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = @@ -491,14 +491,14 @@ TEST_F(WalReplayTest, wal_replay_import) { columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kPrimaryKey); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kBigInt)), "big_int_col", constraints); columns.emplace_back(column_def_ptr); } { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kDouble)), "double_col", constraints); @@ -685,7 +685,7 @@ TEST_F(WalReplayTest, wal_replay_compact) { { i64 column_id = 0; { - HashSet constraints; + std::set constraints; auto column_def_ptr = MakeShared(column_id++, MakeShared(DataType(LogicalType::kTinyInt)), "tiny_int_col", constraints); columns.emplace_back(column_def_ptr); @@ -803,7 +803,7 @@ TEST_F(WalReplayTest, wal_replay_create_index_IvfFlat) { { Vector> columns; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); i64 column_id = 0; auto embeddingInfo = MakeShared(EmbeddingDataType::kElemFloat, 128); @@ -907,7 +907,7 @@ TEST_F(WalReplayTest, wal_replay_create_index_hnsw) { { Vector> columns; { - HashSet constraints; + std::set constraints; constraints.insert(ConstraintType::kNotNull); i64 column_id = 0; auto embeddingInfo = MakeShared(EmbeddingDataType::kElemFloat, 128);