From 62ff1b20156d1c255b01c005e75f24bb0ad5d78a Mon Sep 17 00:00:00 2001 From: bilibilifaker <919320834@qq.com> Date: Thu, 28 Mar 2024 23:14:16 +0800 Subject: [PATCH] 123 --- src/CMakeLists.txt | 1 + src/binder/bind_create.cpp | 90 +-- src/binder/bind_select.cpp | 66 +- src/binder/bind_variable.cpp | 14 - src/binder/fmt_impl.cpp | 37 - src/binder/node_tag_to_string.cpp | 25 - src/binder/statement/create_statement.cpp | 9 +- src/binder/statement/index_statement.cpp | 11 +- src/binder/transformer.cpp | 38 - src/buffer/CMakeLists.txt | 2 +- src/buffer/buffer_pool_manager.cpp | 67 -- src/buffer/buffer_pool_manager_instance.cpp | 216 +++++ src/buffer/lru_k_replacer.cpp | 119 ++- src/catalog/column.cpp | 14 +- src/catalog/schema.cpp | 6 +- src/catalog/table_generator.cpp | 6 +- src/common/CMakeLists.txt | 5 +- src/common/bustub_ddl.cpp | 248 ------ src/common/bustub_instance.cpp | 330 +++----- src/common/config.cpp | 2 - src/common/util/string_util.cpp | 9 - src/concurrency/CMakeLists.txt | 9 +- src/concurrency/lock_manager.cpp | 560 ++++++++++++- src/concurrency/transaction_manager.cpp | 139 ++-- src/concurrency/transaction_manager_impl.cpp | 125 --- src/concurrency/watermark.cpp | 19 - src/container/CMakeLists.txt | 1 + src/container/disk/hash/CMakeLists.txt | 3 +- .../disk/hash/disk_extendible_hash_table.cpp | 126 ++- .../hash/disk_extendible_hash_table_utils.cpp | 106 --- src/container/hash/CMakeLists.txt | 8 + src/container/hash/extendible_hash_table.cpp | 189 +++++ src/execution/CMakeLists.txt | 5 - src/execution/aggregation_executor.cpp | 37 +- src/execution/delete_executor.cpp | 57 +- src/execution/execution_common.cpp | 44 - src/execution/executor_factory.cpp | 35 - src/execution/fmt_impl.cpp | 28 - src/execution/hash_join_executor.cpp | 68 +- src/execution/index_scan_executor.cpp | 62 +- src/execution/init_check_executor.cpp | 40 - src/execution/insert_executor.cpp | 57 +- src/execution/limit_executor.cpp | 20 +- src/execution/mock_scan_executor.cpp | 138 +--- 
src/execution/nested_index_join_executor.cpp | 47 +- src/execution/nested_loop_join_executor.cpp | 58 +- src/execution/plan_node.cpp | 31 +- src/execution/seq_scan_executor.cpp | 55 +- src/execution/sort_executor.cpp | 55 +- src/execution/topn_check_executor.cpp | 48 -- src/execution/topn_executor.cpp | 60 +- src/execution/topn_per_group_executor.cpp | 25 - src/execution/update_executor.cpp | 72 +- src/execution/window_function_executor.cpp | 14 - src/include/binder/binder.h | 11 - src/include/binder/bound_expression.h | 10 - .../binder/expressions/bound_agg_call.h | 1 - src/include/binder/expressions/bound_alias.h | 2 - .../binder/expressions/bound_func_call.h | 32 - src/include/binder/expressions/bound_window.h | 90 --- .../binder/statement/create_statement.h | 3 +- .../binder/statement/index_statement.h | 10 +- .../binder/statement/set_show_statement.h | 10 - src/include/buffer/buffer_pool_manager.h | 232 ++---- .../buffer/buffer_pool_manager_instance.h | 181 +++++ src/include/buffer/lru_k_replacer.h | 40 +- src/include/catalog/catalog.h | 79 +- src/include/catalog/column.h | 49 +- src/include/catalog/schema.h | 2 +- src/include/common/bustub_instance.h | 80 +- src/include/common/channel.h | 59 -- src/include/common/config.h | 7 +- src/include/common/enums/statement_type.h | 4 - src/include/common/exception.h | 26 +- src/include/common/logger.h | 20 +- src/include/common/macros.h | 8 +- src/include/common/util/hash_util.h | 2 +- src/include/common/util/string_util.h | 3 - src/include/concurrency/lock_manager.h | 132 ++- src/include/concurrency/transaction.h | 414 ++++++---- src/include/concurrency/transaction_manager.h | 177 ++-- src/include/concurrency/watermark.h | 40 - .../disk/hash/disk_extendible_hash_table.h | 159 ++-- .../container/hash/extendible_hash_table.h | 201 +++++ src/include/container/hash/hash_function.h | 2 +- src/include/execution/check_options.h | 36 - src/include/execution/execution_common.h | 37 - src/include/execution/execution_engine.h | 
15 +- src/include/execution/executor_context.h | 45 +- .../execution/executors/abstract_executor.h | 1 - .../executors/aggregation_executor.h | 47 +- .../execution/executors/delete_executor.h | 6 +- .../execution/executors/hash_join_executor.h | 13 +- .../execution/executors/index_scan_executor.h | 6 + .../execution/executors/init_check_executor.h | 76 -- .../execution/executors/insert_executor.h | 7 + .../execution/executors/limit_executor.h | 3 +- .../executors/nested_index_join_executor.h | 9 +- .../executors/nested_loop_join_executor.h | 10 +- .../execution/executors/seq_scan_executor.h | 2 + .../execution/executors/sort_executor.h | 4 + .../execution/executors/topn_check_executor.h | 67 -- .../execution/executors/topn_executor.h | 29 +- .../executors/topn_per_group_executor.h | 59 -- .../execution/executors/update_executor.h | 7 +- .../executors/window_function_executor.h | 94 --- .../expressions/abstract_expression.h | 10 +- .../expressions/arithmetic_expression.h | 7 +- .../execution/expressions/array_expression.h | 65 -- .../expressions/column_value_expression.h | 5 +- .../expressions/comparison_expression.h | 3 +- .../expressions/constant_value_expression.h | 2 +- .../execution/expressions/logic_expression.h | 7 +- .../execution/expressions/string_expression.h | 95 --- src/include/execution/plans/abstract_plan.h | 9 +- src/include/execution/plans/delete_plan.h | 2 +- src/include/execution/plans/hash_join_plan.h | 21 +- src/include/execution/plans/index_scan_plan.h | 36 +- src/include/execution/plans/insert_plan.h | 6 +- .../execution/plans/nested_index_join_plan.h | 12 +- .../execution/plans/nested_loop_join_plan.h | 2 +- src/include/execution/plans/seq_scan_plan.h | 6 +- .../execution/plans/topn_per_group_plan.h | 75 -- src/include/execution/plans/topn_plan.h | 2 +- src/include/execution/plans/update_plan.h | 8 +- src/include/execution/plans/window_plan.h | 143 ---- src/include/optimizer/optimizer.h | 24 +- src/include/optimizer/optimizer_internal.h | 8 - 
src/include/planner/planner.h | 15 +- src/include/primer/orset.h | 65 -- src/include/primer/orset_driver.h | 134 ---- src/include/primer/p0_trie.h | 444 +++++++++++ src/include/primer/trie.h | 136 ---- src/include/primer/trie_answer.h | 7 - src/include/primer/trie_store.h | 54 -- .../storage/disk/disk_manager_memory.h | 71 +- src/include/storage/disk/disk_scheduler.h | 95 --- src/include/storage/disk/write_back_cache.h | 97 --- src/include/storage/index/b_plus_tree.h | 199 ++--- src/include/storage/index/b_plus_tree_index.h | 21 +- .../index/extendible_hash_table_index.h | 9 +- src/include/storage/index/generic_key.h | 2 +- src/include/storage/index/index.h | 19 +- src/include/storage/index/index_iterator.h | 14 +- .../index/linear_probe_hash_table_index.h | 2 +- src/include/storage/index/stl_equal_wrapper.h | 15 - .../storage/index/stl_hasher_wrapper.h | 17 - src/include/storage/index/stl_ordered.h | 102 --- src/include/storage/index/stl_unordered.h | 74 -- .../storage/page/b_plus_tree_header_page.h | 20 - .../storage/page/b_plus_tree_internal_page.h | 91 +-- .../storage/page/b_plus_tree_leaf_page.h | 79 +- src/include/storage/page/b_plus_tree_page.h | 36 +- .../page/extendible_htable_bucket_page.h | 140 ---- .../page/extendible_htable_directory_page.h | 203 ----- .../page/extendible_htable_header_page.h | 88 -- .../storage/page/hash_table_page_defs.h | 2 - src/include/storage/page/header_page.h | 51 ++ src/include/storage/page/page.h | 13 +- src/include/storage/page/page_guard.h | 241 ------ src/include/storage/page/table_page.h | 175 +++- src/include/storage/table/table_heap.h | 136 ++-- src/include/storage/table/table_iterator.h | 38 +- src/include/storage/table/tuple.h | 61 +- src/include/type/type.h | 12 +- src/include/type/type_id.h | 2 +- src/include/type/value.h | 19 +- src/include/type/value_factory.h | 6 - src/include/type/varlen_type.h | 2 +- src/include/type/vector_type.h | 67 -- src/optimizer/CMakeLists.txt | 30 +- 
src/optimizer/eliminate_true_filter.cpp | 6 +- src/optimizer/merge_filter_nlj.cpp | 4 +- src/optimizer/merge_filter_scan.cpp | 4 + src/optimizer/nlj_as_hash_join.cpp | 46 +- src/optimizer/nlj_as_index_join.cpp | 2 +- src/optimizer/optimizer.cpp | 3 +- src/optimizer/optimizer_custom_rules.cpp | 365 ++++++++- src/optimizer/optimizer_internal.cpp | 5 - src/optimizer/order_by_index_scan.cpp | 36 +- src/optimizer/seqscan_as_indexscan.cpp | 11 - src/optimizer/sort_limit_as_topn.cpp | 25 +- src/planner/CMakeLists.txt | 2 - src/planner/expression_factory.cpp | 45 -- src/planner/plan_aggregation.cpp | 6 +- src/planner/plan_expression.cpp | 26 +- src/planner/plan_func_call.cpp | 35 - src/planner/plan_insert.cpp | 3 +- src/planner/plan_select.cpp | 18 +- src/planner/plan_table_ref.cpp | 8 +- src/planner/plan_window_function.cpp | 155 ---- src/primer/.clang-tidy | 10 + src/primer/CMakeLists.txt | 9 +- src/primer/orset.cpp | 50 -- src/primer/orset_driver.cpp | 51 -- src/primer/p0_trie.cpp | 9 + src/primer/trie.cpp | 59 -- src/primer/trie_store.cpp | 48 -- src/recovery/CMakeLists.txt | 8 +- src/recovery/log_recovery.cpp | 40 + src/storage/disk/CMakeLists.txt | 3 +- src/storage/disk/disk_scheduler.cpp | 41 - src/storage/index/b_plus_tree.cpp | 753 +++++++++++++----- src/storage/index/b_plus_tree_index.cpp | 23 +- .../index/extendible_hash_table_index.cpp | 8 +- src/storage/index/index_iterator.cpp | 49 +- .../index/linear_probe_hash_table_index.cpp | 4 +- src/storage/page/CMakeLists.txt | 6 +- .../page/b_plus_tree_internal_page.cpp | 152 +++- src/storage/page/b_plus_tree_leaf_page.cpp | 120 ++- src/storage/page/b_plus_tree_page.cpp | 40 +- .../page/extendible_htable_bucket_page.cpp | 83 -- .../page/extendible_htable_directory_page.cpp | 65 -- .../page/extendible_htable_header_page.cpp | 33 - .../page/extendible_htable_page_utils.cpp | 110 --- src/storage/page/header_page.cpp | 104 +++ src/storage/page/page_guard.cpp | 38 - src/storage/page/table_page.cpp | 379 +++++++-- 
src/storage/table/table_heap.cpp | 265 +++--- src/storage/table/table_iterator.cpp | 87 +- src/storage/table/tuple.cpp | 83 +- src/type/CMakeLists.txt | 3 +- src/type/type.cpp | 28 +- src/type/value.cpp | 38 - src/type/varlen_type.cpp | 30 +- src/type/vector_type.cpp | 117 --- 226 files changed, 6438 insertions(+), 7280 deletions(-) delete mode 100644 src/buffer/buffer_pool_manager.cpp create mode 100644 src/buffer/buffer_pool_manager_instance.cpp delete mode 100644 src/common/bustub_ddl.cpp delete mode 100644 src/concurrency/transaction_manager_impl.cpp delete mode 100644 src/concurrency/watermark.cpp delete mode 100644 src/container/disk/hash/disk_extendible_hash_table_utils.cpp create mode 100644 src/container/hash/CMakeLists.txt create mode 100644 src/container/hash/extendible_hash_table.cpp delete mode 100644 src/execution/execution_common.cpp delete mode 100644 src/execution/init_check_executor.cpp delete mode 100644 src/execution/topn_check_executor.cpp delete mode 100644 src/execution/topn_per_group_executor.cpp delete mode 100644 src/execution/window_function_executor.cpp delete mode 100644 src/include/binder/expressions/bound_func_call.h delete mode 100644 src/include/binder/expressions/bound_window.h create mode 100644 src/include/buffer/buffer_pool_manager_instance.h delete mode 100644 src/include/common/channel.h delete mode 100644 src/include/concurrency/watermark.h create mode 100644 src/include/container/hash/extendible_hash_table.h delete mode 100644 src/include/execution/check_options.h delete mode 100644 src/include/execution/execution_common.h delete mode 100644 src/include/execution/executors/init_check_executor.h delete mode 100644 src/include/execution/executors/topn_check_executor.h delete mode 100644 src/include/execution/executors/topn_per_group_executor.h delete mode 100644 src/include/execution/executors/window_function_executor.h delete mode 100644 src/include/execution/expressions/array_expression.h delete mode 100644 
src/include/execution/expressions/string_expression.h delete mode 100644 src/include/execution/plans/topn_per_group_plan.h delete mode 100644 src/include/execution/plans/window_plan.h delete mode 100644 src/include/optimizer/optimizer_internal.h delete mode 100644 src/include/primer/orset.h delete mode 100644 src/include/primer/orset_driver.h create mode 100644 src/include/primer/p0_trie.h delete mode 100644 src/include/primer/trie.h delete mode 100644 src/include/primer/trie_answer.h delete mode 100644 src/include/primer/trie_store.h delete mode 100644 src/include/storage/disk/disk_scheduler.h delete mode 100644 src/include/storage/disk/write_back_cache.h delete mode 100644 src/include/storage/index/stl_equal_wrapper.h delete mode 100644 src/include/storage/index/stl_hasher_wrapper.h delete mode 100644 src/include/storage/index/stl_ordered.h delete mode 100644 src/include/storage/index/stl_unordered.h delete mode 100644 src/include/storage/page/b_plus_tree_header_page.h delete mode 100644 src/include/storage/page/extendible_htable_bucket_page.h delete mode 100644 src/include/storage/page/extendible_htable_directory_page.h delete mode 100644 src/include/storage/page/extendible_htable_header_page.h create mode 100644 src/include/storage/page/header_page.h delete mode 100644 src/include/storage/page/page_guard.h delete mode 100644 src/include/type/vector_type.h delete mode 100644 src/optimizer/optimizer_internal.cpp delete mode 100644 src/optimizer/seqscan_as_indexscan.cpp delete mode 100644 src/planner/plan_func_call.cpp delete mode 100644 src/planner/plan_window_function.cpp create mode 100644 src/primer/.clang-tidy delete mode 100644 src/primer/orset.cpp delete mode 100644 src/primer/orset_driver.cpp create mode 100644 src/primer/p0_trie.cpp delete mode 100644 src/primer/trie.cpp delete mode 100644 src/primer/trie_store.cpp create mode 100644 src/recovery/log_recovery.cpp delete mode 100644 src/storage/disk/disk_scheduler.cpp delete mode 100644 
src/storage/page/extendible_htable_bucket_page.cpp delete mode 100644 src/storage/page/extendible_htable_directory_page.cpp delete mode 100644 src/storage/page/extendible_htable_header_page.cpp delete mode 100644 src/storage/page/extendible_htable_page_utils.cpp create mode 100644 src/storage/page/header_page.cpp delete mode 100644 src/storage/page/page_guard.cpp delete mode 100644 src/type/vector_type.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5eef901..5a20628 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,6 +23,7 @@ set(BUSTUB_LIBS bustub_execution bustub_recovery bustub_type + bustub_container_hash bustub_container_disk_hash bustub_storage_disk bustub_storage_index diff --git a/src/binder/bind_create.cpp b/src/binder/bind_create.cpp index 17f6056..95fdea7 100644 --- a/src/binder/bind_create.cpp +++ b/src/binder/bind_create.cpp @@ -46,7 +46,6 @@ #include "fmt/ranges.h" #include "nodes/nodes.hpp" #include "nodes/primnodes.hpp" -#include "nodes/value.hpp" #include "pg_definitions.hpp" #include "postgres_parser.hpp" #include "type/type_id.h" @@ -69,14 +68,6 @@ auto Binder::BindColumnDefinition(duckdb_libpgquery::PGColumnDef *cdef) -> Colum return {colname, TypeId::INTEGER}; } - if (name == "double") { - return {colname, TypeId::DECIMAL}; - } - - if (name == "bool") { - return {colname, TypeId::BOOLEAN}; - } - if (name == "varchar") { auto exprs = BindExpressionList(cdef->typeName->typmods); if (exprs.size() != 1) { @@ -87,16 +78,6 @@ auto Binder::BindColumnDefinition(duckdb_libpgquery::PGColumnDef *cdef) -> Colum return {colname, TypeId::VARCHAR, varchar_max_length}; } - if (name == "vector") { - auto exprs = BindExpressionList(cdef->typeName->typmods); - if (exprs.size() != 1) { - throw bustub::Exception("should specify vector length"); - } - const auto &vector_length_val = dynamic_cast(*exprs[0]); - uint32_t vector_length = std::stoi(vector_length_val.ToString()); - return {colname, TypeId::VECTOR, vector_length}; - } - throw 
NotImplementedException(fmt::format("unsupported type: {}", name)); } @@ -104,7 +85,6 @@ auto Binder::BindCreate(duckdb_libpgquery::PGCreateStmt *pg_stmt) -> std::unique auto table = std::string(pg_stmt->relation->relname); auto columns = std::vector{}; size_t column_count = 0; - std::vector pk; for (auto c = pg_stmt->tableElts->head; c != nullptr; c = lnext(c)) { auto node = reinterpret_cast(c->data.ptr_value); @@ -113,44 +93,14 @@ auto Binder::BindCreate(duckdb_libpgquery::PGCreateStmt *pg_stmt) -> std::unique auto cdef = reinterpret_cast(c->data.ptr_value); auto centry = BindColumnDefinition(cdef); if (cdef->constraints != nullptr) { - for (auto constr = cdef->constraints->head; constr != nullptr; constr = constr->next) { - auto constraint = reinterpret_cast(constr->data.ptr_value); - switch (constraint->contype) { - case duckdb_libpgquery::PG_CONSTR_PRIMARY: { - if (!pk.empty()) { - throw NotImplementedException("cannot have two primary keys"); - } - pk = {centry.GetName()}; - break; - } - default: - throw NotImplementedException("unsupported constraint"); - } - } + throw NotImplementedException("constraints not supported"); } columns.push_back(std::move(centry)); column_count++; break; } case duckdb_libpgquery::T_PGConstraint: { - for (auto con = c; con != nullptr; con = con->next) { - auto constraint = reinterpret_cast(con->data.ptr_value); - switch (constraint->contype) { - case duckdb_libpgquery::PG_CONSTR_PRIMARY: { - std::vector columns; - for (auto kc = constraint->keys->head; kc != nullptr; kc = kc->next) { - columns.emplace_back(reinterpret_cast(kc->data.ptr_value)->val.str); - } - if (!pk.empty()) { - throw NotImplementedException("cannot have two primary keys"); - } - pk = std::move(columns); - break; - } - default: - throw NotImplementedException("unsupported constraint"); - } - } + throw NotImplementedException("constraints not supported"); break; } default: @@ -162,12 +112,11 @@ auto Binder::BindCreate(duckdb_libpgquery::PGCreateStmt *pg_stmt) -> 
std::unique throw bustub::Exception("should have at least 1 column"); } - return std::make_unique(std::move(table), std::move(columns), std::move(pk)); + return std::make_unique(std::move(table), std::move(columns)); } auto Binder::BindIndex(duckdb_libpgquery::PGIndexStmt *stmt) -> std::unique_ptr { std::vector> cols; - std::vector col_options; auto table = BindBaseTableRef(stmt->relation->relname, std::nullopt); for (auto cell = stmt->indexParams->head; cell != nullptr; cell = cell->next) { @@ -175,43 +124,12 @@ auto Binder::BindIndex(duckdb_libpgquery::PGIndexStmt *stmt) -> std::unique_ptr< if (index_element->name != nullptr) { auto column_ref = ResolveColumn(*table, std::vector{std::string(index_element->name)}); cols.emplace_back(std::make_unique(dynamic_cast(*column_ref))); - std::string opt; - if (index_element->opclass != nullptr) { - for (auto c = index_element->opclass->head; c != nullptr; c = lnext(c)) { - opt = reinterpret_cast(c->data.ptr_value)->val.str; - break; - } - } - col_options.emplace_back(opt); } else { throw NotImplementedException("create index by expr is not supported yet"); } } - std::string index_type; - - if (stmt->accessMethod != nullptr) { - index_type = stmt->accessMethod; - if (index_type == "art") { - index_type = ""; - } - } - - std::vector> options; - - if (stmt->options != nullptr) { - for (auto c = stmt->options->head; c != nullptr; c = lnext(c)) { - auto def_elem = reinterpret_cast(c->data.ptr_value); - int val; - if (def_elem->arg != nullptr) { - val = reinterpret_cast(def_elem->arg)->val.ival; - } - options.emplace_back(def_elem->defname, val); - } - } - - return std::make_unique(stmt->idxname, std::move(table), std::move(cols), std::move(index_type), - std::move(col_options), std::move(options)); + return std::make_unique(stmt->idxname, std::move(table), std::move(cols)); } } // namespace bustub diff --git a/src/binder/bind_select.cpp b/src/binder/bind_select.cpp index d9eac42..8f895fa 100644 --- a/src/binder/bind_select.cpp 
+++ b/src/binder/bind_select.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include "binder/binder.h" #include "binder/bound_expression.h" @@ -14,10 +13,8 @@ #include "binder/expressions/bound_binary_op.h" #include "binder/expressions/bound_column_ref.h" #include "binder/expressions/bound_constant.h" -#include "binder/expressions/bound_func_call.h" #include "binder/expressions/bound_star.h" #include "binder/expressions/bound_unary_op.h" -#include "binder/expressions/bound_window.h" #include "binder/statement/explain_statement.h" #include "binder/statement/select_statement.h" #include "binder/table_ref/bound_base_table_ref.h" @@ -405,8 +402,6 @@ auto Binder::BindSelectList(duckdb_libpgquery::PGList *list) -> std::vector> exprs; auto select_list = std::vector>{}; bool is_select_star = false; - bool has_agg = false; - bool has_window_agg = false; for (auto node = list->head; node != nullptr; node = lnext(node)) { auto target = reinterpret_cast(node->data.ptr_value); @@ -424,20 +419,10 @@ auto Binder::BindSelectList(duckdb_libpgquery::PGList *list) -> std::vectorHasAggregation()) { - has_agg = true; - } - if (expr->HasWindowFunction()) { - has_window_agg = true; - } select_list.push_back(std::move(expr)); } } - if (has_agg && has_window_agg) { - throw bustub::Exception("cannot have both normal agg and window agg in same query"); - } - return select_list; } @@ -468,10 +453,6 @@ auto Binder::BindConstant(duckdb_libpgquery::PGAConst *node) -> std::unique_ptr< BUSTUB_ENSURE(val.val.ival <= BUSTUB_INT32_MAX, "value out of range"); return std::make_unique(ValueFactory::GetIntegerValue(static_cast(val.val.ival))); } - case duckdb_libpgquery::T_PGFloat: { - double parsed_val = std::stod(std::string(val.val.str)); - return std::make_unique(ValueFactory::GetDecimalValue(parsed_val)); - } case duckdb_libpgquery::T_PGString: { return std::make_unique(ValueFactory::GetVarcharValue(val.val.str)); } @@ -526,34 +507,6 @@ auto 
Binder::BindStar(duckdb_libpgquery::PGAStar *node) -> std::unique_ptr(); } -auto Binder::BindWindowExpression(std::string func_name, std::vector> children, - duckdb_libpgquery::PGWindowDef *node) -> std::unique_ptr { - BUSTUB_ASSERT(node, "nullptr"); - auto partition_by = std::vector>{}; - if (node->partitionClause != nullptr) { - partition_by = BindExpressionList(node->partitionClause); - } - - auto sort = std::vector>{}; - if (node->orderClause != nullptr) { - sort = BindSort(node->orderClause); - } - - std::optional> start_offset; - if (node->startOffset != nullptr) { - start_offset = std::make_optional(BindExpression(node->startOffset)); - } - std::optional> end_offset; - if (node->endOffset != nullptr) { - end_offset = std::make_optional(BindExpression(node->endOffset)); - } - - auto window = std::make_unique(std::move(func_name), std::move(children), std::move(partition_by), - std::move(sort), std::move(start_offset), std::move(end_offset)); - window = BindWindowFrame(node, std::move(window)); - return window; -} - auto Binder::BindFuncCall(duckdb_libpgquery::PGFuncCall *root) -> std::unique_ptr { BUSTUB_ASSERT(root, "nullptr"); auto name = root->funcname; @@ -569,27 +522,16 @@ auto Binder::BindFuncCall(duckdb_libpgquery::PGFuncCall *root) -> std::unique_pt } if (function_name == "min" || function_name == "max" || function_name == "first" || function_name == "last" || - function_name == "sum" || function_name == "count" || function_name == "rank" || function_name == "row_number") { - // Rewrite row_number()/count(*) to count_star(). - if ((function_name == "count" && children.empty()) || function_name == "row_number") { + function_name == "sum" || function_name == "count") { + // Rewrite count(*) to count_star(). 
+ if (function_name == "count" && children.empty()) { function_name = "count_star"; } - if (root->over != nullptr) { - if (root->agg_distinct) { - throw bustub::Exception("DISTINCT is not supported in window functions"); - } - - auto window = BindWindowExpression(function_name, std::move(children), root->over); - return window; - } - // Bind function as agg call. return std::make_unique(function_name, root->agg_distinct, std::move(children)); } - - // In other cases, bind as func call. - return std::make_unique(function_name, std::move(children)); + throw bustub::Exception(fmt::format("unsupported func call {}", function_name)); } /** diff --git a/src/binder/bind_variable.cpp b/src/binder/bind_variable.cpp index 4b1bd39..e2f5db0 100644 --- a/src/binder/bind_variable.cpp +++ b/src/binder/bind_variable.cpp @@ -5,7 +5,6 @@ #include "binder/expressions/bound_constant.h" #include "binder/statement/set_show_statement.h" #include "common/exception.h" -#include "nodes/parsenodes.hpp" namespace bustub { auto Binder::BindVariableSet(duckdb_libpgquery::PGVariableSetStmt *stmt) -> std::unique_ptr { @@ -24,17 +23,4 @@ auto Binder::BindVariableShow(duckdb_libpgquery::PGVariableShowStmt *stmt) -> st return std::make_unique(stmt->name); } -auto Binder::BindTransaction(duckdb_libpgquery::PGTransactionStmt *stmt) -> std::unique_ptr { - switch (stmt->kind) { - case duckdb_libpgquery::PG_TRANS_STMT_COMMIT: - return std::make_unique("commit"); - case duckdb_libpgquery::PG_TRANS_STMT_ROLLBACK: - return std::make_unique("abort"); - case duckdb_libpgquery::PG_TRANS_STMT_BEGIN: - return std::make_unique("begin"); - default: - throw bustub::NotImplementedException("unsupported txn statement kind"); - } -} - } // namespace bustub diff --git a/src/binder/fmt_impl.cpp b/src/binder/fmt_impl.cpp index 4379c2d..389e57b 100644 --- a/src/binder/fmt_impl.cpp +++ b/src/binder/fmt_impl.cpp @@ -1,10 +1,5 @@ -#include "binder/binder.h" -#include "binder/bound_expression.h" #include 
"binder/bound_order_by.h" #include "binder/expressions/bound_agg_call.h" -#include "binder/expressions/bound_constant.h" -#include "binder/expressions/bound_func_call.h" -#include "binder/expressions/bound_window.h" #include "binder/statement/select_statement.h" #include "binder/table_ref/bound_cte_ref.h" #include "binder/table_ref/bound_expression_list_ref.h" @@ -15,8 +10,6 @@ namespace bustub { -auto BoundFuncCall::ToString() const -> std::string { return fmt::format("{}({})", func_name_, args_); } - auto BoundAggCall::ToString() const -> std::string { if (is_distinct_) { return fmt::format("{}_distinct({})", func_name_, args_); @@ -41,34 +34,4 @@ auto BoundSubqueryRef::ToString() const -> std::string { StringUtil::IndentAllLines(subquery_->ToString(), 2, true), columns); } -auto BoundWindow::ToString() const -> std::string { - std::vector partition_by; - for (const auto &expr : partition_by_) { - partition_by.push_back(expr->ToString()); - } - - std::vector order_bys; - for (const auto &expr : order_bys_) { - order_bys.push_back(expr->ToString()); - } - - std::string start_offset = "None"; - if (start_offset_.has_value()) { - start_offset = (*start_offset_)->ToString(); - } - - std::string end_offset = "None"; - if (end_offset_.has_value()) { - end_offset = (*end_offset_)->ToString(); - } - - std::string start_mode = Binder::WindowBoundaryToString(start_); - std::string end_mode = Binder::WindowBoundaryToString(end_); - - // TODO(avery): add frame - return fmt::format("{}({}) Over {{ partition_by={}, order_by={} }}", func_name_, args_, - StringUtil::IndentAllLines(fmt::format("[{}]", fmt::join(partition_by, ", ")), 2, true), - StringUtil::IndentAllLines(fmt::format("[{}]", fmt::join(order_bys, ", ")), 2, true)); -} - } // namespace bustub diff --git a/src/binder/node_tag_to_string.cpp b/src/binder/node_tag_to_string.cpp index 53eb55b..65e36cc 100644 --- a/src/binder/node_tag_to_string.cpp +++ b/src/binder/node_tag_to_string.cpp @@ -21,7 +21,6 @@ 
//===----------------------------------------------------------------------===// #include "binder/binder.h" -#include "binder/expressions/bound_window.h" namespace bustub { @@ -840,28 +839,4 @@ auto Binder::NodeTagToString(duckdb_libpgquery::PGNodeTag type) -> std::string { } } // LCOV_EXCL_STOP -auto Binder::WindowBoundaryToString(WindowBoundary wb) -> std::string { - switch (wb) { - case WindowBoundary::INVALID: - return "INVALID"; - case WindowBoundary::UNBOUNDED_PRECEDING: - return "UNBOUNDED PRECEDING"; - case WindowBoundary::UNBOUNDED_FOLLOWING: - return "UNBOUNDED FOLLOWING"; - case WindowBoundary::CURRENT_ROW_RANGE: - return "CURRENT ROW RANGE"; - case WindowBoundary::CURRENT_ROW_ROWS: - return "CURRENT ROW ROWS"; - case WindowBoundary::EXPR_FOLLOWING_ROWS: - return "EXPR FOLLOWING ROWS"; - case WindowBoundary::EXPR_PRECEDING_ROWS: - return "EXPR PRECEDING ROWS"; - case WindowBoundary::EXPR_FOLLOWING_RANGE: - return "EXPR FOLLOWING RANGE"; - case WindowBoundary::EXPR_PRECEDING_RANGE: - return "EXPR PRECEDING RANGE"; - default: - return "(UNKNOWN)"; - } -} // LCOV_EXCL_STOP } // namespace bustub diff --git a/src/binder/statement/create_statement.cpp b/src/binder/statement/create_statement.cpp index 53d5488..7aa2c36 100644 --- a/src/binder/statement/create_statement.cpp +++ b/src/binder/statement/create_statement.cpp @@ -5,14 +5,11 @@ namespace bustub { -CreateStatement::CreateStatement(std::string table, std::vector columns, std::vector primary_key) - : BoundStatement(StatementType::CREATE_STATEMENT), - table_(std::move(table)), - columns_(std::move(columns)), - primary_key_(std::move(primary_key)) {} +CreateStatement::CreateStatement(std::string table, std::vector columns) + : BoundStatement(StatementType::CREATE_STATEMENT), table_(std::move(table)), columns_(std::move(columns)) {} auto CreateStatement::ToString() const -> std::string { - return fmt::format("BoundCreate {{\n table={}\n columns={}\n primary_key={}\n}}", table_, columns_, primary_key_); + 
return fmt::format("BoundCreate {{\n table={}\n columns={}\n}}", table_, columns_); } } // namespace bustub diff --git a/src/binder/statement/index_statement.cpp b/src/binder/statement/index_statement.cpp index 5595e0f..90471d4 100644 --- a/src/binder/statement/index_statement.cpp +++ b/src/binder/statement/index_statement.cpp @@ -7,19 +7,14 @@ namespace bustub { IndexStatement::IndexStatement(std::string index_name, std::unique_ptr table, - std::vector> cols, std::string index_type, - std::vector col_options, std::vector> options) + std::vector> cols) : BoundStatement(StatementType::INDEX_STATEMENT), index_name_(std::move(index_name)), table_(std::move(table)), - cols_(std::move(cols)), - index_type_(std::move(index_type)), - col_options_(std::move(col_options)), - options_(std::move(options)) {} + cols_(std::move(cols)) {} auto IndexStatement::ToString() const -> std::string { - return fmt::format("BoundIndex {{ index_name={}, table={}, cols={}, using={}, col_options=[{}], options=[{}] }}", - index_name_, *table_, cols_, index_type_, fmt::join(col_options_, ","), fmt::join(options_, ",")); + return fmt::format("BoundIndex {{ index_name={}, table={}, cols={} }}", index_name_, *table_, cols_); } } // namespace bustub diff --git a/src/binder/transformer.cpp b/src/binder/transformer.cpp index 0255b43..e15e65a 100644 --- a/src/binder/transformer.cpp +++ b/src/binder/transformer.cpp @@ -25,7 +25,6 @@ #include "binder/bound_expression.h" #include "binder/bound_order_by.h" #include "binder/bound_statement.h" -#include "binder/expressions/bound_window.h" #include "binder/statement/create_statement.h" #include "binder/statement/delete_statement.h" #include "binder/statement/explain_statement.h" @@ -72,46 +71,9 @@ auto Binder::BindStatement(duckdb_libpgquery::PGNode *stmt) -> std::unique_ptr(stmt)); case duckdb_libpgquery::T_PGVariableShowStmt: return BindVariableShow(reinterpret_cast(stmt)); - case duckdb_libpgquery::T_PGTransactionStmt: - return 
BindTransaction(reinterpret_cast(stmt)); default: throw NotImplementedException(NodeTagToString(stmt->type)); } } -auto Binder::BindWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, std::unique_ptr expr) - -> std::unique_ptr { - if ((window_spec->frameOptions & FRAMEOPTION_END_UNBOUNDED_PRECEDING) != 0 || - (window_spec->frameOptions & FRAMEOPTION_START_UNBOUNDED_FOLLOWING) != 0) { - throw Exception("Window frames starting with unbounded following or ending in unbounded preceding make no sense"); - } - - const bool range_mode = (window_spec->frameOptions & FRAMEOPTION_RANGE) != 0; - WindowBoundary start = WindowBoundary::INVALID; - WindowBoundary end = WindowBoundary::INVALID; - if ((window_spec->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) != 0) { - start = WindowBoundary::UNBOUNDED_PRECEDING; - } else if ((window_spec->frameOptions & FRAMEOPTION_START_VALUE_PRECEDING) != 0) { - start = range_mode ? WindowBoundary::EXPR_PRECEDING_RANGE : WindowBoundary::EXPR_PRECEDING_ROWS; - } else if ((window_spec->frameOptions & FRAMEOPTION_START_VALUE_FOLLOWING) != 0) { - start = range_mode ? WindowBoundary::EXPR_FOLLOWING_RANGE : WindowBoundary::EXPR_FOLLOWING_ROWS; - } else if ((window_spec->frameOptions & FRAMEOPTION_START_CURRENT_ROW) != 0) { - start = range_mode ? WindowBoundary::CURRENT_ROW_RANGE : WindowBoundary::CURRENT_ROW_ROWS; - } - - if ((window_spec->frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) != 0) { - end = WindowBoundary::UNBOUNDED_FOLLOWING; - } else if ((window_spec->frameOptions & FRAMEOPTION_END_VALUE_PRECEDING) != 0) { - end = range_mode ? WindowBoundary::EXPR_PRECEDING_RANGE : WindowBoundary::EXPR_PRECEDING_ROWS; - } else if ((window_spec->frameOptions & FRAMEOPTION_END_VALUE_FOLLOWING) != 0) { - end = range_mode ? WindowBoundary::EXPR_FOLLOWING_RANGE : WindowBoundary::EXPR_FOLLOWING_ROWS; - } else if ((window_spec->frameOptions & FRAMEOPTION_END_CURRENT_ROW) != 0) { - end = range_mode ? 
WindowBoundary::CURRENT_ROW_RANGE : WindowBoundary::CURRENT_ROW_ROWS; - } - - expr->SetStart(start); - expr->SetEnd(end); - return expr; -} - } // namespace bustub diff --git a/src/buffer/CMakeLists.txt b/src/buffer/CMakeLists.txt index 643fe43..7026b9d 100644 --- a/src/buffer/CMakeLists.txt +++ b/src/buffer/CMakeLists.txt @@ -1,7 +1,7 @@ add_library( bustub_buffer OBJECT - buffer_pool_manager.cpp + buffer_pool_manager_instance.cpp clock_replacer.cpp lru_replacer.cpp lru_k_replacer.cpp) diff --git a/src/buffer/buffer_pool_manager.cpp b/src/buffer/buffer_pool_manager.cpp deleted file mode 100644 index 6c6e76e..0000000 --- a/src/buffer/buffer_pool_manager.cpp +++ /dev/null @@ -1,67 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// buffer_pool_manager.cpp -// -// Identification: src/buffer/buffer_pool_manager.cpp -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "buffer/buffer_pool_manager.h" - -#include "common/exception.h" -#include "common/macros.h" -#include "storage/page/page_guard.h" - -namespace bustub { - -BufferPoolManager::BufferPoolManager(size_t pool_size, DiskManager *disk_manager, size_t replacer_k, - LogManager *log_manager) - : pool_size_(pool_size), disk_scheduler_(std::make_unique(disk_manager)), log_manager_(log_manager) { - // TODO(students): remove this line after you have implemented the buffer pool manager - throw NotImplementedException( - "BufferPoolManager is not implemented yet. If you have finished implementing BPM, please remove the throw " - "exception line in `buffer_pool_manager.cpp`."); - - // we allocate a consecutive memory space for the buffer pool - pages_ = new Page[pool_size_]; - replacer_ = std::make_unique(pool_size, replacer_k); - - // Initially, every page is in the free list. 
- for (size_t i = 0; i < pool_size_; ++i) { - free_list_.emplace_back(static_cast(i)); - } -} - -BufferPoolManager::~BufferPoolManager() { delete[] pages_; } - -auto BufferPoolManager::NewPage(page_id_t *page_id) -> Page * { return nullptr; } - -auto BufferPoolManager::FetchPage(page_id_t page_id, [[maybe_unused]] AccessType access_type) -> Page * { - return nullptr; -} - -auto BufferPoolManager::UnpinPage(page_id_t page_id, bool is_dirty, [[maybe_unused]] AccessType access_type) -> bool { - return false; -} - -auto BufferPoolManager::FlushPage(page_id_t page_id) -> bool { return false; } - -void BufferPoolManager::FlushAllPages() {} - -auto BufferPoolManager::DeletePage(page_id_t page_id) -> bool { return false; } - -auto BufferPoolManager::AllocatePage() -> page_id_t { return next_page_id_++; } - -auto BufferPoolManager::FetchPageBasic(page_id_t page_id) -> BasicPageGuard { return {this, nullptr}; } - -auto BufferPoolManager::FetchPageRead(page_id_t page_id) -> ReadPageGuard { return {this, nullptr}; } - -auto BufferPoolManager::FetchPageWrite(page_id_t page_id) -> WritePageGuard { return {this, nullptr}; } - -auto BufferPoolManager::NewPageGuarded(page_id_t *page_id) -> BasicPageGuard { return {this, nullptr}; } - -} // namespace bustub diff --git a/src/buffer/buffer_pool_manager_instance.cpp b/src/buffer/buffer_pool_manager_instance.cpp new file mode 100644 index 0000000..e83289b --- /dev/null +++ b/src/buffer/buffer_pool_manager_instance.cpp @@ -0,0 +1,216 @@ +//===----------------------------------------------------------------------===// +// +// BusTub +// +// buffer_pool_manager_instance.cpp +// +// Identification: src/buffer/buffer_pool_manager.cpp +// +// Copyright (c) 2015-2021, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "buffer/buffer_pool_manager_instance.h" + +#include "common/exception.h" +#include "common/macros.h" + +namespace bustub { + 
+BufferPoolManagerInstance::BufferPoolManagerInstance(size_t pool_size, DiskManager *disk_manager, size_t replacer_k, + LogManager *log_manager) + : pool_size_(pool_size), disk_manager_(disk_manager), log_manager_(log_manager) { + // we allocate a consecutive memory space for the buffer pool + pages_ = new Page[pool_size_]; + page_table_ = new ExtendibleHashTable(bucket_size_); + replacer_ = new LRUKReplacer(pool_size, replacer_k); + + // Initially, every page is in the free list. + for (size_t i = 0; i < pool_size_; ++i) { + free_list_.emplace_back(static_cast(i)); + } +} + +BufferPoolManagerInstance::~BufferPoolManagerInstance() { + delete[] pages_; + delete page_table_; + delete replacer_; +} + +auto BufferPoolManagerInstance::NewPgImp(page_id_t *page_id) -> Page * { + std::scoped_lock lock(latch_); + + bool is_free_page = false; + for (size_t i = 0; i < pool_size_; i++) { + if (pages_[i].GetPinCount() == 0) { + is_free_page = true; + break; + } + } + + if (!is_free_page) { + return nullptr; + } + + *page_id = AllocatePage(); + + frame_id_t frame_id; + + if (!free_list_.empty()) { + frame_id = free_list_.front(); + free_list_.pop_front(); + } else { + // assert(replacer_->Evict(&frame_id)); + replacer_->Evict(&frame_id); + page_id_t evicted_page_id = pages_[frame_id].GetPageId(); + + if (pages_[frame_id].IsDirty()) { + disk_manager_->WritePage(evicted_page_id, pages_[frame_id].GetData()); + pages_[frame_id].is_dirty_ = false; + } + + pages_[frame_id].ResetMemory(); + + page_table_->Remove(evicted_page_id); + } + + page_table_->Insert(*page_id, frame_id); + + pages_[frame_id].page_id_ = *page_id; + pages_[frame_id].pin_count_ = 1; + + replacer_->RecordAccess(frame_id); + replacer_->SetEvictable(frame_id, false); + + return &pages_[frame_id]; +} + +auto BufferPoolManagerInstance::FetchPgImp(page_id_t page_id) -> Page * { + std::scoped_lock lock(latch_); + + frame_id_t frame_id; + if (page_table_->Find(page_id, frame_id)) { + pages_[frame_id].pin_count_++; + 
replacer_->RecordAccess(frame_id); + replacer_->SetEvictable(frame_id, false); + return &pages_[frame_id]; + } + + bool is_free_page = false; + for (size_t i = 0; i < pool_size_; i++) { + if (pages_[i].GetPinCount() == 0) { + is_free_page = true; + break; + } + } + + if (!is_free_page) { + return nullptr; + } + + if (!free_list_.empty()) { + frame_id = free_list_.front(); + free_list_.pop_front(); + } else { + // assert(replacer_->Evict(&frame_id)); + replacer_->Evict(&frame_id); + page_id_t evicted_page_id = pages_[frame_id].GetPageId(); + + if (pages_[frame_id].IsDirty()) { + disk_manager_->WritePage(evicted_page_id, pages_[frame_id].GetData()); + pages_[frame_id].is_dirty_ = false; + } + + pages_[frame_id].ResetMemory(); + + page_table_->Remove(evicted_page_id); + } + + page_table_->Insert(page_id, frame_id); + + pages_[frame_id].page_id_ = page_id; + pages_[frame_id].pin_count_ = 1; + disk_manager_->ReadPage(page_id, pages_[frame_id].GetData()); + + replacer_->RecordAccess(frame_id); + replacer_->SetEvictable(frame_id, false); + + return &pages_[frame_id]; +} + +auto BufferPoolManagerInstance::UnpinPgImp(page_id_t page_id, bool is_dirty) -> bool { + std::scoped_lock lock(latch_); + + frame_id_t frame_id; + if (!page_table_->Find(page_id, frame_id)) { + return false; + } + + if (pages_[frame_id].GetPinCount() <= 0) { + return false; + } + + if (is_dirty) { + pages_[frame_id].is_dirty_ = is_dirty; + } + + pages_[frame_id].pin_count_--; + + if (pages_[frame_id].pin_count_ == 0) { + replacer_->SetEvictable(frame_id, true); + } + + return true; +} + +auto BufferPoolManagerInstance::FlushPgImp(page_id_t page_id) -> bool { + if (page_id == INVALID_PAGE_ID) { + return false; + } + + frame_id_t frame_id; + + if (!page_table_->Find(page_id, frame_id)) { + return false; + } + + disk_manager_->WritePage(page_id, pages_[frame_id].data_); + return true; +} + +void BufferPoolManagerInstance::FlushAllPgsImp() { + std::scoped_lock lock(latch_); + for (size_t frame_id = 0; 
frame_id < pool_size_; frame_id++) { + FlushPgImp(pages_[frame_id].GetPageId()); + } +} + +auto BufferPoolManagerInstance::DeletePgImp(page_id_t page_id) -> bool { + std::scoped_lock lock(latch_); + + frame_id_t frame_id; + if (!page_table_->Find(page_id, frame_id)) { + return true; + } + + if (pages_[frame_id].GetPinCount() > 0) { + return false; + } + + replacer_->Remove(frame_id); + + pages_[frame_id].ResetMemory(); + pages_[frame_id].page_id_ = INVALID_PAGE_ID; + pages_[frame_id].pin_count_ = 0; + pages_[frame_id].is_dirty_ = false; + + page_table_->Remove(page_id); + free_list_.push_back(frame_id); + DeallocatePage(page_id); + + return true; +} + +auto BufferPoolManagerInstance::AllocatePage() -> page_id_t { return next_page_id_++; } + +} // namespace bustub diff --git a/src/buffer/lru_k_replacer.cpp b/src/buffer/lru_k_replacer.cpp index 1313d94..9c80120 100644 --- a/src/buffer/lru_k_replacer.cpp +++ b/src/buffer/lru_k_replacer.cpp @@ -11,20 +11,127 @@ //===----------------------------------------------------------------------===// #include "buffer/lru_k_replacer.h" -#include "common/exception.h" namespace bustub { LRUKReplacer::LRUKReplacer(size_t num_frames, size_t k) : replacer_size_(num_frames), k_(k) {} -auto LRUKReplacer::Evict(frame_id_t *frame_id) -> bool { return false; } +auto LRUKReplacer::Evict(frame_id_t *frame_id) -> bool { + std::scoped_lock lock(latch_); -void LRUKReplacer::RecordAccess(frame_id_t frame_id, [[maybe_unused]] AccessType access_type) {} + if (curr_size_ == 0) { + return false; + } -void LRUKReplacer::SetEvictable(frame_id_t frame_id, bool set_evictable) {} + for (auto it = history_list_.rbegin(); it != history_list_.rend(); it++) { + auto frame = *it; + if (is_evictable_[frame]) { + access_count_[frame] = 0; + history_list_.erase(history_map_[frame]); + history_map_.erase(frame); + *frame_id = frame; + curr_size_--; + is_evictable_[frame] = false; + return true; + } + } -void LRUKReplacer::Remove(frame_id_t frame_id) {} + for 
(auto it = cache_list_.rbegin(); it != cache_list_.rend(); it++) { + auto frame = *it; + if (is_evictable_[frame]) { + access_count_[frame] = 0; + cache_list_.erase(cache_map_[frame]); + cache_map_.erase(frame); + *frame_id = frame; + curr_size_--; + is_evictable_[frame] = false; + return true; + } + } -auto LRUKReplacer::Size() -> size_t { return 0; } + return false; +} + +void LRUKReplacer::RecordAccess(frame_id_t frame_id) { + std::scoped_lock lock(latch_); + + if (frame_id > static_cast(replacer_size_)) { + throw std::exception(); + } + + access_count_[frame_id]++; + + if (access_count_[frame_id] == k_) { + auto it = history_map_[frame_id]; + history_list_.erase(it); + history_map_.erase(frame_id); + + cache_list_.push_front(frame_id); + cache_map_[frame_id] = cache_list_.begin(); + } else if (access_count_[frame_id] > k_) { + if (cache_map_.count(frame_id) != 0U) { + auto it = cache_map_[frame_id]; + cache_list_.erase(it); + } + cache_list_.push_front(frame_id); + cache_map_[frame_id] = cache_list_.begin(); + } else { + if (history_map_.count(frame_id) == 0U) { + history_list_.push_front(frame_id); + history_map_[frame_id] = history_list_.begin(); + } + } +} + +void LRUKReplacer::SetEvictable(frame_id_t frame_id, bool set_evictable) { + std::scoped_lock lock(latch_); + if (frame_id > static_cast(replacer_size_)) { + throw std::exception(); + } + + if (access_count_[frame_id] == 0) { + return; + } + + if (!is_evictable_[frame_id] && set_evictable) { + curr_size_++; + } + if (is_evictable_[frame_id] && !set_evictable) { + curr_size_--; + } + is_evictable_[frame_id] = set_evictable; +} + +void LRUKReplacer::Remove(frame_id_t frame_id) { + std::scoped_lock lock(latch_); + + if (frame_id > static_cast(replacer_size_)) { + throw std::exception(); + } + + auto cnt = access_count_[frame_id]; + if (cnt == 0) { + return; + } + if (!is_evictable_[frame_id]) { + throw std::exception(); + } + if (cnt < k_) { + history_list_.erase(history_map_[frame_id]); + 
history_map_.erase(frame_id); + + } else { + cache_list_.erase(cache_map_[frame_id]); + cache_map_.erase(frame_id); + } + curr_size_--; + access_count_[frame_id] = 0; + is_evictable_[frame_id] = false; +} + +auto LRUKReplacer::Size() -> size_t { + std::scoped_lock lock(latch_); + return curr_size_; +} } // namespace bustub diff --git a/src/catalog/column.cpp b/src/catalog/column.cpp index 4877481..db9aa78 100644 --- a/src/catalog/column.cpp +++ b/src/catalog/column.cpp @@ -14,7 +14,6 @@ #include #include -#include "type/type_id.h" namespace bustub { @@ -22,12 +21,6 @@ auto Column::ToString(bool simplified) const -> std::string { if (simplified) { std::ostringstream os; os << column_name_ << ":" << Type::TypeIdToString(column_type_); - if (column_type_ == VARCHAR) { - os << "(" << length_ << ")"; - } - if (column_type_ == VECTOR) { - os << "(" << length_ / sizeof(double) << ")"; - } return (os.str()); } @@ -35,7 +28,12 @@ auto Column::ToString(bool simplified) const -> std::string { os << "Column[" << column_name_ << ", " << Type::TypeIdToString(column_type_) << ", " << "Offset:" << column_offset_ << ", "; - os << "Length:" << length_; + + if (IsInlined()) { + os << "FixedLength:" << fixed_length_; + } else { + os << "VarLength:" << variable_length_; + } os << "]"; return (os.str()); } diff --git a/src/catalog/schema.cpp b/src/catalog/schema.cpp index dbc8186..5a4dd71 100644 --- a/src/catalog/schema.cpp +++ b/src/catalog/schema.cpp @@ -29,11 +29,7 @@ Schema::Schema(const std::vector &columns) { } // set column offset column.column_offset_ = curr_offset; - if (column.IsInlined()) { - curr_offset += column.GetStorageSize(); - } else { - curr_offset += sizeof(uint32_t); - } + curr_offset += column.GetFixedLength(); // add column this->columns_.push_back(column); diff --git a/src/catalog/table_generator.cpp b/src/catalog/table_generator.cpp index 7e62238..cb78f53 100644 --- a/src/catalog/table_generator.cpp +++ b/src/catalog/table_generator.cpp @@ -3,7 +3,6 @@ #include 
#include #include -#include "common/config.h" namespace bustub { @@ -77,8 +76,9 @@ void TableGenerator::FillTable(TableInfo *info, TableInsertMeta *table_meta) { for (const auto &col : values) { entry.emplace_back(col[i]); } - auto rid = info->table_->InsertTuple(TupleMeta{0, false}, Tuple(entry, &info->schema_)); - BUSTUB_ENSURE(rid != std::nullopt, "Sequential insertion cannot fail"); + RID rid; + bool inserted = info->table_->InsertTuple(Tuple(entry, &info->schema_), &rid, exec_ctx_->GetTransaction()); + BUSTUB_ENSURE(inserted, "Sequential insertion cannot fail"); num_inserted++; } } diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index d5ff2e6..39a3e26 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -2,10 +2,9 @@ add_library( bustub_common OBJECT bustub_instance.cpp - bustub_ddl.cpp config.cpp util/string_util.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ - PARENT_SCOPE) + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/common/bustub_ddl.cpp b/src/common/bustub_ddl.cpp deleted file mode 100644 index f3a8ade..0000000 --- a/src/common/bustub_ddl.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// DDL (Data Definition Language) statement handling in BusTub, including create table, create index, and set/show -// variable. 
- -#include -#include -#include -#include - -#include "binder/binder.h" -#include "binder/bound_expression.h" -#include "binder/bound_statement.h" -#include "binder/statement/create_statement.h" -#include "binder/statement/explain_statement.h" -#include "binder/statement/index_statement.h" -#include "binder/statement/select_statement.h" -#include "binder/statement/set_show_statement.h" -#include "buffer/buffer_pool_manager.h" -#include "catalog/catalog.h" -#include "catalog/schema.h" -#include "catalog/table_generator.h" -#include "common/bustub_instance.h" -#include "common/enums/statement_type.h" -#include "common/exception.h" -#include "common/macros.h" -#include "common/util/string_util.h" -#include "concurrency/lock_manager.h" -#include "concurrency/transaction.h" -#include "execution/execution_engine.h" -#include "execution/executor_context.h" -#include "execution/executors/mock_scan_executor.h" -#include "execution/expressions/abstract_expression.h" -#include "execution/plans/abstract_plan.h" -#include "fmt/core.h" -#include "fmt/format.h" -#include "optimizer/optimizer.h" -#include "planner/planner.h" -#include "recovery/checkpoint_manager.h" -#include "recovery/log_manager.h" -#include "storage/disk/disk_manager.h" -#include "storage/disk/disk_manager_memory.h" -#include "type/value_factory.h" - -namespace bustub { - -void BustubInstance::HandleCreateStatement(Transaction *txn, const CreateStatement &stmt, ResultWriter &writer) { - std::unique_lock l(catalog_lock_); - auto info = catalog_->CreateTable(txn, stmt.table_, Schema(stmt.columns_)); - IndexInfo *index = nullptr; - if (!stmt.primary_key_.empty()) { - std::vector col_ids; - for (const auto &col : stmt.primary_key_) { - auto idx = info->schema_.GetColIdx(col); - col_ids.push_back(idx); - if (info->schema_.GetColumn(idx).GetType() != TypeId::INTEGER) { - throw NotImplementedException("only support creating index on integer column"); - } - } - auto key_schema = Schema::CopySchema(&info->schema_, 
col_ids); - - // TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion - // and create index with different key type that can hold multiple keys based on number of index columns. - // - // You can also create clustered index that directly stores value inside the index by modifying the value type. - - if (col_ids.empty() || col_ids.size() > 2) { - throw NotImplementedException("only support creating index with exactly one or two columns"); - } - - index = catalog_->CreateIndex( - txn, stmt.table_ + "_pk", stmt.table_, info->schema_, key_schema, col_ids, TWO_INTEGER_SIZE, - IntegerHashFunctionType{}, true); - } - l.unlock(); - - if (info == nullptr) { - throw bustub::Exception("Failed to create table"); - } - - if (index != nullptr) { - WriteOneCell(fmt::format("Table created with id = {}, Primary key index created with id = {}", info->oid_, - index->index_oid_), - writer); - } else { - WriteOneCell(fmt::format("Table created with id = {}", info->oid_), writer); - } -} - -void BustubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement &stmt, ResultWriter &writer) { - std::vector col_ids; - for (const auto &col : stmt.cols_) { - auto idx = stmt.table_->schema_.GetColIdx(col->col_name_.back()); - col_ids.push_back(idx); - if (stmt.table_->schema_.GetColumn(idx).GetType() != TypeId::INTEGER) { - throw NotImplementedException("only support creating index on integer column"); - } - } - auto key_schema = Schema::CopySchema(&stmt.table_->schema_, col_ids); - - // TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion - // and create index with different key type that can hold multiple keys based on number of index columns. - // - // You can also create clustered index that directly stores value inside the index by modifying the value type. 
- - if (col_ids.empty() || col_ids.size() > 2) { - throw NotImplementedException("only support creating index with exactly one or two columns"); - } - - std::unique_lock l(catalog_lock_); - IndexInfo *info = nullptr; - - if (stmt.index_type_.empty()) { - info = catalog_->CreateIndex( - txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE, - IntegerHashFunctionType{}, false); // create default index - } else if (stmt.index_type_ == "hash") { - info = catalog_->CreateIndex( - txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE, - IntegerHashFunctionType{}, false, IndexType::HashTableIndex); - } else if (stmt.index_type_ == "bplustree") { - info = catalog_->CreateIndex( - txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE, - IntegerHashFunctionType{}, false, IndexType::BPlusTreeIndex); - } else if (stmt.index_type_ == "stl_ordered") { - info = catalog_->CreateIndex( - txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE, - IntegerHashFunctionType{}, false, IndexType::STLOrderedIndex); - } else if (stmt.index_type_ == "stl_unordered") { - info = catalog_->CreateIndex( - txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE, - IntegerHashFunctionType{}, false, IndexType::STLUnorderedIndex); - } else { - UNIMPLEMENTED("unsupported index type " + stmt.index_type_); - } - l.unlock(); - - if (info == nullptr) { - throw bustub::Exception("Failed to create index"); - } - WriteOneCell(fmt::format("Index created with id = {} with type = {}", info->index_oid_, info->index_type_), writer); -} - -void BustubInstance::HandleExplainStatement(Transaction *txn, const ExplainStatement &stmt, ResultWriter &writer) { - std::string output; - - // Print binder result. 
- if ((stmt.options_ & ExplainOptions::BINDER) != 0) { - output += "=== BINDER ==="; - output += "\n"; - output += stmt.statement_->ToString(); - output += "\n"; - } - - std::shared_lock l(catalog_lock_); - - bustub::Planner planner(*catalog_); - planner.PlanQuery(*stmt.statement_); - - bool show_schema = (stmt.options_ & ExplainOptions::SCHEMA) != 0; - - // Print planner result. - if ((stmt.options_ & ExplainOptions::PLANNER) != 0) { - output += "=== PLANNER ==="; - output += "\n"; - output += planner.plan_->ToString(show_schema); - output += "\n"; - } - - // Print optimizer result. - bustub::Optimizer optimizer(*catalog_, IsForceStarterRule()); - auto optimized_plan = optimizer.Optimize(planner.plan_); - - l.unlock(); - - if ((stmt.options_ & ExplainOptions::OPTIMIZER) != 0) { - output += "=== OPTIMIZER ==="; - output += "\n"; - output += optimized_plan->ToString(show_schema); - output += "\n"; - } - - WriteOneCell(output, writer); -} - -void BustubInstance::HandleVariableShowStatement(Transaction *txn, const VariableShowStatement &stmt, - ResultWriter &writer) { - auto content = GetSessionVariable(stmt.variable_); - WriteOneCell(fmt::format("{}={}", stmt.variable_, content), writer); -} - -void BustubInstance::HandleVariableSetStatement(Transaction *txn, const VariableSetStatement &stmt, - ResultWriter &writer) { - session_variables_[stmt.variable_] = stmt.value_; -} - -void BustubInstance::HandleTxnStatement(Transaction *txn, const TransactionStatement &stmt, ResultWriter &writer) { - if (managed_txn_mode_ && current_txn_ != nullptr) { - BUSTUB_ASSERT(current_txn_ == txn, "txn mismatched??"); - } - auto dump_current_txn = [&](const std::string &prefix) { - writer.OneCell(fmt::format("{}txn_id={} txn_real_id={} read_ts={} commit_ts={} status={} iso_lvl={}", prefix, - current_txn_->GetTransactionIdHumanReadable(), current_txn_->GetTransactionId(), - current_txn_->GetReadTs(), current_txn_->GetCommitTs(), - current_txn_->GetTransactionState(), 
current_txn_->GetIsolationLevel())); - }; - if (txn == nullptr) { - writer.OneCell("commit / rollback can only be used with txn"); - return; - } - if (stmt.type_ == "begin") { - if (!managed_txn_mode_) { - writer.OneCell("begin statement is only supported in managed txn mode, please use bustub-shell"); - return; - } - bool txn_activated = current_txn_ != nullptr; - auto iso_lvl = StringUtil::Lower(GetSessionVariable("global_isolation_level")); - if (iso_lvl == "serializable") { - current_txn_ = txn_manager_->Begin(IsolationLevel::SERIALIZABLE); - } else if (iso_lvl == "snapshot_isolation" || iso_lvl.empty()) { - current_txn_ = txn_manager_->Begin(IsolationLevel::SNAPSHOT_ISOLATION); - } else { - throw Exception("unsupported global_isolation_level"); - } - dump_current_txn(txn_activated ? "pause current txn and begin new txn " : "begin txn "); - return; - } - if (stmt.type_ == "commit") { - auto res = txn_manager_->Commit(txn); - if (res) { - writer.OneCell(fmt::format("txn committed, txn_id={}, status={}, read_ts={}, commit_ts={}", - txn->GetTransactionIdHumanReadable(), txn->GetTransactionState(), txn->GetReadTs(), - txn->GetCommitTs())); - } else { - writer.OneCell(fmt::format("txn failed to commit, txn_id={}, status={}, read_ts={}, commit_ts={}", - txn->GetTransactionIdHumanReadable(), txn->GetTransactionState(), txn->GetReadTs(), - txn->GetCommitTs())); - } - current_txn_ = nullptr; - return; - } - if (stmt.type_ == "abort") { - txn_manager_->Abort(txn); - writer.OneCell(fmt::format("txn aborted, txn_id={}, status={}, read_ts={}", txn->GetTransactionIdHumanReadable(), - txn->GetTransactionState(), txn->GetReadTs())); - current_txn_ = nullptr; - return; - } -} -} // namespace bustub diff --git a/src/common/bustub_instance.cpp b/src/common/bustub_instance.cpp index 945027f..713d9af 100644 --- a/src/common/bustub_instance.cpp +++ b/src/common/bustub_instance.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -12,17 +11,15 @@ #include 
"binder/statement/index_statement.h" #include "binder/statement/select_statement.h" #include "binder/statement/set_show_statement.h" -#include "buffer/buffer_pool_manager.h" +#include "buffer/buffer_pool_manager_instance.h" #include "catalog/schema.h" #include "catalog/table_generator.h" #include "common/bustub_instance.h" -#include "common/config.h" +#include "common/enums/statement_type.h" #include "common/exception.h" #include "common/util/string_util.h" #include "concurrency/lock_manager.h" #include "concurrency/transaction.h" -#include "execution/check_options.h" -#include "execution/execution_common.h" #include "execution/execution_engine.h" #include "execution/executor_context.h" #include "execution/executors/mock_scan_executor.h" @@ -40,129 +37,72 @@ namespace bustub { -auto BustubInstance::MakeExecutorContext(Transaction *txn, bool is_modify) -> std::unique_ptr { - return std::make_unique(txn, catalog_.get(), buffer_pool_manager_.get(), txn_manager_.get(), - lock_manager_.get(), is_modify); +auto BustubInstance::MakeExecutorContext(Transaction *txn) -> std::unique_ptr { + return std::make_unique(txn, catalog_, buffer_pool_manager_, txn_manager_, lock_manager_); } -BustubInstance::BustubInstance(const std::string &db_file_name, size_t bpm_size) { +BustubInstance::BustubInstance(const std::string &db_file_name) { enable_logging = false; // Storage related. - disk_manager_ = std::make_unique(db_file_name); + disk_manager_ = new DiskManager(db_file_name); -#ifndef DISABLE_CHECKPOINT_MANAGER // Log related. - log_manager_ = std::make_unique(disk_manager_.get()); -#endif + log_manager_ = new LogManager(disk_manager_); // We need more frames for GenerateTestTable to work. Therefore, we use 128 instead of the default // buffer pool size specified in `config.h`. 
try { - buffer_pool_manager_ = - std::make_unique(bpm_size, disk_manager_.get(), LRUK_REPLACER_K, log_manager_.get()); + buffer_pool_manager_ = new BufferPoolManagerInstance(128, disk_manager_, LRUK_REPLACER_K, log_manager_); } catch (NotImplementedException &e) { std::cerr << "BufferPoolManager is not implemented, only mock tables are supported." << std::endl; buffer_pool_manager_ = nullptr; } -// Transaction (txn) related. -#ifndef DISABLE_LOCK_MANAGER - lock_manager_ = std::make_unique(); - txn_manager_ = std::make_unique(lock_manager_.get()); -#else - txn_manager_ = std::make_unique(); -#endif + // Transaction (txn) related. + lock_manager_ = new LockManager(); + txn_manager_ = new TransactionManager(lock_manager_, log_manager_); -#ifndef DISABLE_LOCK_MANAGER - lock_manager_->txn_manager_ = txn_manager_.get(); - -#ifndef __EMSCRIPTEN__ - lock_manager_->StartDeadlockDetection(); -#endif - -#endif - -#ifndef DISABLE_CHECKPOINT_MANAGER // Checkpoint related. - checkpoint_manager_ = - std::make_unique(txn_manager_.get(), log_manager_.get(), buffer_pool_manager_.get()); -#endif - - // Catalog related. - catalog_ = std::make_unique(buffer_pool_manager_.get(), lock_manager_.get(), log_manager_.get()); + checkpoint_manager_ = new CheckpointManager(txn_manager_, log_manager_, buffer_pool_manager_); - txn_manager_->catalog_ = catalog_.get(); + // Catalog. + catalog_ = new Catalog(buffer_pool_manager_, lock_manager_, log_manager_); - // Execution engine related. - execution_engine_ = std::make_unique(buffer_pool_manager_.get(), txn_manager_.get(), catalog_.get()); + // Execution engine. + execution_engine_ = new ExecutionEngine(buffer_pool_manager_, txn_manager_, catalog_); } -BustubInstance::BustubInstance(size_t bpm_size) { +BustubInstance::BustubInstance() { enable_logging = false; // Storage related. - disk_manager_ = std::make_unique(); + disk_manager_ = new DiskManagerUnlimitedMemory(); -#ifndef DISABLE_CHECKPOINT_MANAGER // Log related. 
- log_manager_ = std::make_unique(disk_manager_.get()); -#endif + log_manager_ = new LogManager(disk_manager_); // We need more frames for GenerateTestTable to work. Therefore, we use 128 instead of the default // buffer pool size specified in `config.h`. try { - buffer_pool_manager_ = - std::make_unique(bpm_size, disk_manager_.get(), LRUK_REPLACER_K, log_manager_.get()); + buffer_pool_manager_ = new BufferPoolManagerInstance(128, disk_manager_, LRUK_REPLACER_K, log_manager_); } catch (NotImplementedException &e) { std::cerr << "BufferPoolManager is not implemented, only mock tables are supported." << std::endl; buffer_pool_manager_ = nullptr; } -#ifndef DISABLE_LOCK_MANAGER - lock_manager_ = std::make_unique(); - txn_manager_ = std::make_unique(lock_manager_.get()); -#else - txn_manager_ = std::make_unique(); -#endif + // Transaction (txn) related. + lock_manager_ = new LockManager(); + txn_manager_ = new TransactionManager(lock_manager_, log_manager_); -#ifndef DISABLE_LOCK_MANAGER - lock_manager_->txn_manager_ = txn_manager_.get(); - -#ifndef __EMSCRIPTEN__ - lock_manager_->StartDeadlockDetection(); -#endif -#endif - -#ifndef DISABLE_CHECKPOINT_MANAGER // Checkpoint related. - checkpoint_manager_ = - std::make_unique(txn_manager_.get(), log_manager_.get(), buffer_pool_manager_.get()); -#endif - - // Catalog related. - catalog_ = std::make_unique(buffer_pool_manager_.get(), lock_manager_.get(), log_manager_.get()); + checkpoint_manager_ = new CheckpointManager(txn_manager_, log_manager_, buffer_pool_manager_); - txn_manager_->catalog_ = catalog_.get(); + // Catalog. + catalog_ = new Catalog(buffer_pool_manager_, lock_manager_, log_manager_); - // Execution engine related. 
- execution_engine_ = std::make_unique(buffer_pool_manager_.get(), txn_manager_.get(), catalog_.get()); -} - -void BustubInstance::CmdDbgMvcc(const std::vector ¶ms, ResultWriter &writer) { - if (params.size() != 2) { - writer.OneCell("please provide a table name"); - return; - } - const auto &table = params[1]; - writer.OneCell("please view the result in the BusTub console (or Chrome DevTools console), table=" + table); - std::shared_lock lck(catalog_lock_); - auto table_info = catalog_->GetTable(table); - if (table_info == nullptr) { - writer.OneCell("table " + table + " not found"); - return; - } - TxnMgrDbg("\\dbgmvcc", txn_manager_.get(), table_info, table_info->table_.get()); + // Execution engine. + execution_engine_ = new ExecutionEngine(buffer_pool_manager_, txn_manager_, catalog_); } void BustubInstance::CmdDisplayTables(ResultWriter &writer) { @@ -206,19 +146,20 @@ void BustubInstance::CmdDisplayIndices(ResultWriter &writer) { writer.EndTable(); } -void BustubInstance::WriteOneCell(const std::string &cell, ResultWriter &writer) { writer.OneCell(cell); } +void BustubInstance::WriteOneCell(const std::string &cell, ResultWriter &writer) { + writer.BeginTable(true); + writer.BeginRow(); + writer.WriteCell(cell); + writer.EndRow(); + writer.EndTable(); +} void BustubInstance::CmdDisplayHelp(ResultWriter &writer) { std::string help = R"(Welcome to the BusTub shell! \dt: show all tables \di: show all indices -\dbgmvcc : show version chain of a table \help: show this message again -\txn: show current txn information -\txn : switch to txn -\txn gc: run garbage collection -\txn -1: exit txn mode BusTub shell currently only supports a small set of Postgres queries. We'll set up a doc describing the current status later. It will silently ignore some parts @@ -231,28 +172,15 @@ see the execution plan of your query. 
WriteOneCell(help, writer); } -auto BustubInstance::ExecuteSql(const std::string &sql, ResultWriter &writer, - std::shared_ptr check_options) -> bool { - bool is_local_txn = current_txn_ != nullptr; - auto *txn = is_local_txn ? current_txn_ : txn_manager_->Begin(); - try { - auto result = ExecuteSqlTxn(sql, writer, txn, std::move(check_options)); - if (!is_local_txn) { - auto res = txn_manager_->Commit(txn); - if (!res) { - throw Exception("failed to commit txn"); - } - } - return result; - } catch (bustub::Exception &ex) { - txn_manager_->Abort(txn); - current_txn_ = nullptr; - throw ex; - } +auto BustubInstance::ExecuteSql(const std::string &sql, ResultWriter &writer) -> bool { + auto txn = txn_manager_->Begin(); + auto result = ExecuteSqlTxn(sql, writer, txn); + txn_manager_->Commit(txn); + delete txn; + return result; } -auto BustubInstance::ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, Transaction *txn, - std::shared_ptr check_options) -> bool { +auto BustubInstance::ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, Transaction *txn) -> bool { if (!sql.empty() && sql[0] == '\\') { // Internal meta-commands, like in `psql`. 
if (sql == "\\dt") { @@ -267,16 +195,6 @@ auto BustubInstance::ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, CmdDisplayHelp(writer); return true; } - if (StringUtil::StartsWith(sql, "\\dbgmvcc")) { - auto split = StringUtil::Split(sql, " "); - CmdDbgMvcc(split, writer); - return true; - } - if (StringUtil::StartsWith(sql, "\\txn")) { - auto split = StringUtil::Split(sql, " "); - CmdTxn(split, writer); - return true; - } throw Exception(fmt::format("unsupported internal command: {}", sql)); } @@ -289,43 +207,103 @@ auto BustubInstance::ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, for (auto *stmt : binder.statement_nodes_) { auto statement = binder.BindStatement(stmt); - - bool is_delete = false; - switch (statement->type_) { case StatementType::CREATE_STATEMENT: { const auto &create_stmt = dynamic_cast(*statement); - HandleCreateStatement(txn, create_stmt, writer); + + std::unique_lock l(catalog_lock_); + auto info = catalog_->CreateTable(txn, create_stmt.table_, Schema(create_stmt.columns_)); + l.unlock(); + + if (info == nullptr) { + throw bustub::Exception("Failed to create table"); + } + WriteOneCell(fmt::format("Table created with id = {}", info->oid_), writer); continue; } case StatementType::INDEX_STATEMENT: { const auto &index_stmt = dynamic_cast(*statement); - HandleIndexStatement(txn, index_stmt, writer); + + std::vector col_ids; + for (const auto &col : index_stmt.cols_) { + auto idx = index_stmt.table_->schema_.GetColIdx(col->col_name_.back()); + col_ids.push_back(idx); + if (index_stmt.table_->schema_.GetColumn(idx).GetType() != TypeId::INTEGER) { + throw NotImplementedException("only support creating index on integer column"); + } + } + if (col_ids.size() != 1) { + throw NotImplementedException("only support creating index with exactly one column"); + } + auto key_schema = Schema::CopySchema(&index_stmt.table_->schema_, col_ids); + + std::unique_lock l(catalog_lock_); + auto info = catalog_->CreateIndex( + txn, 
index_stmt.index_name_, index_stmt.table_->table_, index_stmt.table_->schema_, key_schema, col_ids, + INTEGER_SIZE, IntegerHashFunctionType{}); + l.unlock(); + + if (info == nullptr) { + throw bustub::Exception("Failed to create index"); + } + WriteOneCell(fmt::format("Index created with id = {}", info->index_oid_), writer); continue; } case StatementType::VARIABLE_SHOW_STATEMENT: { const auto &show_stmt = dynamic_cast(*statement); - HandleVariableShowStatement(txn, show_stmt, writer); + auto content = GetSessionVariable(show_stmt.variable_); + WriteOneCell(fmt::format("{}={}", show_stmt.variable_, content), writer); continue; } case StatementType::VARIABLE_SET_STATEMENT: { const auto &set_stmt = dynamic_cast(*statement); - HandleVariableSetStatement(txn, set_stmt, writer); + session_variables_[set_stmt.variable_] = set_stmt.value_; continue; } case StatementType::EXPLAIN_STATEMENT: { const auto &explain_stmt = dynamic_cast(*statement); - HandleExplainStatement(txn, explain_stmt, writer); - continue; - } - case StatementType::TRANSACTION_STATEMENT: { - const auto &txn_stmt = dynamic_cast(*statement); - HandleTxnStatement(txn, txn_stmt, writer); + std::string output; + + // Print binder result. + if ((explain_stmt.options_ & ExplainOptions::BINDER) != 0) { + output += "=== BINDER ==="; + output += "\n"; + output += explain_stmt.statement_->ToString(); + output += "\n"; + } + + std::shared_lock l(catalog_lock_); + + bustub::Planner planner(*catalog_); + planner.PlanQuery(*explain_stmt.statement_); + + bool show_schema = (explain_stmt.options_ & ExplainOptions::SCHEMA) != 0; + + // Print planner result. + if ((explain_stmt.options_ & ExplainOptions::PLANNER) != 0) { + output += "=== PLANNER ==="; + output += "\n"; + output += planner.plan_->ToString(show_schema); + output += "\n"; + } + + // Print optimizer result. 
+ bustub::Optimizer optimizer(*catalog_, IsForceStarterRule()); + auto optimized_plan = optimizer.Optimize(planner.plan_); + + l.unlock(); + + if ((explain_stmt.options_ & ExplainOptions::OPTIMIZER) != 0) { + output += "=== OPTIMIZER ==="; + output += "\n"; + output += optimized_plan->ToString(show_schema); + output += "\n"; + } + + WriteOneCell(output, writer); + continue; } - case StatementType::DELETE_STATEMENT: - case StatementType::UPDATE_STATEMENT: - is_delete = true; default: break; } @@ -343,10 +321,7 @@ auto BustubInstance::ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, l.unlock(); // Execute the query. - auto exec_ctx = MakeExecutorContext(txn, is_delete); - if (check_options != nullptr) { - exec_ctx->InitCheckOptions(std::move(check_options)); - } + auto exec_ctx = MakeExecutorContext(txn); std::vector result_set{}; is_successful &= execution_engine_->Execute(optimized_plan, &result_set, txn, exec_ctx.get()); @@ -381,8 +356,8 @@ auto BustubInstance::ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, * create / drop table and insert for now. Should remove it in the future. */ void BustubInstance::GenerateTestTable() { - auto *txn = txn_manager_->Begin(); - auto exec_ctx = MakeExecutorContext(txn, false); + auto txn = txn_manager_->Begin(); + auto exec_ctx = MakeExecutorContext(txn); TableGenerator gen{exec_ctx.get()}; std::shared_lock l(catalog_lock_); @@ -390,6 +365,7 @@ void BustubInstance::GenerateTestTable() { l.unlock(); txn_manager_->Commit(txn); + delete txn; } /** @@ -408,65 +384,21 @@ void BustubInstance::GenerateMockTable() { l.unlock(); txn_manager_->Commit(txn); + delete txn; } BustubInstance::~BustubInstance() { if (enable_logging) { log_manager_->StopFlushThread(); } -} - -/** Enable managed txn mode on this BusTub instance, allowing statements like `BEGIN`. */ -void BustubInstance::EnableManagedTxn() { managed_txn_mode_ = true; } - -/** Get the current transaction. 
*/ -auto BustubInstance::CurrentManagedTxn() -> Transaction * { return current_txn_; } - -void BustubInstance::CmdTxn(const std::vector ¶ms, ResultWriter &writer) { - if (!managed_txn_mode_) { - writer.OneCell("only supported in managed mode, please use bustub-shell"); - return; - } - auto dump_current_txn = [&](const std::string &prefix) { - writer.OneCell(fmt::format("{}txn_id={} txn_real_id={} read_ts={} commit_ts={} status={} iso_lvl={}", prefix, - current_txn_->GetTransactionIdHumanReadable(), current_txn_->GetTransactionId(), - current_txn_->GetReadTs(), current_txn_->GetCommitTs(), - current_txn_->GetTransactionState(), current_txn_->GetIsolationLevel())); - }; - if (params.size() == 1) { - if (current_txn_ != nullptr) { - dump_current_txn(""); - } else { - writer.OneCell("no active txn, each statement starts a new txn."); - } - return; - } - if (params.size() == 2) { - const std::string ¶m1 = params[1]; - if (param1 == "gc") { - txn_manager_->GarbageCollection(); - writer.OneCell("GC complete"); - return; - } - auto txn_id = std::stoi(param1); - if (txn_id == -1) { - dump_current_txn("pause current txn "); - current_txn_ = nullptr; - return; - } - auto iter = txn_manager_->txn_map_.find(txn_id); - if (iter == txn_manager_->txn_map_.end()) { - iter = txn_manager_->txn_map_.find(txn_id + TXN_START_ID); - if (iter == txn_manager_->txn_map_.end()) { - writer.OneCell("cannot find txn."); - return; - } - } - current_txn_ = iter->second.get(); - dump_current_txn("switch to new txn "); - return; - } - writer.OneCell("unsupported txn cmd."); + delete execution_engine_; + delete catalog_; + delete checkpoint_manager_; + delete log_manager_; + delete buffer_pool_manager_; + delete lock_manager_; + delete txn_manager_; + delete disk_manager_; } } // namespace bustub diff --git a/src/common/config.cpp b/src/common/config.cpp index 146f8f7..7405670 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -20,6 +20,4 @@ std::chrono::duration log_timeout = 
std::chrono::seconds(1); std::chrono::milliseconds cycle_detection_interval = std::chrono::milliseconds(50); -std::atomic global_disable_execution_exception_print{false}; - } // namespace bustub diff --git a/src/common/util/string_util.cpp b/src/common/util/string_util.cpp index 26ee48c..37d3be5 100644 --- a/src/common/util/string_util.cpp +++ b/src/common/util/string_util.cpp @@ -28,15 +28,6 @@ auto StringUtil::Contains(const std::string &haystack, const std::string &needle return (haystack.find(needle) != std::string::npos); } -auto StringUtil::ContainsAfter(const std::string &keyword, const std::string &haystack, const std::string &needle) - -> bool { - auto pos = haystack.find(keyword); - if (pos == std::string::npos) { - return false; - } - return (haystack.find(needle, pos) != std::string::npos); -} - void StringUtil::RTrim(std::string *str) { // remove trailing ' ', \f, \n, \r, \t, \v str->erase(std::find_if(str->rbegin(), str->rend(), [](int ch) { return std::isspace(ch) == 0; }).base(), str->end()); diff --git a/src/concurrency/CMakeLists.txt b/src/concurrency/CMakeLists.txt index 41d41b9..8974845 100644 --- a/src/concurrency/CMakeLists.txt +++ b/src/concurrency/CMakeLists.txt @@ -1,10 +1,9 @@ add_library( bustub_concurrency OBJECT - transaction_manager.cpp - transaction_manager_impl.cpp - watermark.cpp) + lock_manager.cpp + transaction_manager.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ - PARENT_SCOPE) + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/concurrency/lock_manager.cpp b/src/concurrency/lock_manager.cpp index 90f3831..e721239 100644 --- a/src/concurrency/lock_manager.cpp +++ b/src/concurrency/lock_manager.cpp @@ -18,39 +18,577 @@ namespace bustub { -auto LockManager::LockTable(Transaction *txn, LockMode lock_mode, const table_oid_t &oid) -> bool { return true; } +auto LockManager::LockTable(Transaction *txn, LockMode lock_mode, const table_oid_t &oid) -> bool { + if (txn->GetIsolationLevel() == IsolationLevel::READ_UNCOMMITTED) { + 
if (lock_mode == LockMode::SHARED || lock_mode == LockMode::INTENTION_SHARED || + lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_SHARED_ON_READ_UNCOMMITTED); + } + if (txn->GetState() == TransactionState::SHRINKING && + (lock_mode == LockMode::EXCLUSIVE || lock_mode == LockMode::INTENTION_EXCLUSIVE)) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_ON_SHRINKING); + } + } + if (txn->GetIsolationLevel() == IsolationLevel::READ_COMMITTED) { + if (txn->GetState() == TransactionState::SHRINKING && lock_mode != LockMode::INTENTION_SHARED && + lock_mode != LockMode::SHARED) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_ON_SHRINKING); + } + } + if (txn->GetIsolationLevel() == IsolationLevel::REPEATABLE_READ) { + if (txn->GetState() == TransactionState::SHRINKING) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_ON_SHRINKING); + } + } + table_lock_map_latch_.lock(); + if (table_lock_map_.find(oid) == table_lock_map_.end()) { + table_lock_map_.emplace(oid, std::make_shared()); + } + auto lock_request_queue = table_lock_map_.find(oid)->second; + lock_request_queue->latch_.lock(); + table_lock_map_latch_.unlock(); -auto LockManager::UnlockTable(Transaction *txn, const table_oid_t &oid) -> bool { return true; } + for (auto request : lock_request_queue->request_queue_) { // NOLINT + if (request->txn_id_ == txn->GetTransactionId()) { + if (request->lock_mode_ == lock_mode) { + lock_request_queue->latch_.unlock(); + return true; + } + + if (lock_request_queue->upgrading_ != INVALID_TXN_ID) { + lock_request_queue->latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), 
AbortReason::UPGRADE_CONFLICT); + } + + if (!(request->lock_mode_ == LockMode::INTENTION_SHARED && + (lock_mode == LockMode::SHARED || lock_mode == LockMode::EXCLUSIVE || + lock_mode == LockMode::INTENTION_EXCLUSIVE || lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE)) && + !(request->lock_mode_ == LockMode::SHARED && + (lock_mode == LockMode::EXCLUSIVE || lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE)) && + !(request->lock_mode_ == LockMode::INTENTION_EXCLUSIVE && + (lock_mode == LockMode::EXCLUSIVE || lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE)) && + !(request->lock_mode_ == LockMode::SHARED_INTENTION_EXCLUSIVE && (lock_mode == LockMode::EXCLUSIVE))) { + lock_request_queue->latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::INCOMPATIBLE_UPGRADE); + } + + lock_request_queue->request_queue_.remove(request); + InsertOrDeleteTableLockSet(txn, request, false); + + auto upgrade_lock_request = std::make_shared(txn->GetTransactionId(), lock_mode, oid); + + std::list>::iterator lr_iter; + for (lr_iter = lock_request_queue->request_queue_.begin(); lr_iter != lock_request_queue->request_queue_.end(); + lr_iter++) { + if (!(*lr_iter)->granted_) { + break; + } + } + lock_request_queue->request_queue_.insert(lr_iter, upgrade_lock_request); + lock_request_queue->upgrading_ = txn->GetTransactionId(); + + std::unique_lock lock(lock_request_queue->latch_, std::adopt_lock); + while (!GrantLock(upgrade_lock_request, lock_request_queue)) { + lock_request_queue->cv_.wait(lock); + if (txn->GetState() == TransactionState::ABORTED) { + lock_request_queue->upgrading_ = INVALID_TXN_ID; + lock_request_queue->request_queue_.remove(upgrade_lock_request); + lock_request_queue->cv_.notify_all(); + return false; + } + } + + lock_request_queue->upgrading_ = INVALID_TXN_ID; + upgrade_lock_request->granted_ = true; + InsertOrDeleteTableLockSet(txn, upgrade_lock_request, true); + + if (lock_mode != 
LockMode::EXCLUSIVE) { + lock_request_queue->cv_.notify_all(); + } + return true; + } + } + + auto lock_request = std::make_shared(txn->GetTransactionId(), lock_mode, oid); + lock_request_queue->request_queue_.push_back(lock_request); + + std::unique_lock lock(lock_request_queue->latch_, std::adopt_lock); + while (!GrantLock(lock_request, lock_request_queue)) { + lock_request_queue->cv_.wait(lock); + if (txn->GetState() == TransactionState::ABORTED) { + lock_request_queue->request_queue_.remove(lock_request); + lock_request_queue->cv_.notify_all(); + return false; + } + } + + lock_request->granted_ = true; + InsertOrDeleteTableLockSet(txn, lock_request, true); + + if (lock_mode != LockMode::EXCLUSIVE) { + lock_request_queue->cv_.notify_all(); + } -auto LockManager::LockRow(Transaction *txn, LockMode lock_mode, const table_oid_t &oid, const RID &rid) -> bool { return true; } -auto LockManager::UnlockRow(Transaction *txn, const table_oid_t &oid, const RID &rid, bool force) -> bool { +auto LockManager::UnlockTable(Transaction *txn, const table_oid_t &oid) -> bool { + table_lock_map_latch_.lock(); + + if (table_lock_map_.find(oid) == table_lock_map_.end()) { + table_lock_map_latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw bustub::TransactionAbortException(txn->GetTransactionId(), AbortReason::ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD); + } + + auto s_row_lock_set = txn->GetSharedRowLockSet(); + auto x_row_lock_set = txn->GetExclusiveRowLockSet(); + + if (!(s_row_lock_set->find(oid) == s_row_lock_set->end() || s_row_lock_set->at(oid).empty()) || + !(x_row_lock_set->find(oid) == x_row_lock_set->end() || x_row_lock_set->at(oid).empty())) { + table_lock_map_latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw bustub::TransactionAbortException(txn->GetTransactionId(), AbortReason::TABLE_UNLOCKED_BEFORE_UNLOCKING_ROWS); + } + + auto lock_request_queue = table_lock_map_[oid]; + + lock_request_queue->latch_.lock(); + table_lock_map_latch_.unlock(); 
+ + for (auto lock_request : lock_request_queue->request_queue_) { // NOLINT + if (lock_request->txn_id_ == txn->GetTransactionId() && lock_request->granted_) { + lock_request_queue->request_queue_.remove(lock_request); + + lock_request_queue->cv_.notify_all(); + lock_request_queue->latch_.unlock(); + + if ((txn->GetIsolationLevel() == IsolationLevel::REPEATABLE_READ && + (lock_request->lock_mode_ == LockMode::SHARED || lock_request->lock_mode_ == LockMode::EXCLUSIVE)) || + (txn->GetIsolationLevel() == IsolationLevel::READ_COMMITTED && + lock_request->lock_mode_ == LockMode::EXCLUSIVE) || + (txn->GetIsolationLevel() == IsolationLevel::READ_UNCOMMITTED && + lock_request->lock_mode_ == LockMode::EXCLUSIVE)) { + if (txn->GetState() != TransactionState::COMMITTED && txn->GetState() != TransactionState::ABORTED) { + txn->SetState(TransactionState::SHRINKING); + } + } + + InsertOrDeleteTableLockSet(txn, lock_request, false); + return true; + } + } + + lock_request_queue->latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw bustub::TransactionAbortException(txn->GetTransactionId(), AbortReason::ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD); +} + +auto LockManager::LockRow(Transaction *txn, LockMode lock_mode, const table_oid_t &oid, const RID &rid) -> bool { + if (lock_mode == LockMode::INTENTION_EXCLUSIVE || lock_mode == LockMode::INTENTION_SHARED || + lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::ATTEMPTED_INTENTION_LOCK_ON_ROW); + } + + if (txn->GetIsolationLevel() == IsolationLevel::READ_UNCOMMITTED) { + if (lock_mode == LockMode::SHARED || lock_mode == LockMode::INTENTION_SHARED || + lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_SHARED_ON_READ_UNCOMMITTED); + } + if (txn->GetState() == TransactionState::SHRINKING && 
+ (lock_mode == LockMode::EXCLUSIVE || lock_mode == LockMode::INTENTION_EXCLUSIVE)) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_ON_SHRINKING); + } + } + if (txn->GetIsolationLevel() == IsolationLevel::READ_COMMITTED) { + if (txn->GetState() == TransactionState::SHRINKING && lock_mode != LockMode::INTENTION_SHARED && + lock_mode != LockMode::SHARED) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_ON_SHRINKING); + } + } + if (txn->GetIsolationLevel() == IsolationLevel::REPEATABLE_READ) { + if (txn->GetState() == TransactionState::SHRINKING) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::LOCK_ON_SHRINKING); + } + } + + if (lock_mode == LockMode::EXCLUSIVE) { + if (!txn->IsTableExclusiveLocked(oid) && !txn->IsTableIntentionExclusiveLocked(oid) && + !txn->IsTableSharedIntentionExclusiveLocked(oid)) { + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::TABLE_LOCK_NOT_PRESENT); + } + } + + row_lock_map_latch_.lock(); + if (row_lock_map_.find(rid) == row_lock_map_.end()) { + row_lock_map_.emplace(rid, std::make_shared()); + } + auto lock_request_queue = row_lock_map_.find(rid)->second; + lock_request_queue->latch_.lock(); + row_lock_map_latch_.unlock(); + + for (auto request : lock_request_queue->request_queue_) { // NOLINT + if (request->txn_id_ == txn->GetTransactionId()) { + if (request->lock_mode_ == lock_mode) { + lock_request_queue->latch_.unlock(); + return true; + } + + if (lock_request_queue->upgrading_ != INVALID_TXN_ID) { + lock_request_queue->latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::UPGRADE_CONFLICT); + } + + if (!(request->lock_mode_ == LockMode::INTENTION_SHARED && + (lock_mode == 
LockMode::SHARED || lock_mode == LockMode::EXCLUSIVE || + lock_mode == LockMode::INTENTION_EXCLUSIVE || lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE)) && + !(request->lock_mode_ == LockMode::SHARED && + (lock_mode == LockMode::EXCLUSIVE || lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE)) && + !(request->lock_mode_ == LockMode::INTENTION_EXCLUSIVE && + (lock_mode == LockMode::EXCLUSIVE || lock_mode == LockMode::SHARED_INTENTION_EXCLUSIVE)) && + !(request->lock_mode_ == LockMode::SHARED_INTENTION_EXCLUSIVE && (lock_mode == LockMode::EXCLUSIVE))) { + lock_request_queue->latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw TransactionAbortException(txn->GetTransactionId(), AbortReason::INCOMPATIBLE_UPGRADE); + } + + lock_request_queue->request_queue_.remove(request); + InsertOrDeleteRowLockSet(txn, request, false); + auto upgrade_lock_request = std::make_shared(txn->GetTransactionId(), lock_mode, oid, rid); + + std::list>::iterator lr_iter; + for (lr_iter = lock_request_queue->request_queue_.begin(); lr_iter != lock_request_queue->request_queue_.end(); + lr_iter++) { + if (!(*lr_iter)->granted_) { + break; + } + } + lock_request_queue->request_queue_.insert(lr_iter, upgrade_lock_request); + lock_request_queue->upgrading_ = txn->GetTransactionId(); + + std::unique_lock lock(lock_request_queue->latch_, std::adopt_lock); + while (!GrantLock(upgrade_lock_request, lock_request_queue)) { + lock_request_queue->cv_.wait(lock); + if (txn->GetState() == TransactionState::ABORTED) { + lock_request_queue->upgrading_ = INVALID_TXN_ID; + lock_request_queue->request_queue_.remove(upgrade_lock_request); + lock_request_queue->cv_.notify_all(); + return false; + } + } + + lock_request_queue->upgrading_ = INVALID_TXN_ID; + upgrade_lock_request->granted_ = true; + InsertOrDeleteRowLockSet(txn, upgrade_lock_request, true); + + if (lock_mode != LockMode::EXCLUSIVE) { + lock_request_queue->cv_.notify_all(); + } + return true; + } + } + + auto lock_request = 
std::make_shared(txn->GetTransactionId(), lock_mode, oid, rid); + lock_request_queue->request_queue_.push_back(lock_request); + + std::unique_lock lock(lock_request_queue->latch_, std::adopt_lock); + while (!GrantLock(lock_request, lock_request_queue)) { + lock_request_queue->cv_.wait(lock); + if (txn->GetState() == TransactionState::ABORTED) { + lock_request_queue->request_queue_.remove(lock_request); + lock_request_queue->cv_.notify_all(); + return false; + } + } + + lock_request->granted_ = true; + InsertOrDeleteRowLockSet(txn, lock_request, true); + + if (lock_mode != LockMode::EXCLUSIVE) { + lock_request_queue->cv_.notify_all(); + } + return true; } -void LockManager::UnlockAll() { - // You probably want to unlock all table and txn locks here. +auto LockManager::UnlockRow(Transaction *txn, const table_oid_t &oid, const RID &rid) -> bool { + row_lock_map_latch_.lock(); + + if (row_lock_map_.find(rid) == row_lock_map_.end()) { + row_lock_map_latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw bustub::TransactionAbortException(txn->GetTransactionId(), AbortReason::ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD); + } + auto lock_request_queue = row_lock_map_[rid]; + + lock_request_queue->latch_.lock(); + row_lock_map_latch_.unlock(); + + for (auto lock_request : lock_request_queue->request_queue_) { // NOLINT + if (lock_request->txn_id_ == txn->GetTransactionId() && lock_request->granted_) { + lock_request_queue->request_queue_.remove(lock_request); + + lock_request_queue->cv_.notify_all(); + lock_request_queue->latch_.unlock(); + + if ((txn->GetIsolationLevel() == IsolationLevel::REPEATABLE_READ && + (lock_request->lock_mode_ == LockMode::SHARED || lock_request->lock_mode_ == LockMode::EXCLUSIVE)) || + (txn->GetIsolationLevel() == IsolationLevel::READ_COMMITTED && + lock_request->lock_mode_ == LockMode::EXCLUSIVE) || + (txn->GetIsolationLevel() == IsolationLevel::READ_UNCOMMITTED && + lock_request->lock_mode_ == LockMode::EXCLUSIVE)) { + if (txn->GetState() 
!= TransactionState::COMMITTED && txn->GetState() != TransactionState::ABORTED) { + txn->SetState(TransactionState::SHRINKING); + } + } + + InsertOrDeleteRowLockSet(txn, lock_request, false); + return true; + } + } + + lock_request_queue->latch_.unlock(); + txn->SetState(TransactionState::ABORTED); + throw bustub::TransactionAbortException(txn->GetTransactionId(), AbortReason::ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD); } -void LockManager::AddEdge(txn_id_t t1, txn_id_t t2) {} +void LockManager::AddEdge(txn_id_t t1, txn_id_t t2) { + txn_set_.insert(t1); + txn_set_.insert(t2); + waits_for_[t1].push_back(t2); +} -void LockManager::RemoveEdge(txn_id_t t1, txn_id_t t2) {} +void LockManager::RemoveEdge(txn_id_t t1, txn_id_t t2) { + auto iter = std::find(waits_for_[t1].begin(), waits_for_[t1].end(), t2); + if (iter != waits_for_[t1].end()) { + waits_for_[t1].erase(iter); + } +} + +auto LockManager::HasCycle(txn_id_t *txn_id) -> bool { + for (auto const &start_txn_id : txn_set_) { + if (Dfs(start_txn_id)) { + *txn_id = *active_set_.begin(); + for (auto const &active_txn_id : active_set_) { + *txn_id = std::max(*txn_id, active_txn_id); + } + active_set_.clear(); + return true; + } + + active_set_.clear(); + } + return false; +} + +auto LockManager::DeleteNode(txn_id_t txn_id) -> void { + waits_for_.erase(txn_id); -auto LockManager::HasCycle(txn_id_t *txn_id) -> bool { return false; } + for (auto a_txn_id : txn_set_) { + if (a_txn_id != txn_id) { + RemoveEdge(a_txn_id, txn_id); + } + } +} auto LockManager::GetEdgeList() -> std::vector> { - std::vector> edges(0); - return edges; + std::vector> result; + for (auto const &pair : waits_for_) { + auto t1 = pair.first; + for (auto const &t2 : pair.second) { + result.emplace_back(t1, t2); + } + } + return result; } void LockManager::RunCycleDetection() { while (enable_cycle_detection_) { std::this_thread::sleep_for(cycle_detection_interval); { // TODO(students): detect deadlock + table_lock_map_latch_.lock(); + row_lock_map_latch_.lock(); 
+ for (auto &pair : table_lock_map_) { + std::unordered_set granted_set; + pair.second->latch_.lock(); + for (auto const &lock_request : pair.second->request_queue_) { + if (lock_request->granted_) { + granted_set.emplace(lock_request->txn_id_); + } else { + for (auto txn_id : granted_set) { + map_txn_oid_.emplace(lock_request->txn_id_, lock_request->oid_); + AddEdge(lock_request->txn_id_, txn_id); + } + } + } + pair.second->latch_.unlock(); + } + + for (auto &pair : row_lock_map_) { + std::unordered_set granted_set; + pair.second->latch_.lock(); + for (auto const &lock_request : pair.second->request_queue_) { + if (lock_request->granted_) { + granted_set.emplace(lock_request->txn_id_); + } else { + for (auto txn_id : granted_set) { + map_txn_rid_.emplace(lock_request->txn_id_, lock_request->rid_); + AddEdge(lock_request->txn_id_, txn_id); + } + } + } + pair.second->latch_.unlock(); + } + + row_lock_map_latch_.unlock(); + table_lock_map_latch_.unlock(); + + txn_id_t txn_id; + while (HasCycle(&txn_id)) { + Transaction *txn = TransactionManager::GetTransaction(txn_id); + txn->SetState(TransactionState::ABORTED); + DeleteNode(txn_id); + + if (map_txn_oid_.count(txn_id) > 0) { + table_lock_map_[map_txn_oid_[txn_id]]->latch_.lock(); + table_lock_map_[map_txn_oid_[txn_id]]->cv_.notify_all(); + table_lock_map_[map_txn_oid_[txn_id]]->latch_.unlock(); + } + + if (map_txn_rid_.count(txn_id) > 0) { + row_lock_map_[map_txn_rid_[txn_id]]->latch_.lock(); + row_lock_map_[map_txn_rid_[txn_id]]->cv_.notify_all(); + row_lock_map_[map_txn_rid_[txn_id]]->latch_.unlock(); + } + } + + waits_for_.clear(); + safe_set_.clear(); + txn_set_.clear(); + map_txn_oid_.clear(); + map_txn_rid_.clear(); + } + } +} + +auto LockManager::GrantLock(const std::shared_ptr &lock_request, + const std::shared_ptr &lock_request_queue) -> bool { + for (auto &lr : lock_request_queue->request_queue_) { + if (lr->granted_) { + switch (lock_request->lock_mode_) { + case LockMode::SHARED: + if (lr->lock_mode_ == 
LockMode::INTENTION_EXCLUSIVE || + lr->lock_mode_ == LockMode::SHARED_INTENTION_EXCLUSIVE || lr->lock_mode_ == LockMode::EXCLUSIVE) { + return false; + } + break; + case LockMode::EXCLUSIVE: + return false; + break; + case LockMode::INTENTION_SHARED: + if (lr->lock_mode_ == LockMode::EXCLUSIVE) { + return false; + } + break; + case LockMode::INTENTION_EXCLUSIVE: + if (lr->lock_mode_ == LockMode::SHARED || lr->lock_mode_ == LockMode::SHARED_INTENTION_EXCLUSIVE || + lr->lock_mode_ == LockMode::EXCLUSIVE) { + return false; + } + break; + case LockMode::SHARED_INTENTION_EXCLUSIVE: + if (lr->lock_mode_ != LockMode::INTENTION_SHARED) { + return false; + } + break; + } + } else if (lock_request.get() != lr.get()) { + return false; + } else { + return true; } } + return false; +} + +void LockManager::InsertOrDeleteTableLockSet(Transaction *txn, const std::shared_ptr &lock_request, + bool insert) { + switch (lock_request->lock_mode_) { + case LockMode::SHARED: + if (insert) { + txn->GetSharedTableLockSet()->insert(lock_request->oid_); + } else { + txn->GetSharedTableLockSet()->erase(lock_request->oid_); + } + break; + case LockMode::EXCLUSIVE: + if (insert) { + txn->GetExclusiveTableLockSet()->insert(lock_request->oid_); + } else { + txn->GetExclusiveTableLockSet()->erase(lock_request->oid_); + } + break; + case LockMode::INTENTION_SHARED: + if (insert) { + txn->GetIntentionSharedTableLockSet()->insert(lock_request->oid_); + } else { + txn->GetIntentionSharedTableLockSet()->erase(lock_request->oid_); + } + break; + case LockMode::INTENTION_EXCLUSIVE: + if (insert) { + txn->GetIntentionExclusiveTableLockSet()->insert(lock_request->oid_); + } else { + txn->GetIntentionExclusiveTableLockSet()->erase(lock_request->oid_); + } + break; + case LockMode::SHARED_INTENTION_EXCLUSIVE: + if (insert) { + txn->GetSharedIntentionExclusiveTableLockSet()->insert(lock_request->oid_); + } else { + txn->GetSharedIntentionExclusiveTableLockSet()->erase(lock_request->oid_); + } + break; + } +} + 
+void LockManager::InsertOrDeleteRowLockSet(Transaction *txn, const std::shared_ptr &lock_request, + bool insert) { + auto s_row_lock_set = txn->GetSharedRowLockSet(); + auto x_row_lock_set = txn->GetExclusiveRowLockSet(); + switch (lock_request->lock_mode_) { + case LockMode::SHARED: + if (insert) { + InsertRowLockSet(s_row_lock_set, lock_request->oid_, lock_request->rid_); + } else { + DeleteRowLockSet(s_row_lock_set, lock_request->oid_, lock_request->rid_); + } + break; + case LockMode::EXCLUSIVE: + if (insert) { + InsertRowLockSet(x_row_lock_set, lock_request->oid_, lock_request->rid_); + } else { + DeleteRowLockSet(x_row_lock_set, lock_request->oid_, lock_request->rid_); + } + break; + case LockMode::INTENTION_SHARED: + case LockMode::INTENTION_EXCLUSIVE: + case LockMode::SHARED_INTENTION_EXCLUSIVE: + break; + } } } // namespace bustub diff --git a/src/concurrency/transaction_manager.cpp b/src/concurrency/transaction_manager.cpp index 56dab1c..99970a9 100644 --- a/src/concurrency/transaction_manager.cpp +++ b/src/concurrency/transaction_manager.cpp @@ -12,86 +12,111 @@ #include "concurrency/transaction_manager.h" -#include #include // NOLINT -#include #include #include #include #include "catalog/catalog.h" -#include "catalog/column.h" -#include "catalog/schema.h" -#include "common/config.h" -#include "common/exception.h" -#include "common/macros.h" -#include "concurrency/transaction.h" -#include "execution/execution_common.h" #include "storage/table/table_heap.h" -#include "storage/table/tuple.h" -#include "type/type_id.h" -#include "type/value.h" -#include "type/value_factory.h" - namespace bustub { -auto TransactionManager::Begin(IsolationLevel isolation_level) -> Transaction * { - std::unique_lock l(txn_map_mutex_); - auto txn_id = next_txn_id_++; - auto txn = std::make_unique(txn_id, isolation_level); - auto *txn_ref = txn.get(); - txn_map_.insert(std::make_pair(txn_id, std::move(txn))); - - // TODO(fall2023): set the timestamps here. 
Watermark updated below. - - running_txns_.AddTxn(txn_ref->read_ts_); - return txn_ref; -} - -auto TransactionManager::VerifyTxn(Transaction *txn) -> bool { return true; } +std::unordered_map TransactionManager::txn_map = {}; +std::shared_mutex TransactionManager::txn_map_mutex = {}; -auto TransactionManager::Commit(Transaction *txn) -> bool { - std::unique_lock commit_lck(commit_mutex_); +auto TransactionManager::Begin(Transaction *txn, IsolationLevel isolation_level) -> Transaction * { + // Acquire the global transaction latch in shared mode. + global_txn_latch_.RLock(); - // TODO(fall2023): acquire commit ts! - - if (txn->state_ != TransactionState::RUNNING) { - throw Exception("txn not in running state"); + if (txn == nullptr) { + txn = new Transaction(next_txn_id_++, isolation_level); } - if (txn->GetIsolationLevel() == IsolationLevel::SERIALIZABLE) { - if (!VerifyTxn(txn)) { - commit_lck.unlock(); - Abort(txn); - return false; - } + if (enable_logging) { + LogRecord record = LogRecord(txn->GetTransactionId(), txn->GetPrevLSN(), LogRecordType::BEGIN); + lsn_t lsn = log_manager_->AppendLogRecord(&record); + txn->SetPrevLSN(lsn); } - // TODO(fall2023): Implement the commit logic! - - std::unique_lock lck(txn_map_mutex_); - - // TODO(fall2023): set commit timestamp + update last committed timestamp here. + std::unique_lock l(txn_map_mutex); + txn_map[txn->GetTransactionId()] = txn; + return txn; +} - txn->state_ = TransactionState::COMMITTED; - running_txns_.UpdateCommitTs(txn->commit_ts_); - running_txns_.RemoveTxn(txn->read_ts_); +void TransactionManager::Commit(Transaction *txn) { + txn->SetState(TransactionState::COMMITTED); + + // Perform all deletes before we commit. + auto write_set = txn->GetWriteSet(); + while (!write_set->empty()) { + auto &item = write_set->back(); + auto *table = item.table_; + if (item.wtype_ == WType::DELETE) { + // Note that this also releases the lock when holding the page latch. 
+ table->ApplyDelete(item.rid_, txn); + } + write_set->pop_back(); + } + write_set->clear(); - return true; + // Release all the locks. + ReleaseLocks(txn); + // Release the global transaction latch. + global_txn_latch_.RUnlock(); } void TransactionManager::Abort(Transaction *txn) { - if (txn->state_ != TransactionState::RUNNING && txn->state_ != TransactionState::TAINTED) { - throw Exception("txn not in running / tainted state"); + txn->SetState(TransactionState::ABORTED); + // Rollback before releasing the lock. + auto table_write_set = txn->GetWriteSet(); + while (!table_write_set->empty()) { + auto &item = table_write_set->back(); + auto *table = item.table_; + if (item.wtype_ == WType::DELETE) { + table->RollbackDelete(item.rid_, txn); + } else if (item.wtype_ == WType::INSERT) { + // Note that this also releases the lock when holding the page latch. + table->ApplyDelete(item.rid_, txn); + } else if (item.wtype_ == WType::UPDATE) { + table->UpdateTuple(item.tuple_, item.rid_, txn); + } + table_write_set->pop_back(); } + table_write_set->clear(); + // Rollback index updates + auto index_write_set = txn->GetIndexWriteSet(); + while (!index_write_set->empty()) { + auto &item = index_write_set->back(); + auto *catalog = item.catalog_; + // Metadata identifying the table that should be deleted from. 
+ TableInfo *table_info = catalog->GetTable(item.table_oid_); + IndexInfo *index_info = catalog->GetIndex(item.index_oid_); + auto new_key = item.tuple_.KeyFromTuple(table_info->schema_, *(index_info->index_->GetKeySchema()), + index_info->index_->GetKeyAttrs()); + if (item.wtype_ == WType::DELETE) { + index_info->index_->InsertEntry(new_key, item.rid_, txn); + } else if (item.wtype_ == WType::INSERT) { + index_info->index_->DeleteEntry(new_key, item.rid_, txn); + } else if (item.wtype_ == WType::UPDATE) { + // Delete the new key and insert the old key + index_info->index_->DeleteEntry(new_key, item.rid_, txn); + auto old_key = item.old_tuple_.KeyFromTuple(table_info->schema_, *(index_info->index_->GetKeySchema()), + index_info->index_->GetKeyAttrs()); + index_info->index_->InsertEntry(old_key, item.rid_, txn); + } + index_write_set->pop_back(); + } + table_write_set->clear(); + index_write_set->clear(); - // TODO(fall2023): Implement the abort logic! - - std::unique_lock lck(txn_map_mutex_); - txn->state_ = TransactionState::ABORTED; - running_txns_.RemoveTxn(txn->read_ts_); + // Release all the locks. + ReleaseLocks(txn); + // Release the global transaction latch. + global_txn_latch_.RUnlock(); } -void TransactionManager::GarbageCollection() { UNIMPLEMENTED("not implemented"); } +void TransactionManager::BlockAllTransactions() { global_txn_latch_.WLock(); } + +void TransactionManager::ResumeTransactions() { global_txn_latch_.WUnlock(); } } // namespace bustub diff --git a/src/concurrency/transaction_manager_impl.cpp b/src/concurrency/transaction_manager_impl.cpp deleted file mode 100644 index 318c1fd..0000000 --- a/src/concurrency/transaction_manager_impl.cpp +++ /dev/null @@ -1,125 +0,0 @@ -// DO NOT CHANGE THIS FILE, this file will not be included in the autograder. 
- -#include -#include -#include // NOLINT -#include -#include -#include -#include - -#include "catalog/catalog.h" -#include "catalog/column.h" -#include "catalog/schema.h" -#include "common/config.h" -#include "common/exception.h" -#include "common/macros.h" -#include "concurrency/transaction.h" -#include "concurrency/transaction_manager.h" -#include "execution/execution_common.h" -#include "storage/table/table_heap.h" -#include "storage/table/tuple.h" -#include "type/type_id.h" -#include "type/value.h" -#include "type/value_factory.h" - -namespace bustub { - -auto TransactionManager::UpdateUndoLink(RID rid, std::optional prev_link, - std::function)> &&check) -> bool { - std::function)> wrapper_func = - [check](std::optional link) -> bool { - if (link.has_value()) { - return check(link->prev_); - } - return check(std::nullopt); - }; - return UpdateVersionLink(rid, prev_link.has_value() ? std::make_optional(VersionUndoLink{*prev_link}) : std::nullopt, - check != nullptr ? wrapper_func : nullptr); -} - -auto TransactionManager::UpdateVersionLink(RID rid, std::optional prev_version, - std::function)> &&check) -> bool { - std::unique_lock lck(version_info_mutex_); - std::shared_ptr pg_ver_info = nullptr; - auto iter = version_info_.find(rid.GetPageId()); - if (iter == version_info_.end()) { - pg_ver_info = std::make_shared(); - version_info_[rid.GetPageId()] = pg_ver_info; - } else { - pg_ver_info = iter->second; - } - std::unique_lock lck2(pg_ver_info->mutex_); - lck.unlock(); - auto iter2 = pg_ver_info->prev_version_.find(rid.GetSlotNum()); - if (iter2 == pg_ver_info->prev_version_.end()) { - if (check != nullptr && !check(std::nullopt)) { - return false; - } - } else { - if (check != nullptr && !check(iter2->second)) { - return false; - } - } - if (prev_version.has_value()) { - pg_ver_info->prev_version_[rid.GetSlotNum()] = *prev_version; - } else { - pg_ver_info->prev_version_.erase(rid.GetSlotNum()); - } - return true; -} - -auto 
TransactionManager::GetVersionLink(RID rid) -> std::optional { - std::shared_lock lck(version_info_mutex_); - auto iter = version_info_.find(rid.GetPageId()); - if (iter == version_info_.end()) { - return std::nullopt; - } - std::shared_ptr pg_ver_info = iter->second; - std::unique_lock lck2(pg_ver_info->mutex_); - lck.unlock(); - auto iter2 = pg_ver_info->prev_version_.find(rid.GetSlotNum()); - if (iter2 == pg_ver_info->prev_version_.end()) { - return std::nullopt; - } - return std::make_optional(iter2->second); -} - -auto TransactionManager::GetUndoLink(RID rid) -> std::optional { - auto version_link = GetVersionLink(rid); - if (version_link.has_value()) { - return version_link->prev_; - } - return std::nullopt; -} - -auto TransactionManager::GetUndoLogOptional(UndoLink link) -> std::optional { - std::shared_lock lck(txn_map_mutex_); - auto iter = txn_map_.find(link.prev_txn_); - if (iter == txn_map_.end()) { - return std::nullopt; - } - auto txn = iter->second; - lck.unlock(); - return txn->GetUndoLog(link.prev_log_idx_); -} - -auto TransactionManager::GetUndoLog(UndoLink link) -> UndoLog { - auto undo_log = GetUndoLogOptional(link); - if (undo_log.has_value()) { - return *undo_log; - } - throw Exception("undo log not exist"); -} - -void Transaction::SetTainted() { - auto state = state_.load(); - if (state == TransactionState::RUNNING) { - state_.store(TransactionState::TAINTED); - return; - } - fmt::println(stderr, "transaction not in running state: {}", state); - std::terminate(); -} - -} // namespace bustub diff --git a/src/concurrency/watermark.cpp b/src/concurrency/watermark.cpp deleted file mode 100644 index 88fa392..0000000 --- a/src/concurrency/watermark.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "concurrency/watermark.h" -#include -#include "common/exception.h" - -namespace bustub { - -auto Watermark::AddTxn(timestamp_t read_ts) -> void { - if (read_ts < commit_ts_) { - throw Exception("read ts < commit ts"); - } - - // TODO(fall2023): implement me! 
-} - -auto Watermark::RemoveTxn(timestamp_t read_ts) -> void { - // TODO(fall2023): implement me! -} - -} // namespace bustub diff --git a/src/container/CMakeLists.txt b/src/container/CMakeLists.txt index 1707a37..4394540 100644 --- a/src/container/CMakeLists.txt +++ b/src/container/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(disk/hash) +add_subdirectory(hash) diff --git a/src/container/disk/hash/CMakeLists.txt b/src/container/disk/hash/CMakeLists.txt index 9238973..9c72cd4 100644 --- a/src/container/disk/hash/CMakeLists.txt +++ b/src/container/disk/hash/CMakeLists.txt @@ -2,8 +2,7 @@ add_library( bustub_container_disk_hash OBJECT disk_extendible_hash_table.cpp - linear_probe_hash_table.cpp - disk_extendible_hash_table_utils.cpp) + linear_probe_hash_table.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/container/disk/hash/disk_extendible_hash_table.cpp b/src/container/disk/hash/disk_extendible_hash_table.cpp index 460901b..d89e95a 100644 --- a/src/container/disk/hash/disk_extendible_hash_table.cpp +++ b/src/container/disk/hash/disk_extendible_hash_table.cpp @@ -4,9 +4,9 @@ // // disk_extendible_hash_table.cpp // -// Identification: src/container/disk/hash/disk_extendible_hash_table.cpp +// Identification: src/container/hash/extendible_hash_table.cpp // -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group +// Copyright (c) 2015-2021, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -15,84 +15,124 @@ #include #include -#include "common/config.h" #include "common/exception.h" #include "common/logger.h" -#include "common/macros.h" #include "common/rid.h" -#include "common/util/hash_util.h" #include "container/disk/hash/disk_extendible_hash_table.h" -#include "storage/index/hash_comparator.h" -#include "storage/page/extendible_htable_bucket_page.h" -#include "storage/page/extendible_htable_directory_page.h" -#include 
"storage/page/extendible_htable_header_page.h" -#include "storage/page/page_guard.h" namespace bustub { -template -DiskExtendibleHashTable::DiskExtendibleHashTable(const std::string &name, BufferPoolManager *bpm, - const KC &cmp, const HashFunction &hash_fn, - uint32_t header_max_depth, uint32_t directory_max_depth, - uint32_t bucket_max_size) - : bpm_(bpm), - cmp_(cmp), - hash_fn_(std::move(hash_fn)), - header_max_depth_(header_max_depth), - directory_max_depth_(directory_max_depth), - bucket_max_size_(bucket_max_size) { - throw NotImplementedException("DiskExtendibleHashTable is not implemented"); +template +HASH_TABLE_TYPE::DiskExtendibleHashTable(const std::string &name, BufferPoolManager *buffer_pool_manager, + const KeyComparator &comparator, HashFunction hash_fn) + : buffer_pool_manager_(buffer_pool_manager), comparator_(comparator), hash_fn_(std::move(hash_fn)) { + // implement me! +} + +/***************************************************************************** + * HELPERS + *****************************************************************************/ +/** + * Hash - simple helper to downcast MurmurHash's 64-bit hash to 32-bit + * for extendible hashing. 
+ * + * @param key the key to hash + * @return the downcasted 32-bit hash + */ +template +auto HASH_TABLE_TYPE::Hash(KeyType key) -> uint32_t { + return static_cast(hash_fn_.GetHash(key)); +} + +template +inline auto HASH_TABLE_TYPE::KeyToDirectoryIndex(KeyType key, HashTableDirectoryPage *dir_page) -> uint32_t { + return 0; +} + +template +inline auto HASH_TABLE_TYPE::KeyToPageId(KeyType key, HashTableDirectoryPage *dir_page) -> page_id_t { + return 0; +} + +template +auto HASH_TABLE_TYPE::FetchDirectoryPage() -> HashTableDirectoryPage * { + return nullptr; +} + +template +auto HASH_TABLE_TYPE::FetchBucketPage(page_id_t bucket_page_id) -> HASH_TABLE_BUCKET_TYPE * { + return nullptr; } /***************************************************************************** * SEARCH *****************************************************************************/ -template -auto DiskExtendibleHashTable::GetValue(const K &key, std::vector *result, Transaction *transaction) const - -> bool { +template +auto HASH_TABLE_TYPE::GetValue(Transaction *transaction, const KeyType &key, std::vector *result) -> bool { return false; } /***************************************************************************** * INSERTION *****************************************************************************/ - -template -auto DiskExtendibleHashTable::Insert(const K &key, const V &value, Transaction *transaction) -> bool { +template +auto HASH_TABLE_TYPE::Insert(Transaction *transaction, const KeyType &key, const ValueType &value) -> bool { return false; } -template -auto DiskExtendibleHashTable::InsertToNewDirectory(ExtendibleHTableHeaderPage *header, uint32_t directory_idx, - uint32_t hash, const K &key, const V &value) -> bool { +template +auto HASH_TABLE_TYPE::SplitInsert(Transaction *transaction, const KeyType &key, const ValueType &value) -> bool { return false; } -template -auto DiskExtendibleHashTable::InsertToNewBucket(ExtendibleHTableDirectoryPage *directory, uint32_t bucket_idx, - const K 
&key, const V &value) -> bool { +/***************************************************************************** + * REMOVE + *****************************************************************************/ +template +auto HASH_TABLE_TYPE::Remove(Transaction *transaction, const KeyType &key, const ValueType &value) -> bool { return false; } -template -void DiskExtendibleHashTable::UpdateDirectoryMapping(ExtendibleHTableDirectoryPage *directory, - uint32_t new_bucket_idx, page_id_t new_bucket_page_id, - uint32_t new_local_depth, uint32_t local_depth_mask) { - throw NotImplementedException("DiskExtendibleHashTable is not implemented"); +/***************************************************************************** + * MERGE + *****************************************************************************/ +template +void HASH_TABLE_TYPE::Merge(Transaction *transaction, const KeyType &key, const ValueType &value) {} + +/***************************************************************************** + * GETGLOBALDEPTH - DO NOT TOUCH + *****************************************************************************/ +template +auto HASH_TABLE_TYPE::GetGlobalDepth() -> uint32_t { + table_latch_.RLock(); + HashTableDirectoryPage *dir_page = FetchDirectoryPage(); + uint32_t global_depth = dir_page->GetGlobalDepth(); + assert(buffer_pool_manager_->UnpinPage(directory_page_id_, false, nullptr)); + table_latch_.RUnlock(); + return global_depth; } /***************************************************************************** - * REMOVE + * VERIFY INTEGRITY - DO NOT TOUCH *****************************************************************************/ -template -auto DiskExtendibleHashTable::Remove(const K &key, Transaction *transaction) -> bool { - return false; +template +void HASH_TABLE_TYPE::VerifyIntegrity() { + table_latch_.RLock(); + HashTableDirectoryPage *dir_page = FetchDirectoryPage(); + dir_page->VerifyIntegrity(); + assert(buffer_pool_manager_->UnpinPage(directory_page_id_, 
false, nullptr)); + table_latch_.RUnlock(); } +/***************************************************************************** + * TEMPLATE DEFINITIONS - DO NOT TOUCH + *****************************************************************************/ template class DiskExtendibleHashTable; + template class DiskExtendibleHashTable, RID, GenericComparator<4>>; template class DiskExtendibleHashTable, RID, GenericComparator<8>>; template class DiskExtendibleHashTable, RID, GenericComparator<16>>; template class DiskExtendibleHashTable, RID, GenericComparator<32>>; template class DiskExtendibleHashTable, RID, GenericComparator<64>>; + } // namespace bustub diff --git a/src/container/disk/hash/disk_extendible_hash_table_utils.cpp b/src/container/disk/hash/disk_extendible_hash_table_utils.cpp deleted file mode 100644 index 2691b63..0000000 --- a/src/container/disk/hash/disk_extendible_hash_table_utils.cpp +++ /dev/null @@ -1,106 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// disk_extendible_hash_table_utils.cpp -// -// Identification: src/container/disk/hash/disk_extendible_hash_table_utils.cpp -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include - -#include "container/disk/hash/disk_extendible_hash_table.h" - -namespace bustub { - -/** - * Hash - simple helper to downcast MurmurHash's 64-bit hash to 32-bit - * for extendible hashing. - * - * @param key the key to hash - * @return the downcasted 32-bit hash - */ -template -auto DiskExtendibleHashTable::Hash(K key) const -> uint32_t { - return static_cast(hash_fn_.GetHash(key)); -} - -/** - * @brief Identity Hash for testing purposes. 
- */ -template <> -auto DiskExtendibleHashTable::Hash(int key) const -> uint32_t { - return static_cast(key); -} - -template -void DiskExtendibleHashTable::PrintHT() const { - std::cout << "\n"; - std::cout << "==================== PRINT! ====================\n"; - BasicPageGuard header_guard = bpm_->FetchPageBasic(header_page_id_); - auto *header = header_guard.As(); - - header->PrintHeader(); - - for (uint32_t idx = 0; idx < header->MaxSize(); idx++) { - page_id_t directory_page_id = header->GetDirectoryPageId(idx); - if (directory_page_id == INVALID_PAGE_ID) { - std::cout << "Directory " << idx << ", page id: " << directory_page_id << "\n"; - continue; - } - BasicPageGuard directory_guard = bpm_->FetchPageBasic(directory_page_id); - auto *directory = directory_guard.As(); - - std::cout << "Directory " << idx << ", page id: " << directory_page_id << "\n"; - directory->PrintDirectory(); - - for (uint32_t idx2 = 0; idx2 < directory->Size(); idx2++) { - page_id_t bucket_page_id = directory->GetBucketPageId(idx2); - BasicPageGuard bucket_guard = bpm_->FetchPageBasic(bucket_page_id); - auto *bucket = bucket_guard.As>(); - - std::cout << "Bucket " << idx2 << ", page id: " << bucket_page_id << "\n"; - bucket->PrintBucket(); - } - } - std::cout << "==================== END OF PRINT! 
====================\n"; - std::cout << "\n"; -} - -/***************************************************************************** - * Verification - *****************************************************************************/ - -template -void DiskExtendibleHashTable::VerifyIntegrity() const { - BUSTUB_ASSERT(header_page_id_ != INVALID_PAGE_ID, "header page id is invalid"); - BasicPageGuard header_guard = bpm_->FetchPageBasic(header_page_id_); - auto *header = header_guard.As(); - - // for each of the directory pages, check their integrity using directory page VerifyIntegrity - for (uint32_t idx = 0; idx < header->MaxSize(); idx++) { - auto directory_page_id = header->GetDirectoryPageId(idx); - if (static_cast(directory_page_id) != INVALID_PAGE_ID) { - BasicPageGuard directory_guard = bpm_->FetchPageBasic(directory_page_id); - auto *directory = directory_guard.As(); - directory->VerifyIntegrity(); - } - } -} - -template -auto DiskExtendibleHashTable::GetHeaderPageId() const -> page_id_t { - return header_page_id_; -} - -template class DiskExtendibleHashTable; -template class DiskExtendibleHashTable, RID, GenericComparator<4>>; -template class DiskExtendibleHashTable, RID, GenericComparator<8>>; -template class DiskExtendibleHashTable, RID, GenericComparator<16>>; -template class DiskExtendibleHashTable, RID, GenericComparator<32>>; -template class DiskExtendibleHashTable, RID, GenericComparator<64>>; - -} // namespace bustub diff --git a/src/container/hash/CMakeLists.txt b/src/container/hash/CMakeLists.txt new file mode 100644 index 0000000..ee85783 --- /dev/null +++ b/src/container/hash/CMakeLists.txt @@ -0,0 +1,8 @@ +add_library( + bustub_container_hash + OBJECT + extendible_hash_table.cpp) + +set(ALL_OBJECT_FILES + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/container/hash/extendible_hash_table.cpp b/src/container/hash/extendible_hash_table.cpp new file mode 100644 index 0000000..85092a0 --- /dev/null +++ 
b/src/container/hash/extendible_hash_table.cpp @@ -0,0 +1,189 @@ +//===----------------------------------------------------------------------===// +// +// BusTub +// +// extendible_hash_table.cpp +// +// Identification: src/container/hash/extendible_hash_table.cpp +// +// Copyright (c) 2022, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "container/hash/extendible_hash_table.h" +#include "storage/page/page.h" + +namespace bustub { + +template +ExtendibleHashTable::ExtendibleHashTable(size_t bucket_size) + : global_depth_(0), bucket_size_(bucket_size), num_buckets_(1) { + dir_.push_back(std::make_shared(bucket_size, 0)); +} + +template +auto ExtendibleHashTable::IndexOf(const K &key) -> size_t { + int mask = (1 << global_depth_) - 1; + return std::hash()(key) & mask; +} + +template +auto ExtendibleHashTable::GetGlobalDepth() const -> int { + std::scoped_lock lock(latch_); + return GetGlobalDepthInternal(); +} + +template +auto ExtendibleHashTable::GetGlobalDepthInternal() const -> int { + return global_depth_; +} + +template +auto ExtendibleHashTable::GetLocalDepth(int dir_index) const -> int { + std::scoped_lock lock(latch_); + return GetLocalDepthInternal(dir_index); +} + +template +auto ExtendibleHashTable::GetLocalDepthInternal(int dir_index) const -> int { + return dir_[dir_index]->GetDepth(); +} + +template +auto ExtendibleHashTable::GetNumBuckets() const -> int { + std::scoped_lock lock(latch_); + return GetNumBucketsInternal(); +} + +template +auto ExtendibleHashTable::GetNumBucketsInternal() const -> int { + return num_buckets_; +} + +template +auto ExtendibleHashTable::Find(const K &key, V &value) -> bool { + std::scoped_lock lock(latch_); + + auto index = IndexOf(key); + auto target_bucket = dir_[index]; + + return target_bucket->Find(key, value); +} + +template +auto ExtendibleHashTable::Remove(const K &key) -> 
bool { + std::scoped_lock lock(latch_); + + auto index = IndexOf(key); + auto target_bucket = dir_[index]; + + return target_bucket->Remove(key); +} + +template +void ExtendibleHashTable::Insert(const K &key, const V &value) { + std::scoped_lock lock(latch_); + + while (dir_[IndexOf(key)]->IsFull()) { + auto index = IndexOf(key); + auto target_bucket = dir_[index]; + + if (target_bucket->GetDepth() == GetGlobalDepthInternal()) { + global_depth_++; + int capacity = dir_.size(); + dir_.resize(capacity << 1); + for (int i = 0; i < capacity; i++) { + dir_[i + capacity] = dir_[i]; + } + } + + int mask = 1 << target_bucket->GetDepth(); + auto bucket_0 = std::make_shared(bucket_size_, target_bucket->GetDepth() + 1); + auto bucket_1 = std::make_shared(bucket_size_, target_bucket->GetDepth() + 1); + + for (const auto &item : target_bucket->GetItems()) { + size_t hash_key = std::hash()(item.first); + if ((hash_key & mask) != 0U) { + bucket_1->Insert(item.first, item.second); + } else { + bucket_0->Insert(item.first, item.second); + } + } + + num_buckets_++; + + for (size_t i = 0; i < dir_.size(); i++) { + if (dir_[i] == target_bucket) { + if ((i & mask) != 0U) { + dir_[i] = bucket_1; + } else { + dir_[i] = bucket_0; + } + } + } + } + + auto index = IndexOf(key); + auto target_bucket = dir_[index]; + + for (auto &item : target_bucket->GetItems()) { + if (item.first == key) { + item.second = value; + return; + } + } + + target_bucket->Insert(key, value); +} + +//===--------------------------------------------------------------------===// +// Bucket +//===--------------------------------------------------------------------===// +template +ExtendibleHashTable::Bucket::Bucket(size_t array_size, int depth) : size_(array_size), depth_(depth) {} + +template +auto ExtendibleHashTable::Bucket::Find(const K &key, V &value) -> bool { + return std::any_of(list_.begin(), list_.end(), [&key, &value](const auto &item) { + if (item.first == key) { + value = item.second; + return true; + } + 
return false; + }); +} + +template +auto ExtendibleHashTable::Bucket::Remove(const K &key) -> bool { + return std::any_of(list_.begin(), list_.end(), [&key, this](const auto &item) { + if (item.first == key) { + this->list_.remove(item); + return true; + } + return false; + }); +} + +template +auto ExtendibleHashTable::Bucket::Insert(const K &key, const V &value) -> bool { + if (IsFull()) { + return false; + } + list_.emplace_back(key, value); + return true; +} + +template class ExtendibleHashTable; +template class ExtendibleHashTable::iterator>; +template class ExtendibleHashTable; +// test purpose +template class ExtendibleHashTable; +template class ExtendibleHashTable::iterator>; + +} // namespace bustub diff --git a/src/execution/CMakeLists.txt b/src/execution/CMakeLists.txt index cab89f5..80033c1 100644 --- a/src/execution/CMakeLists.txt +++ b/src/execution/CMakeLists.txt @@ -3,13 +3,11 @@ add_library( OBJECT aggregation_executor.cpp delete_executor.cpp - execution_common.cpp executor_factory.cpp filter_executor.cpp fmt_impl.cpp hash_join_executor.cpp index_scan_executor.cpp - init_check_executor.cpp insert_executor.cpp limit_executor.cpp mock_scan_executor.cpp @@ -20,11 +18,8 @@ add_library( seq_scan_executor.cpp sort_executor.cpp topn_executor.cpp - topn_per_group_executor.cpp - topn_check_executor.cpp update_executor.cpp values_executor.cpp - window_function_executor.cpp ) set(ALL_OBJECT_FILES diff --git a/src/execution/aggregation_executor.cpp b/src/execution/aggregation_executor.cpp index 29c7882..5fcce2b 100644 --- a/src/execution/aggregation_executor.cpp +++ b/src/execution/aggregation_executor.cpp @@ -17,13 +17,40 @@ namespace bustub { AggregationExecutor::AggregationExecutor(ExecutorContext *exec_ctx, const AggregationPlanNode *plan, - std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx) {} + std::unique_ptr &&child) + : AbstractExecutor(exec_ctx), + plan_(plan), + child_(std::move(child)), + aht_(plan_->aggregates_, plan_->agg_types_), + 
aht_iterator_(aht_.Begin()) {} -void AggregationExecutor::Init() {} +void AggregationExecutor::Init() { + child_->Init(); + Tuple tuple{}; + RID rid{}; + while (child_->Next(&tuple, &rid)) { + aht_.InsertCombine(MakeAggregateKey(&tuple), MakeAggregateValue(&tuple)); + } + if (aht_.Size() == 0 && GetOutputSchema().GetColumnCount() == 1) { + aht_.InsertIntialCombine(); + } + aht_iterator_ = aht_.Begin(); +} -auto AggregationExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto AggregationExecutor::Next(Tuple *tuple, RID *rid) -> bool { + if (aht_iterator_ == aht_.End()) { + return false; + } + std::vector values; -auto AggregationExecutor::GetChildExecutor() const -> const AbstractExecutor * { return child_executor_.get(); } + values.insert(values.end(), aht_iterator_.Key().group_bys_.begin(), aht_iterator_.Key().group_bys_.end()); + values.insert(values.end(), aht_iterator_.Val().aggregates_.begin(), aht_iterator_.Val().aggregates_.end()); + *tuple = Tuple{values, &GetOutputSchema()}; + ++aht_iterator_; + + return true; +} + +auto AggregationExecutor::GetChildExecutor() const -> const AbstractExecutor * { return child_.get(); } } // namespace bustub diff --git a/src/execution/delete_executor.cpp b/src/execution/delete_executor.cpp index 7b8efc7..93cbe36 100644 --- a/src/execution/delete_executor.cpp +++ b/src/execution/delete_executor.cpp @@ -18,10 +18,61 @@ namespace bustub { DeleteExecutor::DeleteExecutor(ExecutorContext *exec_ctx, const DeletePlanNode *plan, std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx) {} + : AbstractExecutor(exec_ctx), plan_{plan}, child_executor_{std::move(child_executor)} { + this->table_info_ = this->exec_ctx_->GetCatalog()->GetTable(plan_->table_oid_); +} -void DeleteExecutor::Init() { throw NotImplementedException("DeleteExecutor is not implemented"); } +void DeleteExecutor::Init() { + child_executor_->Init(); + try { + bool is_locked = exec_ctx_->GetLockManager()->LockTable( + 
exec_ctx_->GetTransaction(), LockManager::LockMode::INTENTION_EXCLUSIVE, table_info_->oid_); + if (!is_locked) { + throw ExecutionException("Delete Executor Get Table Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("Delete Executor Get Table Lock Failed"); + } + table_indexes_ = exec_ctx_->GetCatalog()->GetTableIndexes(table_info_->name_); +} -auto DeleteExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { return false; } +auto DeleteExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { + if (is_end_) { + return false; + } + Tuple to_delete_tuple{}; + RID emit_rid; + int32_t delete_count = 0; + + while (child_executor_->Next(&to_delete_tuple, &emit_rid)) { + try { + bool is_locked = exec_ctx_->GetLockManager()->LockRow( + exec_ctx_->GetTransaction(), LockManager::LockMode::EXCLUSIVE, table_info_->oid_, emit_rid); + if (!is_locked) { + throw ExecutionException("Delete Executor Get Row Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("Delete Executor Get Row Lock Failed"); + } + + bool deleted = table_info_->table_->MarkDelete(emit_rid, exec_ctx_->GetTransaction()); + + if (deleted) { + std::for_each(table_indexes_.begin(), table_indexes_.end(), + [&to_delete_tuple, &rid, &table_info = table_info_, &exec_ctx = exec_ctx_](IndexInfo *index) { + index->index_->DeleteEntry(to_delete_tuple.KeyFromTuple(table_info->schema_, index->key_schema_, + index->index_->GetKeyAttrs()), + *rid, exec_ctx->GetTransaction()); + }); + delete_count++; + } + } + std::vector values{}; + values.reserve(GetOutputSchema().GetColumnCount()); + values.emplace_back(TypeId::INTEGER, delete_count); + *tuple = Tuple{values, &GetOutputSchema()}; + is_end_ = true; + return true; +} } // namespace bustub diff --git a/src/execution/execution_common.cpp b/src/execution/execution_common.cpp deleted file mode 100644 index 0df9d1a..0000000 --- a/src/execution/execution_common.cpp +++ /dev/null @@ -1,44 
+0,0 @@ -#include "execution/execution_common.h" -#include "catalog/catalog.h" -#include "common/config.h" -#include "common/macros.h" -#include "concurrency/transaction_manager.h" -#include "fmt/core.h" -#include "storage/table/table_heap.h" -#include "type/value.h" -#include "type/value_factory.h" - -namespace bustub { - -auto ReconstructTuple(const Schema *schema, const Tuple &base_tuple, const TupleMeta &base_meta, - const std::vector &undo_logs) -> std::optional { - UNIMPLEMENTED("not implemented"); -} - -void TxnMgrDbg(const std::string &info, TransactionManager *txn_mgr, const TableInfo *table_info, - TableHeap *table_heap) { - // always use stderr for printing logs... - fmt::println(stderr, "debug_hook: {}", info); - - fmt::println( - stderr, - "You see this line of text because you have not implemented `TxnMgrDbg`. You should do this once you have " - "finished task 2. Implementing this helper function will save you a lot of time for debugging in later tasks."); - - // We recommend implementing this function as traversing the table heap and print the version chain. 
An example output - // of our reference solution: - // - // debug_hook: before verify scan - // RID=0/0 ts=txn8 tuple=(1, , ) - // txn8@0 (2, _, _) ts=1 - // RID=0/1 ts=3 tuple=(3, , ) - // txn5@0 ts=2 - // txn3@0 (4, , ) ts=1 - // RID=0/2 ts=4 tuple=(, , ) - // txn7@0 (5, , ) ts=3 - // RID=0/3 ts=txn6 tuple=(, , ) - // txn6@0 (6, , ) ts=2 - // txn3@1 (7, _, _) ts=1 -} - -} // namespace bustub diff --git a/src/execution/executor_factory.cpp b/src/execution/executor_factory.cpp index dd98b4f..be37e4e 100644 --- a/src/execution/executor_factory.cpp +++ b/src/execution/executor_factory.cpp @@ -21,7 +21,6 @@ #include "execution/executors/filter_executor.h" #include "execution/executors/hash_join_executor.h" #include "execution/executors/index_scan_executor.h" -#include "execution/executors/init_check_executor.h" #include "execution/executors/insert_executor.h" #include "execution/executors/limit_executor.h" #include "execution/executors/mock_scan_executor.h" @@ -30,27 +29,21 @@ #include "execution/executors/projection_executor.h" #include "execution/executors/seq_scan_executor.h" #include "execution/executors/sort_executor.h" -#include "execution/executors/topn_check_executor.h" #include "execution/executors/topn_executor.h" -#include "execution/executors/topn_per_group_executor.h" #include "execution/executors/update_executor.h" #include "execution/executors/values_executor.h" -#include "execution/executors/window_function_executor.h" #include "execution/plans/filter_plan.h" #include "execution/plans/mock_scan_plan.h" #include "execution/plans/projection_plan.h" #include "execution/plans/sort_plan.h" -#include "execution/plans/topn_per_group_plan.h" #include "execution/plans/topn_plan.h" #include "execution/plans/values_plan.h" -#include "execution/plans/window_plan.h" #include "storage/index/generic_key.h" namespace bustub { auto ExecutorFactory::CreateExecutor(ExecutorContext *exec_ctx, const AbstractPlanNodeRef &plan) -> std::unique_ptr { - auto check_options_set = 
exec_ctx->GetCheckOptions()->check_options_set_; switch (plan->GetType()) { // Create a new sequential scan executor case PlanType::SeqScan: { @@ -97,26 +90,11 @@ auto ExecutorFactory::CreateExecutor(ExecutorContext *exec_ctx, const AbstractPl return std::make_unique(exec_ctx, agg_plan, std::move(child_executor)); } - case PlanType::Window: { - auto window_plan = dynamic_cast(plan.get()); - auto child_executor = ExecutorFactory::CreateExecutor(exec_ctx, window_plan->GetChildPlan()); - return std::make_unique(exec_ctx, window_plan, std::move(child_executor)); - } - // Create a new nested-loop join executor case PlanType::NestedLoopJoin: { auto nested_loop_join_plan = dynamic_cast(plan.get()); auto left = ExecutorFactory::CreateExecutor(exec_ctx, nested_loop_join_plan->GetLeftPlan()); auto right = ExecutorFactory::CreateExecutor(exec_ctx, nested_loop_join_plan->GetRightPlan()); - if (check_options_set.find(CheckOption::ENABLE_NLJ_CHECK) != check_options_set.end()) { - auto left_check = - std::make_unique(exec_ctx, nested_loop_join_plan->GetLeftPlan(), std::move(left)); - auto right_check = - std::make_unique(exec_ctx, nested_loop_join_plan->GetRightPlan(), std::move(right)); - exec_ctx->AddCheckExecutor(left_check.get(), right_check.get()); - return std::make_unique(exec_ctx, nested_loop_join_plan, std::move(left_check), - std::move(right_check)); - } return std::make_unique(exec_ctx, nested_loop_join_plan, std::move(left), std::move(right)); } @@ -173,22 +151,9 @@ auto ExecutorFactory::CreateExecutor(ExecutorContext *exec_ctx, const AbstractPl case PlanType::TopN: { const auto *topn_plan = dynamic_cast(plan.get()); auto child = ExecutorFactory::CreateExecutor(exec_ctx, topn_plan->GetChildPlan()); - if (check_options_set.find(CheckOption::ENABLE_TOPN_CHECK) != check_options_set.end()) { - auto topn_executor = std::make_unique(exec_ctx, topn_plan, nullptr); - auto check = std::make_unique(exec_ctx, topn_plan, std::move(child), topn_executor.get()); - 
topn_executor->SetChildExecutor(std::move(check)); - return topn_executor; - } return std::make_unique(exec_ctx, topn_plan, std::move(child)); } - // Create a new groupTopN executor - case PlanType::TopNPerGroup: { - const auto *group_topn_plan = dynamic_cast(plan.get()); - auto child = ExecutorFactory::CreateExecutor(exec_ctx, group_topn_plan->GetChildPlan()); - return std::make_unique(exec_ctx, group_topn_plan, std::move(child)); - } - default: UNREACHABLE("Unsupported plan type."); } diff --git a/src/execution/fmt_impl.cpp b/src/execution/fmt_impl.cpp index ea3eb0b..5643857 100644 --- a/src/execution/fmt_impl.cpp +++ b/src/execution/fmt_impl.cpp @@ -1,7 +1,5 @@ #include -#include "execution/expressions/column_value_expression.h" #include "execution/plans/update_plan.h" -#include "execution/plans/window_plan.h" #include "fmt/format.h" #include "fmt/ranges.h" @@ -9,7 +7,6 @@ #include "execution/expressions/abstract_expression.h" #include "execution/plans/abstract_plan.h" #include "execution/plans/aggregation_plan.h" -#include "execution/plans/hash_join_plan.h" #include "execution/plans/limit_plan.h" #include "execution/plans/projection_plan.h" #include "execution/plans/sort_plan.h" @@ -38,31 +35,6 @@ auto AggregationPlanNode::PlanNodeToString() const -> std::string { return fmt::format("Agg {{ types={}, aggregates={}, group_by={} }}", agg_types_, aggregates_, group_bys_); } -auto WindowFunctionPlanNode::PlanNodeToString() const -> std::string { - std::string columns_str; - for (const auto &col : columns_) { - const auto &col_val = dynamic_cast(*col); - if (col_val.GetColIdx() == static_cast(-1)) { - columns_str += "placeholder, "; - continue; - } - columns_str += col->ToString() + ", "; - } - - std::vector map_content; - map_content.reserve(window_functions_.size()); - for (const auto &[k, v] : window_functions_) { - map_content.emplace_back(fmt::format(" {}=>{}", k, v)); - } - return fmt::format("WindowFunc {{\n columns={},\n window_functions={{\n{}\n }}\n}}", 
columns_str, - fmt::join(map_content, ",\n")); -} - -auto HashJoinPlanNode::PlanNodeToString() const -> std::string { - return fmt::format("HashJoin {{ type={}, left_key={}, right_key={} }}", join_type_, left_key_expressions_, - right_key_expressions_); -} - auto ProjectionPlanNode::PlanNodeToString() const -> std::string { return fmt::format("Projection {{ exprs={} }}", expressions_); } diff --git a/src/execution/hash_join_executor.cpp b/src/execution/hash_join_executor.cpp index 032bcef..b330a7d 100644 --- a/src/execution/hash_join_executor.cpp +++ b/src/execution/hash_join_executor.cpp @@ -11,21 +11,81 @@ //===----------------------------------------------------------------------===// #include "execution/executors/hash_join_executor.h" +#include "type/value_factory.h" + +// Note for 2022 Fall: You don't need to implement HashJoinExecutor to pass all tests. You ONLY need to implement it +// if you want to get faster in leaderboard tests. namespace bustub { HashJoinExecutor::HashJoinExecutor(ExecutorContext *exec_ctx, const HashJoinPlanNode *plan, std::unique_ptr &&left_child, std::unique_ptr &&right_child) - : AbstractExecutor(exec_ctx) { + : AbstractExecutor(exec_ctx), + plan_{plan}, + left_executor_{std::move(left_child)}, + right_executor_(std::move(right_child)) { if (!(plan->GetJoinType() == JoinType::LEFT || plan->GetJoinType() == JoinType::INNER)) { - // Note for 2023 Fall: You ONLY need to implement left join and inner join. + // Note for 2022 Fall: You ONLY need to implement left join and inner join. 
throw bustub::NotImplementedException(fmt::format("join type {} not supported", plan->GetJoinType())); } } -void HashJoinExecutor::Init() { throw NotImplementedException("HashJoinExecutor is not implemented"); } +void HashJoinExecutor::Init() { + left_executor_->Init(); + right_executor_->Init(); + + Tuple tmp_tuple{}; + RID rid; + while (right_executor_->Next(&tmp_tuple, &rid)) { + auto join_key = plan_->RightJoinKeyExpression().Evaluate(&tmp_tuple, plan_->GetRightPlan()->OutputSchema()); + hash_join_table_[HashUtil::HashValue(&join_key)].push_back(tmp_tuple); + } + + while (left_executor_->Next(&tmp_tuple, &rid)) { + auto join_key = plan_->LeftJoinKeyExpression().Evaluate(&tmp_tuple, plan_->GetLeftPlan()->OutputSchema()); + if (hash_join_table_.count(HashUtil::HashValue(&join_key)) > 0) { + auto right_tuples = hash_join_table_[HashUtil::HashValue(&join_key)]; + for (const auto &tuple : right_tuples) { + auto right_join_key = plan_->RightJoinKeyExpression().Evaluate(&tuple, plan_->GetRightPlan()->OutputSchema()); + if (right_join_key.CompareEquals(join_key) == CmpBool::CmpTrue) { + std::vector values{}; + values.reserve(plan_->GetLeftPlan()->OutputSchema().GetColumnCount() + + plan_->GetRightPlan()->OutputSchema().GetColumnCount()); + for (uint32_t col_idx = 0; col_idx < plan_->GetLeftPlan()->OutputSchema().GetColumnCount(); col_idx++) { + values.push_back(tmp_tuple.GetValue(&plan_->GetLeftPlan()->OutputSchema(), col_idx)); + } + for (uint32_t col_idx = 0; col_idx < plan_->GetRightPlan()->OutputSchema().GetColumnCount(); col_idx++) { + values.push_back(tuple.GetValue(&plan_->GetRightPlan()->OutputSchema(), col_idx)); + } + output_tuples_.emplace_back(values, &GetOutputSchema()); + } + } + } else if (plan_->GetJoinType() == JoinType::LEFT) { + std::vector values{}; + values.reserve(plan_->GetLeftPlan()->OutputSchema().GetColumnCount() + + plan_->GetRightPlan()->OutputSchema().GetColumnCount()); + for (uint32_t col_idx = 0; col_idx < 
plan_->GetLeftPlan()->OutputSchema().GetColumnCount(); col_idx++) { + values.push_back(tmp_tuple.GetValue(&plan_->GetLeftPlan()->OutputSchema(), col_idx)); + } + for (uint32_t col_idx = 0; col_idx < plan_->GetRightPlan()->OutputSchema().GetColumnCount(); col_idx++) { + values.push_back( + ValueFactory::GetNullValueByType(plan_->GetRightPlan()->OutputSchema().GetColumn(col_idx).GetType())); + } + output_tuples_.emplace_back(values, &GetOutputSchema()); + } + } -auto HashJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } + output_tuples_iter_ = output_tuples_.cbegin(); +} + +auto HashJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { + if (output_tuples_iter_ == output_tuples_.cend()) { + return false; + } + *tuple = *output_tuples_iter_; + ++output_tuples_iter_; + return true; +} } // namespace bustub diff --git a/src/execution/index_scan_executor.cpp b/src/execution/index_scan_executor.cpp index d0945f9..08cf1e4 100644 --- a/src/execution/index_scan_executor.cpp +++ b/src/execution/index_scan_executor.cpp @@ -10,13 +10,69 @@ // //===----------------------------------------------------------------------===// #include "execution/executors/index_scan_executor.h" +#include "execution/expressions/constant_value_expression.h" namespace bustub { IndexScanExecutor::IndexScanExecutor(ExecutorContext *exec_ctx, const IndexScanPlanNode *plan) - : AbstractExecutor(exec_ctx) {} + : AbstractExecutor(exec_ctx), + plan_{plan}, + index_info_{this->exec_ctx_->GetCatalog()->GetIndex(plan_->index_oid_)}, + table_info_{this->exec_ctx_->GetCatalog()->GetTable(index_info_->table_name_)}, + tree_{dynamic_cast(index_info_->index_.get())}, + iter_{plan_->filter_predicate_ != nullptr ? 
BPlusTreeIndexIteratorForOneIntegerColumn(nullptr, nullptr) + : tree_->GetBeginIterator()} {} -void IndexScanExecutor::Init() { throw NotImplementedException("IndexScanExecutor is not implemented"); } +void IndexScanExecutor::Init() { + if (plan_->filter_predicate_ != nullptr) { + if (exec_ctx_->GetTransaction()->GetIsolationLevel() != IsolationLevel::READ_UNCOMMITTED) { + try { + bool is_locked = exec_ctx_->GetLockManager()->LockTable( + exec_ctx_->GetTransaction(), LockManager::LockMode::INTENTION_SHARED, table_info_->oid_); + if (!is_locked) { + throw ExecutionException("IndexScan Executor Get Table Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("IndexScan Executor Get Table Lock Failed" + e.GetInfo()); + } + } + const auto *right_expr = + dynamic_cast(plan_->filter_predicate_->children_[1].get()); + Value v = right_expr->val_; + tree_->ScanKey(Tuple{{v}, index_info_->index_->GetKeySchema()}, &rids_, exec_ctx_->GetTransaction()); + rid_iter_ = rids_.begin(); + } +} -auto IndexScanExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto IndexScanExecutor::Next(Tuple *tuple, RID *rid) -> bool { + if (plan_->filter_predicate_ != nullptr) { + if (rid_iter_ != rids_.end()) { + *rid = *rid_iter_; + if (exec_ctx_->GetTransaction()->GetIsolationLevel() != IsolationLevel::READ_UNCOMMITTED) { + try { + bool is_locked = exec_ctx_->GetLockManager()->LockRow(exec_ctx_->GetTransaction(), + LockManager::LockMode::SHARED, table_info_->oid_, *rid); + if (!is_locked) { + throw ExecutionException("IndexScan Executor Get Table Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("IndexScan Executor Get Row Lock Failed"); + } + } + + auto result = table_info_->table_->GetTuple(*rid, tuple, exec_ctx_->GetTransaction()); + rid_iter_++; + return result; + } + return false; + } + if (iter_ == tree_->GetEndIterator()) { + return false; + } + *rid = (*iter_).second; + auto result = 
table_info_->table_->GetTuple(*rid, tuple, exec_ctx_->GetTransaction()); + ++iter_; + + return result; +} } // namespace bustub diff --git a/src/execution/init_check_executor.cpp b/src/execution/init_check_executor.cpp deleted file mode 100644 index c408bb8..0000000 --- a/src/execution/init_check_executor.cpp +++ /dev/null @@ -1,40 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// init_check_executor.cpp -// -// Identification: src/execution/init_check_executor.cpp -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "execution/executors/init_check_executor.h" -#include "execution/plans/abstract_plan.h" - -namespace bustub { - -InitCheckExecutor::InitCheckExecutor(ExecutorContext *exec_ctx, AbstractPlanNodeRef plan, - std::unique_ptr &&child_executor) - : AbstractExecutor{exec_ctx}, plan_{std::move(plan)}, child_executor_{std::move(child_executor)} {} - -void InitCheckExecutor::Init() { - if (!child_executor_) { - return; - } - n_init_++; - // Initialize the child executor - child_executor_->Init(); -} - -auto InitCheckExecutor::Next(Tuple *tuple, RID *rid) -> bool { - // Emit the next tuple - auto result = child_executor_->Next(tuple, rid); - if (result) { - n_next_++; - } - return result; -} - -} // namespace bustub diff --git a/src/execution/insert_executor.cpp b/src/execution/insert_executor.cpp index 9bf1f7a..6753fda 100644 --- a/src/execution/insert_executor.cpp +++ b/src/execution/insert_executor.cpp @@ -18,10 +18,61 @@ namespace bustub { InsertExecutor::InsertExecutor(ExecutorContext *exec_ctx, const InsertPlanNode *plan, std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx) {} + : AbstractExecutor(exec_ctx), plan_{plan}, child_executor_{std::move(child_executor)} { + this->table_info_ = this->exec_ctx_->GetCatalog()->GetTable(plan_->table_oid_); +} -void 
InsertExecutor::Init() { throw NotImplementedException("InsertExecutor is not implemented"); } +void InsertExecutor::Init() { + child_executor_->Init(); + try { + bool is_locked = exec_ctx_->GetLockManager()->LockTable( + exec_ctx_->GetTransaction(), LockManager::LockMode::INTENTION_EXCLUSIVE, table_info_->oid_); + if (!is_locked) { + throw ExecutionException("Insert Executor Get Table Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("Insert Executor Get Table Lock Failed"); + } + table_indexes_ = exec_ctx_->GetCatalog()->GetTableIndexes(table_info_->name_); +} -auto InsertExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { return false; } +auto InsertExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { + if (is_end_) { + return false; + } + Tuple to_insert_tuple{}; + RID emit_rid; + int32_t insert_count = 0; + + while (child_executor_->Next(&to_insert_tuple, &emit_rid)) { + bool inserted = table_info_->table_->InsertTuple(to_insert_tuple, rid, exec_ctx_->GetTransaction()); + + if (inserted) { + try { + bool is_locked = exec_ctx_->GetLockManager()->LockRow( + exec_ctx_->GetTransaction(), LockManager::LockMode::EXCLUSIVE, table_info_->oid_, *rid); + if (!is_locked) { + throw ExecutionException("Insert Executor Get Row Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("Insert Executor Get Row Lock Failed"); + } + + std::for_each(table_indexes_.begin(), table_indexes_.end(), + [&to_insert_tuple, &rid, &table_info = table_info_, &exec_ctx = exec_ctx_](IndexInfo *index) { + index->index_->InsertEntry(to_insert_tuple.KeyFromTuple(table_info->schema_, index->key_schema_, + index->index_->GetKeyAttrs()), + *rid, exec_ctx->GetTransaction()); + }); + insert_count++; + } + } + std::vector values{}; + values.reserve(GetOutputSchema().GetColumnCount()); + values.emplace_back(TypeId::INTEGER, insert_count); + *tuple = Tuple{values, &GetOutputSchema()}; + is_end_ = true; + 
return true; +} } // namespace bustub diff --git a/src/execution/limit_executor.cpp b/src/execution/limit_executor.cpp index 2830b38..8877d6b 100644 --- a/src/execution/limit_executor.cpp +++ b/src/execution/limit_executor.cpp @@ -16,10 +16,24 @@ namespace bustub { LimitExecutor::LimitExecutor(ExecutorContext *exec_ctx, const LimitPlanNode *plan, std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx) {} + : AbstractExecutor(exec_ctx), plan_{plan}, child_executor_{std::move(child_executor)} {} -void LimitExecutor::Init() { throw NotImplementedException("LimitExecutor is not implemented"); } +void LimitExecutor::Init() { + child_executor_->Init(); + count_ = 0; +} -auto LimitExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto LimitExecutor::Next(Tuple *tuple, RID *rid) -> bool { + if (count_ >= plan_->GetLimit()) { + return false; + } + + if (!child_executor_->Next(tuple, rid)) { + return false; + } + + count_++; + return true; +} } // namespace bustub diff --git a/src/execution/mock_scan_executor.cpp b/src/execution/mock_scan_executor.cpp index 306de5e..8816586 100644 --- a/src/execution/mock_scan_executor.cpp +++ b/src/execution/mock_scan_executor.cpp @@ -26,34 +26,22 @@ static const char *ta_list_2022[] = {"amstqq", "durovo", "joyceliaoo", "kush789", "lmwnshn", "mkpjnx", "skyzh", "thepinetree", "timlee0119", "yliang412"}; -static const char *ta_list_2023[] = {"abigalekim", "arvinwu168", "christopherlim98", "David-Lyons", "fanyuex2", - "Mayank-Baranwal", "skyzh", "yarkhinephyo", "yliang412"}; - -static const char *ta_list_2023_fall[] = {"skyzh", "yliang412", "fernandolis10", "wiam8", - "anurag-23", "Mayank-Baranwal", "abigalekim", "ChaosZhai", - "aoleizhou", "averyqi115", "kswim8"}; - static const char *ta_oh_2022[] = {"Tuesday", "Wednesday", "Monday", "Wednesday", "Thursday", "Friday", "Wednesday", "Randomly", "Tuesday", "Monday", "Tuesday"}; -static const char *ta_oh_2023[] = {"Friday", "Thursday", "Tuesday", "Monday", "Tuesday", - 
"Tuesday", "Randomly", "Wednesday", "Thursday"}; - -static const char *ta_oh_2023_fall[] = {"Randomly", "Tuesday", "Wednesday", "Tuesday", "Thursday", "Tuesday", - "Friday", "Yesterday", "Friday", "Friday", "Never"}; - static const char *course_on_date[] = {"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"}; +static int course_on_bool[] = {0, 1, 0, 1, 0, 1, 1}; + const char *mock_table_list[] = {"__mock_table_1", "__mock_table_2", "__mock_table_3", "__mock_table_tas_2022", - "__mock_table_tas_2023", "__mock_table_tas_2023_fall", "__mock_agg_input_small", - "__mock_agg_input_big", "__mock_table_schedule_2022", "__mock_table_schedule_2023", + "__mock_agg_input_small", "__mock_agg_input_big", "__mock_table_schedule_2022", "__mock_table_123", "__mock_graph", // For leaderboard Q1 - "__mock_t1", + "__mock_t1_50k", "__mock_t2_100k", "__mock_t3_1k", // For leaderboard Q2 "__mock_t4_1m", "__mock_t5_1m", "__mock_t6_1m", // For leaderboard Q3 - "__mock_t7", "__mock_t8", "__mock_t9", nullptr}; + "__mock_t7", "__mock_t8", nullptr}; static const int GRAPH_NODE_CNT = 10; @@ -74,22 +62,10 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema { return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}}; } - if (table == "__mock_table_tas_2023") { - return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}}; - } - - if (table == "__mock_table_tas_2023_fall") { - return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}}; - } - if (table == "__mock_table_schedule_2022") { return Schema{std::vector{Column{"day_of_week", TypeId::VARCHAR, 128}, Column{"has_lecture", TypeId::INTEGER}}}; } - if (table == "__mock_table_schedule_2023") { - return Schema{std::vector{Column{"day_of_week", TypeId::VARCHAR, 128}, Column{"has_lecture", TypeId::INTEGER}}}; - } - if (table == 
"__mock_agg_input_small" || table == "__mock_agg_input_big") { return Schema{std::vector{Column{"v1", TypeId::INTEGER}, Column{"v2", TypeId::INTEGER}, Column{"v3", TypeId::INTEGER}, Column{"v4", TypeId::INTEGER}, @@ -106,15 +82,11 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema { return Schema{std::vector{Column{"number", TypeId::INTEGER}}}; } - if (table == "__mock_t4_1m" || table == "__mock_t5_1m" || table == "__mock_t6_1m") { + if (table == "__mock_t1_50k" || table == "__mock_t2_100k" || table == "__mock_t3_1k" || table == "__mock_t4_1m" || + table == "__mock_t5_1m" || table == "__mock_t6_1m") { return Schema{std::vector{Column{"x", TypeId::INTEGER}, Column{"y", TypeId::INTEGER}}}; } - if (table == "__mock_t1") { - return Schema{ - std::vector{Column{"x", TypeId::INTEGER}, Column{"y", TypeId::INTEGER}, Column{"z", TypeId::INTEGER}}}; - } - if (table == "__mock_t7") { return Schema{ std::vector{Column{"v", TypeId::INTEGER}, Column{"v1", TypeId::INTEGER}, Column{"v2", TypeId::INTEGER}}}; @@ -124,10 +96,6 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema { return Schema{std::vector{Column{"v4", TypeId::INTEGER}}}; } - if (table == "__mock_t9") { - return Schema{std::vector{Column{"x", TypeId::INTEGER}, Column{"y", TypeId::INTEGER}}}; - } - throw bustub::Exception(fmt::format("mock table {} not found", table)); } @@ -150,22 +118,10 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t { return sizeof(ta_list_2022) / sizeof(ta_list_2022[0]); } - if (table == "__mock_table_tas_2023") { - return sizeof(ta_list_2023) / sizeof(ta_list_2023[0]); - } - - if (table == "__mock_table_tas_2023_fall") { - return sizeof(ta_list_2023_fall) / sizeof(ta_list_2023_fall[0]); - } - if (table == "__mock_table_schedule_2022") { return sizeof(course_on_date) / sizeof(course_on_date[0]); } - if (table == "__mock_table_schedule_2023") { - return sizeof(course_on_date) / sizeof(course_on_date[0]); - } - if (table == "__mock_agg_input_small") { return 
1000; } @@ -182,8 +138,16 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t { return 3; } - if (table == "__mock_t1") { - return 1000000; + if (table == "__mock_t1_50k") { + return 50000; + } + + if (table == "__mock_t2_100k") { + return 100000; + } + + if (table == "__mock_t3_1k") { + return 1000; } if (table == "__mock_t4_1m" || table == "__mock_t5_1m" || table == "__mock_t6_1m") { @@ -198,17 +162,13 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t { return 10; } - if (table == "__mock_t9") { - return 10000000; - } - return 0; } auto GetShuffled(const MockScanPlanNode *plan) -> bool { const auto &table = plan->GetTable(); - if (table == "__mock_t1") { + if (table == "__mock_t1_50k") { return true; } @@ -270,38 +230,11 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function }; } - if (table == "__mock_table_tas_2023") { - return [plan](size_t cursor) { - std::vector values{}; - values.push_back(ValueFactory::GetVarcharValue(ta_list_2023[cursor])); - values.push_back(ValueFactory::GetVarcharValue(ta_oh_2023[cursor])); - return Tuple{values, &plan->OutputSchema()}; - }; - } - - if (table == "__mock_table_tas_2023_fall") { - return [plan](size_t cursor) { - std::vector values{}; - values.push_back(ValueFactory::GetVarcharValue(ta_list_2023_fall[cursor])); - values.push_back(ValueFactory::GetVarcharValue(ta_oh_2023_fall[cursor])); - return Tuple{values, &plan->OutputSchema()}; - }; - } - if (table == "__mock_table_schedule_2022") { return [plan](size_t cursor) { std::vector values{}; values.push_back(ValueFactory::GetVarcharValue(course_on_date[cursor])); - values.push_back(ValueFactory::GetIntegerValue(cursor == 1 || cursor == 3 ? 
1 : 0)); - return Tuple{values, &plan->OutputSchema()}; - }; - } - - if (table == "__mock_table_schedule_2023") { - return [plan](size_t cursor) { - std::vector values{}; - values.push_back(ValueFactory::GetVarcharValue(course_on_date[cursor])); - values.push_back(ValueFactory::GetIntegerValue(cursor == 0 || cursor == 2 ? 1 : 0)); + values.push_back(ValueFactory::GetIntegerValue(course_on_bool[cursor])); return Tuple{values, &plan->OutputSchema()}; }; } @@ -360,12 +293,29 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function }; } - if (table == "__mock_t1") { + if (table == "__mock_t1_50k") { + return [plan](size_t cursor) { + std::vector values{}; + values.push_back(ValueFactory::GetIntegerValue(cursor * 10)); + values.push_back(ValueFactory::GetIntegerValue(cursor * 1000)); + return Tuple{values, &plan->OutputSchema()}; + }; + } + + if (table == "__mock_t2_100k") { return [plan](size_t cursor) { std::vector values{}; - values.push_back(ValueFactory::GetIntegerValue(cursor / 10000)); - values.push_back(ValueFactory::GetIntegerValue(cursor % 10000)); values.push_back(ValueFactory::GetIntegerValue(cursor)); + values.push_back(ValueFactory::GetIntegerValue(cursor * 100)); + return Tuple{values, &plan->OutputSchema()}; + }; + } + + if (table == "__mock_t3_1k") { + return [plan](size_t cursor) { + std::vector values{}; + values.push_back(ValueFactory::GetIntegerValue(cursor * 100)); + values.push_back(ValueFactory::GetIntegerValue(cursor * 10000)); return Tuple{values, &plan->OutputSchema()}; }; } @@ -418,16 +368,6 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function }; } - if (table == "__mock_t9") { - return [plan](size_t cursor) { - std::vector values{}; - values.push_back(ValueFactory::GetIntegerValue(cursor / 10000)); - values.push_back( - ValueFactory::GetIntegerValue(10000000 - (cursor / 2 + ((cursor / 10000) % 2) * ((cursor / 2) % 2)))); - return Tuple{values, &plan->OutputSchema()}; - }; - } - // By default, return table of all 
0. return [plan](size_t cursor) { std::vector values{}; diff --git a/src/execution/nested_index_join_executor.cpp b/src/execution/nested_index_join_executor.cpp index 475c19e..5b7005f 100644 --- a/src/execution/nested_index_join_executor.cpp +++ b/src/execution/nested_index_join_executor.cpp @@ -11,20 +11,59 @@ //===----------------------------------------------------------------------===// #include "execution/executors/nested_index_join_executor.h" +#include "type/value_factory.h" namespace bustub { NestIndexJoinExecutor::NestIndexJoinExecutor(ExecutorContext *exec_ctx, const NestedIndexJoinPlanNode *plan, std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx) { + : AbstractExecutor(exec_ctx), + plan_{plan}, + child_(std::move(child_executor)), + index_info_{this->exec_ctx_->GetCatalog()->GetIndex(plan_->index_oid_)}, + table_info_{this->exec_ctx_->GetCatalog()->GetTable(index_info_->table_name_)}, + tree_{dynamic_cast(index_info_->index_.get())} { if (!(plan->GetJoinType() == JoinType::LEFT || plan->GetJoinType() == JoinType::INNER)) { - // Note for 2023 Spring: You ONLY need to implement left join and inner join. + // Note for 2022 Fall: You ONLY need to implement left join and inner join. 
throw bustub::NotImplementedException(fmt::format("join type {} not supported", plan->GetJoinType())); } } -void NestIndexJoinExecutor::Init() { throw NotImplementedException("NestIndexJoinExecutor is not implemented"); } +void NestIndexJoinExecutor::Init() { child_->Init(); } -auto NestIndexJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto NestIndexJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { + Tuple left_tuple{}; + RID emit_rid{}; + std::vector vals; + while (child_->Next(&left_tuple, &emit_rid)) { + Value value = plan_->KeyPredicate()->Evaluate(&left_tuple, child_->GetOutputSchema()); + std::vector rids; + tree_->ScanKey(Tuple{{value}, index_info_->index_->GetKeySchema()}, &rids, exec_ctx_->GetTransaction()); + + Tuple right_tuple{}; + if (!rids.empty()) { + table_info_->table_->GetTuple(rids[0], &right_tuple, exec_ctx_->GetTransaction()); + for (uint32_t idx = 0; idx < child_->GetOutputSchema().GetColumnCount(); idx++) { + vals.push_back(left_tuple.GetValue(&child_->GetOutputSchema(), idx)); + } + for (uint32_t idx = 0; idx < plan_->InnerTableSchema().GetColumnCount(); idx++) { + vals.push_back(right_tuple.GetValue(&plan_->InnerTableSchema(), idx)); + } + *tuple = Tuple(vals, &GetOutputSchema()); + return true; + } + if (plan_->GetJoinType() == JoinType::LEFT) { + for (uint32_t idx = 0; idx < child_->GetOutputSchema().GetColumnCount(); idx++) { + vals.push_back(left_tuple.GetValue(&child_->GetOutputSchema(), idx)); + } + for (uint32_t idx = 0; idx < plan_->InnerTableSchema().GetColumnCount(); idx++) { + vals.push_back(ValueFactory::GetNullValueByType(plan_->InnerTableSchema().GetColumn(idx).GetType())); + } + *tuple = Tuple(vals, &GetOutputSchema()); + return true; + } + } + return false; +} } // namespace bustub diff --git a/src/execution/nested_loop_join_executor.cpp b/src/execution/nested_loop_join_executor.cpp index 9ef553d..3661e56 100644 --- a/src/execution/nested_loop_join_executor.cpp +++ 
b/src/execution/nested_loop_join_executor.cpp @@ -13,21 +13,71 @@ #include "execution/executors/nested_loop_join_executor.h" #include "binder/table_ref/bound_join_ref.h" #include "common/exception.h" +#include "type/value_factory.h" namespace bustub { NestedLoopJoinExecutor::NestedLoopJoinExecutor(ExecutorContext *exec_ctx, const NestedLoopJoinPlanNode *plan, std::unique_ptr &&left_executor, std::unique_ptr &&right_executor) - : AbstractExecutor(exec_ctx) { + : AbstractExecutor(exec_ctx), + plan_{plan}, + left_executor_(std::move(left_executor)), + right_executor_(std::move(right_executor)) { if (!(plan->GetJoinType() == JoinType::LEFT || plan->GetJoinType() == JoinType::INNER)) { - // Note for 2023 Fall: You ONLY need to implement left join and inner join. + // Note for 2022 Fall: You ONLY need to implement left join and inner join. throw bustub::NotImplementedException(fmt::format("join type {} not supported", plan->GetJoinType())); } } -void NestedLoopJoinExecutor::Init() { throw NotImplementedException("NestedLoopJoinExecutor is not implemented"); } +void NestedLoopJoinExecutor::Init() { + left_executor_->Init(); + right_executor_->Init(); + Tuple tuple{}; + RID rid{}; + while (right_executor_->Next(&tuple, &rid)) { + right_tuples_.push_back(tuple); + } +} -auto NestedLoopJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto NestedLoopJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { + RID emit_rid{}; + while (right_tuple_idx_ >= 0 || left_executor_->Next(&left_tuple_, &emit_rid)) { + std::vector vals; + for (uint32_t ridx = (right_tuple_idx_ < 0 ? 
0 : right_tuple_idx_); ridx < right_tuples_.size(); ridx++) { + auto &right_tuple = right_tuples_[ridx]; + if (Matched(&left_tuple_, &right_tuple)) { + for (uint32_t idx = 0; idx < left_executor_->GetOutputSchema().GetColumnCount(); idx++) { + vals.push_back(left_tuple_.GetValue(&left_executor_->GetOutputSchema(), idx)); + } + for (uint32_t idx = 0; idx < right_executor_->GetOutputSchema().GetColumnCount(); idx++) { + vals.push_back(right_tuple.GetValue(&right_executor_->GetOutputSchema(), idx)); + } + *tuple = Tuple(vals, &GetOutputSchema()); + right_tuple_idx_ = ridx + 1; + return true; + } + } + if (right_tuple_idx_ == -1 && plan_->GetJoinType() == JoinType::LEFT) { + for (uint32_t idx = 0; idx < left_executor_->GetOutputSchema().GetColumnCount(); idx++) { + vals.push_back(left_tuple_.GetValue(&left_executor_->GetOutputSchema(), idx)); + } + for (uint32_t idx = 0; idx < right_executor_->GetOutputSchema().GetColumnCount(); idx++) { + vals.push_back(ValueFactory::GetNullValueByType(right_executor_->GetOutputSchema().GetColumn(idx).GetType())); + } + *tuple = Tuple(vals, &GetOutputSchema()); + return true; + } + right_tuple_idx_ = -1; + } + return false; +} + +auto NestedLoopJoinExecutor::Matched(Tuple *left_tuple, Tuple *right_tuple) const -> bool { + auto value = plan_->Predicate().EvaluateJoin(left_tuple, left_executor_->GetOutputSchema(), right_tuple, + right_executor_->GetOutputSchema()); + + return !value.IsNull() && value.GetAs(); +} } // namespace bustub diff --git a/src/execution/plan_node.cpp b/src/execution/plan_node.cpp index ed55226..4a1ab19 100644 --- a/src/execution/plan_node.cpp +++ b/src/execution/plan_node.cpp @@ -10,7 +10,6 @@ #include "execution/plans/nested_loop_join_plan.h" #include "execution/plans/projection_plan.h" #include "execution/plans/seq_scan_plan.h" -#include "execution/plans/window_plan.h" namespace bustub { @@ -18,7 +17,7 @@ auto SeqScanPlanNode::InferScanSchema(const BoundBaseTableRef &table) -> Schema std::vector schema; for 
(const auto &column : table.schema_.GetColumns()) { auto col_name = fmt::format("{}.{}", table.GetBoundTableName(), column.GetName()); - schema.emplace_back(col_name, column); + schema.emplace_back(Column(col_name, column)); } return Schema(schema); } @@ -38,7 +37,12 @@ auto ProjectionPlanNode::InferProjectionSchema(const std::vector schema; for (const auto &expr : expressions) { auto type_id = expr->GetReturnType(); - schema.emplace_back(expr->GetReturnType().WithColumnName("")); + if (type_id != TypeId::VARCHAR) { + schema.emplace_back("", type_id); + } else { + // TODO(chi): infer the correct VARCHAR length. Maybe it doesn't matter for executors? + schema.emplace_back("", type_id, VARCHAR_DEFAULT_LENGTH); + } } return Schema(schema); } @@ -50,7 +54,7 @@ auto ProjectionPlanNode::RenameSchema(const Schema &schema, const std::vector output; output.reserve(group_bys.size() + aggregates.size()); for (const auto &column : group_bys) { - output.emplace_back(column->GetReturnType().WithColumnName("")); + // TODO(chi): correctly process VARCHAR column + if (column->GetReturnType() == TypeId::VARCHAR) { + output.emplace_back(Column("", column->GetReturnType(), 128)); + } else { + output.emplace_back(Column("", column->GetReturnType())); + } } for (size_t idx = 0; idx < aggregates.size(); idx++) { // TODO(chi): correctly infer agg call return type - output.emplace_back("", TypeId::INTEGER); - } - return Schema(output); -} - -auto WindowFunctionPlanNode::InferWindowSchema(const std::vector &columns) -> Schema { - std::vector output; - output.reserve(columns.size()); - // TODO(avery): correctly infer window call return type - for (const auto &column : columns) { - output.emplace_back(column->GetReturnType().WithColumnName("")); + output.emplace_back(Column("", TypeId::INTEGER)); } return Schema(output); } diff --git a/src/execution/seq_scan_executor.cpp b/src/execution/seq_scan_executor.cpp index 9b32f36..2e1b79d 100644 --- a/src/execution/seq_scan_executor.cpp +++ 
b/src/execution/seq_scan_executor.cpp @@ -14,10 +14,59 @@ namespace bustub { -SeqScanExecutor::SeqScanExecutor(ExecutorContext *exec_ctx, const SeqScanPlanNode *plan) : AbstractExecutor(exec_ctx) {} +SeqScanExecutor::SeqScanExecutor(ExecutorContext *exec_ctx, const SeqScanPlanNode *plan) + : AbstractExecutor(exec_ctx), plan_(plan) { + this->table_info_ = this->exec_ctx_->GetCatalog()->GetTable(plan_->table_oid_); +} -void SeqScanExecutor::Init() { throw NotImplementedException("SeqScanExecutor is not implemented"); } +void SeqScanExecutor::Init() { + if (exec_ctx_->GetTransaction()->GetIsolationLevel() != IsolationLevel::READ_UNCOMMITTED) { + try { + bool is_locked = exec_ctx_->GetLockManager()->LockTable( + exec_ctx_->GetTransaction(), LockManager::LockMode::INTENTION_SHARED, table_info_->oid_); + if (!is_locked) { + throw ExecutionException("SeqScan Executor Get Table Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("SeqScan Executor Get Table Lock Failed" + e.GetInfo()); + } + } + this->table_iter_ = table_info_->table_->Begin(exec_ctx_->GetTransaction()); +} -auto SeqScanExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto SeqScanExecutor::Next(Tuple *tuple, RID *rid) -> bool { + do { + if (table_iter_ == table_info_->table_->End()) { + if (exec_ctx_->GetTransaction()->GetIsolationLevel() == IsolationLevel::READ_COMMITTED) { + const auto locked_row_set = exec_ctx_->GetTransaction()->GetSharedRowLockSet()->at(table_info_->oid_); + table_oid_t oid = table_info_->oid_; + for (auto rid : locked_row_set) { + exec_ctx_->GetLockManager()->UnlockRow(exec_ctx_->GetTransaction(), oid, rid); + } + + exec_ctx_->GetLockManager()->UnlockTable(exec_ctx_->GetTransaction(), table_info_->oid_); + } + return false; + } + *tuple = *table_iter_; + *rid = tuple->GetRid(); + ++table_iter_; + } while (plan_->filter_predicate_ != nullptr && + !plan_->filter_predicate_->Evaluate(tuple, table_info_->schema_).GetAs()); + + if 
(exec_ctx_->GetTransaction()->GetIsolationLevel() != IsolationLevel::READ_UNCOMMITTED) { + try { + bool is_locked = exec_ctx_->GetLockManager()->LockRow(exec_ctx_->GetTransaction(), LockManager::LockMode::SHARED, + table_info_->oid_, *rid); + if (!is_locked) { + throw ExecutionException("SeqScan Executor Get Table Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("SeqScan Executor Get Row Lock Failed"); + } + } + + return true; +} } // namespace bustub diff --git a/src/execution/sort_executor.cpp b/src/execution/sort_executor.cpp index 8d9637b..25ac124 100644 --- a/src/execution/sort_executor.cpp +++ b/src/execution/sort_executor.cpp @@ -4,10 +4,59 @@ namespace bustub { SortExecutor::SortExecutor(ExecutorContext *exec_ctx, const SortPlanNode *plan, std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx) {} + : AbstractExecutor(exec_ctx), plan_{plan}, child_{std::move(child_executor)} {} -void SortExecutor::Init() { throw NotImplementedException("SortExecutor is not implemented"); } +void SortExecutor::Init() { + child_->Init(); + Tuple child_tuple{}; + RID child_rid; + while (child_->Next(&child_tuple, &child_rid)) { + child_tuples_.push_back(child_tuple); + } -auto SortExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } + std::sort( + child_tuples_.begin(), child_tuples_.end(), + [order_bys = plan_->order_bys_, schema = child_->GetOutputSchema()](const Tuple &tuple_a, const Tuple &tuple_b) { + for (const auto &order_key : order_bys) { + switch (order_key.first) { + case OrderByType::INVALID: + case OrderByType::DEFAULT: + case OrderByType::ASC: + if (static_cast(order_key.second->Evaluate(&tuple_a, schema) + .CompareLessThan(order_key.second->Evaluate(&tuple_b, schema)))) { + return true; + } else if (static_cast(order_key.second->Evaluate(&tuple_a, schema) + .CompareGreaterThan(order_key.second->Evaluate(&tuple_b, schema)))) { + return false; + } + break; + case OrderByType::DESC: + if 
(static_cast(order_key.second->Evaluate(&tuple_a, schema) + .CompareGreaterThan(order_key.second->Evaluate(&tuple_b, schema)))) { + return true; + } else if (static_cast(order_key.second->Evaluate(&tuple_a, schema) + .CompareLessThan(order_key.second->Evaluate(&tuple_b, schema)))) { + return false; + } + break; + } + } + return false; + }); + + child_iter_ = child_tuples_.begin(); +} + +auto SortExecutor::Next(Tuple *tuple, RID *rid) -> bool { + if (child_iter_ == child_tuples_.end()) { + return false; + } + + *tuple = *child_iter_; + *rid = tuple->GetRid(); + ++child_iter_; + + return true; +} } // namespace bustub diff --git a/src/execution/topn_check_executor.cpp b/src/execution/topn_check_executor.cpp deleted file mode 100644 index bda7ad5..0000000 --- a/src/execution/topn_check_executor.cpp +++ /dev/null @@ -1,48 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// topn_check_executor.cpp -// -// Identification: src/execution/topn_check_executor.cpp -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "execution/executors/topn_check_executor.h" -#include "execution/executors/topn_executor.h" - -namespace bustub { - -TopNCheckExecutor::TopNCheckExecutor(ExecutorContext *exec_ctx, const TopNPlanNode *plan, - std::unique_ptr &&child_executor, TopNExecutor *topn_executor) - : AbstractExecutor{exec_ctx}, - plan_(plan), - child_executor_{std::move(child_executor)}, - topn_executor_(topn_executor) {} - -void TopNCheckExecutor::Init() { - if (!child_executor_) { - return; - } - prev_ = 0; - // Initialize the child executor - child_executor_->Init(); -} - -auto TopNCheckExecutor::Next(Tuple *tuple, RID *rid) -> bool { - if (!child_executor_) { - return EXECUTOR_EXHAUSTED; - } - - BUSTUB_ASSERT(topn_executor_->GetNumInHeap() <= plan_->GetN(), "Cannot store more than N elements"); - if (prev_ > 
0 && prev_ < plan_->GetN()) { - BUSTUB_ASSERT(topn_executor_->GetNumInHeap() - prev_ == 1, "Did you implement GetNumInHeap() properly?"); - } - prev_ = topn_executor_->GetNumInHeap(); - // Emit the next tuple - return child_executor_->Next(tuple, rid); -} - -} // namespace bustub diff --git a/src/execution/topn_executor.cpp b/src/execution/topn_executor.cpp index ab7b770..d5eed4c 100644 --- a/src/execution/topn_executor.cpp +++ b/src/execution/topn_executor.cpp @@ -4,12 +4,64 @@ namespace bustub { TopNExecutor::TopNExecutor(ExecutorContext *exec_ctx, const TopNPlanNode *plan, std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx) {} + : AbstractExecutor(exec_ctx), plan_{plan}, child_{std::move(child_executor)} {} -void TopNExecutor::Init() { throw NotImplementedException("TopNExecutor is not implemented"); } +void TopNExecutor::Init() { + child_->Init(); -auto TopNExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } + auto cmp = [order_bys = plan_->order_bys_, schema = child_->GetOutputSchema()](const Tuple &a, const Tuple &b) { + for (const auto &order_key : order_bys) { + switch (order_key.first) { + case OrderByType::INVALID: + case OrderByType::DEFAULT: + case OrderByType::ASC: + if (static_cast( + order_key.second->Evaluate(&a, schema).CompareLessThan(order_key.second->Evaluate(&b, schema)))) { + return true; + } else if (static_cast(order_key.second->Evaluate(&a, schema) + .CompareGreaterThan(order_key.second->Evaluate(&b, schema)))) { + return false; + } + break; + case OrderByType::DESC: + if (static_cast( + order_key.second->Evaluate(&a, schema).CompareGreaterThan(order_key.second->Evaluate(&b, schema)))) { + return true; + } else if (static_cast(order_key.second->Evaluate(&a, schema) + .CompareLessThan(order_key.second->Evaluate(&b, schema)))) { + return false; + } + break; + } + } + return false; + }; -auto TopNExecutor::GetNumInHeap() -> size_t { throw NotImplementedException("TopNExecutor is not implemented"); }; + 
std::priority_queue, decltype(cmp)> pq(cmp); + Tuple child_tuple{}; + RID child_rid; + while (child_->Next(&child_tuple, &child_rid)) { + pq.push(child_tuple); + if (pq.size() > plan_->GetN()) { + pq.pop(); + } + } + + while (!pq.empty()) { + child_tuples_.push(pq.top()); + pq.pop(); + } +} + +auto TopNExecutor::Next(Tuple *tuple, RID *rid) -> bool { + if (child_tuples_.empty()) { + return false; + } + *tuple = child_tuples_.top(); + *rid = tuple->GetRid(); + child_tuples_.pop(); + + return true; +} } // namespace bustub diff --git a/src/execution/topn_per_group_executor.cpp b/src/execution/topn_per_group_executor.cpp deleted file mode 100644 index 8e058d0..0000000 --- a/src/execution/topn_per_group_executor.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// topn_per_group_executor.cpp -// -// Identification: src/execution/topn_executor.cpp -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "execution/executors/topn_per_group_executor.h" - -namespace bustub { - -TopNPerGroupExecutor::TopNPerGroupExecutor(ExecutorContext *exec_ctx, const TopNPerGroupPlanNode *plan, - std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx), plan_(plan), child_executor_(std::move(child_executor)) {} - -void TopNPerGroupExecutor::Init() { throw NotImplementedException("TopNPerGroupExecutor is not implemented"); } - -auto TopNPerGroupExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } - -} // namespace bustub diff --git a/src/execution/update_executor.cpp b/src/execution/update_executor.cpp index bf4bdb6..730c13d 100644 --- a/src/execution/update_executor.cpp +++ b/src/execution/update_executor.cpp @@ -17,12 +17,78 @@ namespace bustub { UpdateExecutor::UpdateExecutor(ExecutorContext *exec_ctx, const UpdatePlanNode *plan, std::unique_ptr &&child_executor) - : 
AbstractExecutor(exec_ctx) { + : AbstractExecutor(exec_ctx), plan_{plan}, child_executor_{std::move(child_executor)} { // As of Fall 2022, you DON'T need to implement update executor to have perfect score in project 3 / project 4. + this->table_info_ = this->exec_ctx_->GetCatalog()->GetTable(plan_->table_oid_); } -void UpdateExecutor::Init() { throw NotImplementedException("UpdateExecutor is not implemented"); } +void UpdateExecutor::Init() { + child_executor_->Init(); + try { + bool is_locked = exec_ctx_->GetLockManager()->LockTable( + exec_ctx_->GetTransaction(), LockManager::LockMode::INTENTION_EXCLUSIVE, table_info_->oid_); + if (!is_locked) { + throw ExecutionException("Update Executor Get Table Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("Update Executor Get Table Lock Failed"); + } + table_indexes_ = exec_ctx_->GetCatalog()->GetTableIndexes(table_info_->name_); +} + +auto UpdateExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { + if (is_end_) { + return false; + } + Tuple old_tuple{}; + RID old_rid; + int32_t update_count = 0; + + while (child_executor_->Next(&old_tuple, &old_rid)) { + try { + bool is_locked = exec_ctx_->GetLockManager()->LockRow( + exec_ctx_->GetTransaction(), LockManager::LockMode::EXCLUSIVE, table_info_->oid_, old_rid); + if (!is_locked) { + throw ExecutionException("Update Executor Get Row Lock Failed"); + } + } catch (TransactionAbortException e) { + throw ExecutionException("Update Executor Get Row Lock Failed"); + } + + std::vector values{}; + values.reserve(child_executor_->GetOutputSchema().GetColumnCount()); + for (const auto &expr : plan_->target_expressions_) { + values.push_back(expr->Evaluate(&old_tuple, child_executor_->GetOutputSchema())); + } -auto UpdateExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { return false; } + auto to_update_tuple = Tuple{values, &child_executor_->GetOutputSchema()}; + + bool updated = 
table_info_->table_->UpdateTuple(to_update_tuple, old_rid, exec_ctx_->GetTransaction()); + + if (updated) { + // std::for_each(table_indexes_.begin(), table_indexes_.end(), + // [&old_tuple, &rid, &table_info = table_info_, &exec_ctx = exec_ctx_](IndexInfo *index) { + // index->index_->DeleteEntry(old_tuple.KeyFromTuple(table_info->schema_, index->key_schema_, + // index->index_->GetKeyAttrs()), + // *rid, exec_ctx->GetTransaction()); + // }); + // std::for_each(table_indexes_.begin(), table_indexes_.end(), + // [&to_update_tuple, &old_rid, &table_info = table_info_, &exec_ctx = exec_ctx_](IndexInfo *index) + // { + // index->index_->InsertEntry(to_update_tuple.KeyFromTuple(table_info->schema_, + // index->key_schema_, + // index->index_->GetKeyAttrs()), + // old_rid, exec_ctx->GetTransaction()); + // }); + update_count++; + } + } + std::vector values{}; + values.reserve(GetOutputSchema().GetColumnCount()); + values.emplace_back(TypeId::INTEGER, update_count); + *tuple = Tuple{values, &GetOutputSchema()}; + is_end_ = true; + return true; +} } // namespace bustub diff --git a/src/execution/window_function_executor.cpp b/src/execution/window_function_executor.cpp deleted file mode 100644 index 768fa3d..0000000 --- a/src/execution/window_function_executor.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include "execution/executors/window_function_executor.h" -#include "execution/plans/window_plan.h" -#include "storage/table/tuple.h" - -namespace bustub { - -WindowFunctionExecutor::WindowFunctionExecutor(ExecutorContext *exec_ctx, const WindowFunctionPlanNode *plan, - std::unique_ptr &&child_executor) - : AbstractExecutor(exec_ctx), plan_(plan), child_executor_(std::move(child_executor)) {} - -void WindowFunctionExecutor::Init() { throw NotImplementedException("WindowFunctionExecutor is not implemented"); } - -auto WindowFunctionExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } -} // namespace bustub diff --git a/src/include/binder/binder.h 
b/src/include/binder/binder.h index 71de183..ad77a06 100644 --- a/src/include/binder/binder.h +++ b/src/include/binder/binder.h @@ -36,7 +36,6 @@ #include -#include "binder/expressions/bound_window.h" #include "binder/simplified_token.h" #include "binder/statement/select_statement.h" #include "binder/statement/set_show_statement.h" @@ -114,8 +113,6 @@ class Binder { /** Get the std::string representation of a Postgres node tag. */ static auto NodeTagToString(duckdb_libpgquery::PGNodeTag type) -> std::string; - static auto WindowBoundaryToString(WindowBoundary wb) -> std::string; - // The following parts are undocumented. One `BindXXX` functions simply corresponds to a // node type in the Postgres parse tree. @@ -154,12 +151,6 @@ class Binder { auto BindFuncCall(duckdb_libpgquery::PGFuncCall *root) -> std::unique_ptr; - auto BindWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, std::unique_ptr expr) - -> std::unique_ptr; - - auto BindWindowExpression(std::string func_name, std::vector> children, - duckdb_libpgquery::PGWindowDef *node) -> std::unique_ptr; - auto BindAExpr(duckdb_libpgquery::PGAExpr *root) -> std::unique_ptr; auto BindBoolExpr(duckdb_libpgquery::PGBoolExpr *root) -> std::unique_ptr; @@ -213,8 +204,6 @@ class Binder { auto BindVariableShow(duckdb_libpgquery::PGVariableShowStmt *stmt) -> std::unique_ptr; - auto BindTransaction(duckdb_libpgquery::PGTransactionStmt *stmt) -> std::unique_ptr; - class ContextGuard { public: explicit ContextGuard(const BoundTableRef **scope, const CTEList **cte_scope) { diff --git a/src/include/binder/bound_expression.h b/src/include/binder/bound_expression.h index 8d1ab07..52211c0 100644 --- a/src/include/binder/bound_expression.h +++ b/src/include/binder/bound_expression.h @@ -21,8 +21,6 @@ enum class ExpressionType : uint8_t { UNARY_OP = 8, /**< Unary expression type. */ BINARY_OP = 9, /**< Binary expression type. */ ALIAS = 10, /**< Alias expression type. */ - FUNC_CALL = 11, /**< Function call expression type. 
*/ - WINDOW = 12, /**< Window Aggregation expression type. */ }; /** @@ -40,8 +38,6 @@ class BoundExpression { virtual auto HasAggregation() const -> bool { UNREACHABLE("has aggregation should have been implemented!"); } - virtual auto HasWindowFunction() const -> bool { return false; } - /** The type of this expression. */ ExpressionType type_{ExpressionType::INVALID}; }; @@ -102,12 +98,6 @@ struct fmt::formatter : formatter { case bustub::ExpressionType::ALIAS: name = "Alias"; break; - case bustub::ExpressionType::FUNC_CALL: - name = "FuncCall"; - break; - case bustub::ExpressionType::WINDOW: - name = "Window"; - break; } return formatter::format(name, ctx); } diff --git a/src/include/binder/expressions/bound_agg_call.h b/src/include/binder/expressions/bound_agg_call.h index 77ba797..1762850 100644 --- a/src/include/binder/expressions/bound_agg_call.h +++ b/src/include/binder/expressions/bound_agg_call.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/include/binder/expressions/bound_alias.h b/src/include/binder/expressions/bound_alias.h index 380ae80..243c191 100644 --- a/src/include/binder/expressions/bound_alias.h +++ b/src/include/binder/expressions/bound_alias.h @@ -19,8 +19,6 @@ class BoundAlias : public BoundExpression { auto HasAggregation() const -> bool override { return child_->HasAggregation(); } - auto HasWindowFunction() const -> bool override { return child_->HasWindowFunction(); } - /** Alias name. */ std::string alias_; diff --git a/src/include/binder/expressions/bound_func_call.h b/src/include/binder/expressions/bound_func_call.h deleted file mode 100644 index cc0d0b9..0000000 --- a/src/include/binder/expressions/bound_func_call.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include "binder/bound_expression.h" -#include "binder/expressions/bound_window.h" - -namespace bustub { - -/** - * A bound func call, e.g., `lower(x)`. 
- */ -class BoundFuncCall : public BoundExpression { - public: - explicit BoundFuncCall(std::string func_name, std::vector> args) - : BoundExpression(ExpressionType::FUNC_CALL), func_name_(std::move(func_name)), args_(std::move(args)) {} - - auto ToString() const -> std::string override; - - auto HasAggregation() const -> bool override { return false; } - - /** Function name. */ - std::string func_name_; - - /** Arguments of the func call. */ - std::vector> args_; -}; -} // namespace bustub diff --git a/src/include/binder/expressions/bound_window.h b/src/include/binder/expressions/bound_window.h deleted file mode 100644 index 8a5d7c6..0000000 --- a/src/include/binder/expressions/bound_window.h +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include "binder/bound_expression.h" -#include "binder/bound_order_by.h" -#include "nodes/nodes.hpp" -#include "nodes/parsenodes.hpp" -#include "nodes/primnodes.hpp" -#include "pg_definitions.hpp" -#include "postgres_parser.hpp" - -namespace bustub { - -enum class WindowBoundary : uint8_t { - INVALID = 0, - UNBOUNDED_PRECEDING = 1, - UNBOUNDED_FOLLOWING = 2, - CURRENT_ROW_RANGE = 3, - CURRENT_ROW_ROWS = 4, - EXPR_PRECEDING_ROWS = 5, - EXPR_FOLLOWING_ROWS = 6, - EXPR_PRECEDING_RANGE = 7, - EXPR_FOLLOWING_RANGE = 8 -}; - -/** - * A bound aggregate call, e.g., `sum(x)`. 
- */ -#pragma once - -class BoundWindow : public BoundExpression { - public: - explicit BoundWindow(std::string func_name, std::vector> args, - std::vector> partition_by, - std::vector> order_bys, - std::optional> start_offset, - std::optional> end_offset) - : BoundExpression(ExpressionType::WINDOW), - func_name_(std::move(func_name)), - args_(std::move(args)), - partition_by_(std::move(partition_by)), - order_bys_(std::move(order_bys)), - start_offset_(std::move(start_offset)), - end_offset_(std::move(end_offset)) {} - - explicit BoundWindow(std::string func_name, std::vector> args, - std::vector> partition_by, - std::vector> order_bys, - std::optional> start_offset, - std::optional> end_offset, WindowBoundary start, - WindowBoundary end) - : BoundExpression(ExpressionType::WINDOW), - func_name_(std::move(func_name)), - args_(std::move(args)), - partition_by_(std::move(partition_by)), - order_bys_(std::move(order_bys)), - start_offset_(std::move(start_offset)), - end_offset_(std::move(end_offset)), - start_(start), - end_(end) {} - - auto ToString() const -> std::string override; - - auto HasAggregation() const -> bool override { return false; } - - auto HasWindowFunction() const -> bool override { return true; } - - auto SetStart(WindowBoundary start) { start_ = start; } - - auto SetEnd(WindowBoundary end) { end_ = end; } - - /** Function name. */ - std::string func_name_; - - /** Arguments of the func call. 
*/ - std::vector> args_; - - std::vector> partition_by_; - std::vector> order_bys_; - std::optional> start_offset_; - std::optional> end_offset_; - WindowBoundary start_ = WindowBoundary::INVALID; - WindowBoundary end_ = WindowBoundary::INVALID; -}; -} // namespace bustub diff --git a/src/include/binder/statement/create_statement.h b/src/include/binder/statement/create_statement.h index 9878bc0..44c310d 100644 --- a/src/include/binder/statement/create_statement.h +++ b/src/include/binder/statement/create_statement.h @@ -21,11 +21,10 @@ namespace bustub { class CreateStatement : public BoundStatement { public: - explicit CreateStatement(std::string table, std::vector columns, std::vector primary_key); + explicit CreateStatement(std::string table, std::vector columns); std::string table_; std::vector columns_; - std::vector primary_key_; auto ToString() const -> std::string override; }; diff --git a/src/include/binder/statement/index_statement.h b/src/include/binder/statement/index_statement.h index d27f539..9743887 100644 --- a/src/include/binder/statement/index_statement.h +++ b/src/include/binder/statement/index_statement.h @@ -9,7 +9,6 @@ #include #include -#include #include #include "binder/bound_statement.h" @@ -22,8 +21,7 @@ namespace bustub { class IndexStatement : public BoundStatement { public: explicit IndexStatement(std::string index_name, std::unique_ptr table, - std::vector> cols, std::string index_type, - std::vector col_options, std::vector> options); + std::vector> cols); /** Name of the index */ std::string index_name_; @@ -34,12 +32,6 @@ class IndexStatement : public BoundStatement { /** Name of the columns */ std::vector> cols_; - /** Using */ - std::string index_type_; - - std::vector col_options_; - std::vector> options_; - auto ToString() const -> std::string override; }; diff --git a/src/include/binder/statement/set_show_statement.h b/src/include/binder/statement/set_show_statement.h index c15e4c8..de1b832 100644 --- 
a/src/include/binder/statement/set_show_statement.h +++ b/src/include/binder/statement/set_show_statement.h @@ -44,14 +44,4 @@ class VariableShowStatement : public BoundStatement { } }; -class TransactionStatement : public BoundStatement { - public: - explicit TransactionStatement(std::string type) - : BoundStatement(StatementType::TRANSACTION_STATEMENT), type_(std::move(type)) {} - - std::string type_; - - auto ToString() const -> std::string override { return fmt::format("BoundTransaction {{ type={} }}", type_); } -}; - } // namespace bustub diff --git a/src/include/buffer/buffer_pool_manager.h b/src/include/buffer/buffer_pool_manager.h index fc01038..5bb92dd 100644 --- a/src/include/buffer/buffer_pool_manager.h +++ b/src/include/buffer/buffer_pool_manager.h @@ -13,17 +13,13 @@ #pragma once #include -#include #include // NOLINT #include -#include "buffer/lru_k_replacer.h" -#include "common/config.h" +#include "buffer/lru_replacer.h" #include "recovery/log_manager.h" -#include "storage/disk/disk_scheduler.h" -#include "storage/disk/write_back_cache.h" +#include "storage/disk/disk_manager.h" #include "storage/page/page.h" -#include "storage/page/page_guard.h" namespace bustub { @@ -32,184 +28,118 @@ namespace bustub { */ class BufferPoolManager { public: - /** - * @brief Creates a new BufferPoolManager. - * @param pool_size the size of the buffer pool - * @param disk_manager the disk manager - * @param replacer_k the LookBack constant k for the LRU-K replacer - * @param log_manager the log manager (for testing only: nullptr = disable logging). Please ignore this for P1. - */ - BufferPoolManager(size_t pool_size, DiskManager *disk_manager, size_t replacer_k = LRUK_REPLACER_K, - LogManager *log_manager = nullptr); + enum class CallbackType { BEFORE, AFTER }; + using bufferpool_callback_fn = void (*)(enum CallbackType, const page_id_t page_id); + BufferPoolManager() = default; /** - * @brief Destroy an existing BufferPoolManager. 
+ * Destroys an existing BufferPoolManager. */ - ~BufferPoolManager(); + virtual ~BufferPoolManager() = default; + + /** Grading function. Do not modify! */ + auto FetchPage(page_id_t page_id, bufferpool_callback_fn callback = nullptr) -> Page * { + GradingCallback(callback, CallbackType::BEFORE, page_id); + auto *result = FetchPgImp(page_id); + GradingCallback(callback, CallbackType::AFTER, page_id); + return result; + } - /** @brief Return the size (number of frames) of the buffer pool. */ - auto GetPoolSize() -> size_t { return pool_size_; } + /** Grading function. Do not modify! */ + auto UnpinPage(page_id_t page_id, bool is_dirty, bufferpool_callback_fn callback = nullptr) -> bool { + GradingCallback(callback, CallbackType::BEFORE, page_id); + auto result = UnpinPgImp(page_id, is_dirty); + GradingCallback(callback, CallbackType::AFTER, page_id); + return result; + } - /** @brief Return the pointer to all the pages in the buffer pool. */ - auto GetPages() -> Page * { return pages_; } + /** Grading function. Do not modify! */ + auto FlushPage(page_id_t page_id, bufferpool_callback_fn callback = nullptr) -> bool { + GradingCallback(callback, CallbackType::BEFORE, page_id); + auto result = FlushPgImp(page_id); + GradingCallback(callback, CallbackType::AFTER, page_id); + return result; + } - /** - * TODO(P1): Add implementation - * - * @brief Create a new page in the buffer pool. Set page_id to the new page's id, or nullptr if all frames - * are currently in use and not evictable (in another word, pinned). - * - * You should pick the replacement frame from either the free list or the replacer (always find from the free list - * first), and then call the AllocatePage() method to get a new page id. If the replacement frame has a dirty page, - * you should write it back to the disk first. You also need to reset the memory and metadata for the new page. 
- * - * Remember to "Pin" the frame by calling replacer.SetEvictable(frame_id, false) - * so that the replacer wouldn't evict the frame before the buffer pool manager "Unpin"s it. - * Also, remember to record the access history of the frame in the replacer for the lru-k algorithm to work. - * - * @param[out] page_id id of created page - * @return nullptr if no new pages could be created, otherwise pointer to new page - */ - auto NewPage(page_id_t *page_id) -> Page *; + /** Grading function. Do not modify! */ + auto NewPage(page_id_t *page_id, bufferpool_callback_fn callback = nullptr) -> Page * { + GradingCallback(callback, CallbackType::BEFORE, INVALID_PAGE_ID); + auto *result = NewPgImp(page_id); + GradingCallback(callback, CallbackType::AFTER, *page_id); + return result; + } - /** - * TODO(P1): Add implementation - * - * @brief PageGuard wrapper for NewPage - * - * Functionality should be the same as NewPage, except that - * instead of returning a pointer to a page, you return a - * BasicPageGuard structure. - * - * @param[out] page_id, the id of the new page - * @return BasicPageGuard holding a new page - */ - auto NewPageGuarded(page_id_t *page_id) -> BasicPageGuard; + /** Grading function. Do not modify! */ + auto DeletePage(page_id_t page_id, bufferpool_callback_fn callback = nullptr) -> bool { + GradingCallback(callback, CallbackType::BEFORE, page_id); + auto result = DeletePgImp(page_id); + GradingCallback(callback, CallbackType::AFTER, page_id); + return result; + } + /** Grading function. Do not modify! */ + void FlushAllPages(bufferpool_callback_fn callback = nullptr) { + GradingCallback(callback, CallbackType::BEFORE, INVALID_PAGE_ID); + FlushAllPgsImp(); + GradingCallback(callback, CallbackType::AFTER, INVALID_PAGE_ID); + } + + /** @return size of the buffer pool */ + virtual auto GetPoolSize() -> size_t = 0; + + protected: /** - * TODO(P1): Add implementation - * - * @brief Fetch the requested page from the buffer pool. 
Return nullptr if page_id needs to be fetched from the disk - * but all frames are currently in use and not evictable (in another word, pinned). - * - * First search for page_id in the buffer pool. If not found, pick a replacement frame from either the free list or - * the replacer (always find from the free list first), read the page from disk by scheduling a read DiskRequest with - * disk_scheduler_->Schedule(), and replace the old page in the frame. Similar to NewPage(), if the old page is dirty, - * you need to write it back to disk and update the metadata of the new page - * - * In addition, remember to disable eviction and record the access history of the frame like you did for NewPage(). - * - * @param page_id id of page to be fetched - * @param access_type type of access to the page, only needed for leaderboard tests. - * @return nullptr if page_id cannot be fetched, otherwise pointer to the requested page + * Grading function. Do not modify! + * Invokes the callback function if it is not null. + * @param callback callback function to be invoked + * @param callback_type BEFORE or AFTER + * @param page_id the page id to invoke the callback with */ - auto FetchPage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> Page *; + void GradingCallback(bufferpool_callback_fn callback, CallbackType callback_type, page_id_t page_id) { + if (callback != nullptr) { + callback(callback_type, page_id); + } + } /** - * TODO(P1): Add implementation - * - * @brief PageGuard wrappers for FetchPage - * - * Functionality should be the same as FetchPage, except - * that, depending on the function called, a guard is returned. - * If FetchPageRead or FetchPageWrite is called, it is expected that - * the returned page already has a read or write latch held, respectively. - * - * @param page_id, the id of the page to fetch - * @return PageGuard holding the fetched page + * Fetch the requested page from the buffer pool. 
+ * @param page_id id of page to be fetched + * @return the requested page */ - auto FetchPageBasic(page_id_t page_id) -> BasicPageGuard; - auto FetchPageRead(page_id_t page_id) -> ReadPageGuard; - auto FetchPageWrite(page_id_t page_id) -> WritePageGuard; + virtual auto FetchPgImp(page_id_t page_id) -> Page * = 0; /** - * TODO(P1): Add implementation - * - * @brief Unpin the target page from the buffer pool. If page_id is not in the buffer pool or its pin count is already - * 0, return false. - * - * Decrement the pin count of a page. If the pin count reaches 0, the frame should be evictable by the replacer. - * Also, set the dirty flag on the page to indicate if the page was modified. - * + * Unpin the target page from the buffer pool. * @param page_id id of page to be unpinned * @param is_dirty true if the page should be marked as dirty, false otherwise - * @param access_type type of access to the page, only needed for leaderboard tests. - * @return false if the page is not in the page table or its pin count is <= 0 before this call, true otherwise + * @return false if the page pin count is <= 0 before this call, true otherwise */ - auto UnpinPage(page_id_t page_id, bool is_dirty, AccessType access_type = AccessType::Unknown) -> bool; + virtual auto UnpinPgImp(page_id_t page_id, bool is_dirty) -> bool = 0; /** - * TODO(P1): Add implementation - * - * @brief Flush the target page to disk. - * - * Use the DiskManager::WritePage() method to flush a page to disk, REGARDLESS of the dirty flag. - * Unset the dirty flag of the page after flushing. - * + * Flushes the target page to disk. * @param page_id id of page to be flushed, cannot be INVALID_PAGE_ID * @return false if the page could not be found in the page table, true otherwise */ - auto FlushPage(page_id_t page_id) -> bool; + virtual auto FlushPgImp(page_id_t page_id) -> bool = 0; /** - * TODO(P1): Add implementation - * - * @brief Flush all the pages in the buffer pool to disk. 
+ * Creates a new page in the buffer pool. + * @param[out] page_id id of created page + * @return nullptr if no new pages could be created, otherwise pointer to new page */ - void FlushAllPages(); + virtual auto NewPgImp(page_id_t *page_id) -> Page * = 0; /** - * TODO(P1): Add implementation - * - * @brief Delete a page from the buffer pool. If page_id is not in the buffer pool, do nothing and return true. If the - * page is pinned and cannot be deleted, return false immediately. - * - * After deleting the page from the page table, stop tracking the frame in the replacer and add the frame - * back to the free list. Also, reset the page's memory and metadata. Finally, you should call DeallocatePage() to - * imitate freeing the page on the disk. - * + * Deletes a page from the buffer pool. * @param page_id id of page to be deleted * @return false if the page exists but could not be deleted, true if the page didn't exist or deletion succeeded */ - auto DeletePage(page_id_t page_id) -> bool; - - private: - /** Number of pages in the buffer pool. */ - const size_t pool_size_; - /** The next page id to be allocated */ - std::atomic next_page_id_ = 0; - - /** Array of buffer pool pages. */ - Page *pages_; - /** Pointer to the disk sheduler. */ - std::unique_ptr disk_scheduler_ __attribute__((__unused__)); - /** Pointer to the log manager. Please ignore this for P1. */ - LogManager *log_manager_ __attribute__((__unused__)); - /** Page table for keeping track of buffer pool pages. */ - std::unordered_map page_table_; - /** Replacer to find unpinned pages for replacement. */ - std::unique_ptr replacer_; - /** List of free frames that don't have any pages on them. */ - std::list free_list_; - /** This latch protects shared data structures. We recommend updating this comment to describe what it protects. */ - std::mutex latch_; - /** This buffer is for the leaderboard task. You may want to use it to optimize the write requests. 
*/ - WriteBackCache write_back_cache_ __attribute__((__unused__)); - - /** - * @brief Allocate a page on disk. Caller should acquire the latch before calling this function. - * @return the id of the allocated page - */ - auto AllocatePage() -> page_id_t; + virtual auto DeletePgImp(page_id_t page_id) -> bool = 0; /** - * @brief Deallocate a page on disk. Caller should acquire the latch before calling this function. - * @param page_id id of the page to deallocate + * Flushes all the pages in the buffer pool to disk. */ - void DeallocatePage(__attribute__((unused)) page_id_t page_id) { - // This is a no-nop right now without a more complex data structure to track deallocated pages - } - - // TODO(student): You may add additional private members and helper functions + virtual void FlushAllPgsImp() = 0; }; } // namespace bustub diff --git a/src/include/buffer/buffer_pool_manager_instance.h b/src/include/buffer/buffer_pool_manager_instance.h new file mode 100644 index 0000000..73a8793 --- /dev/null +++ b/src/include/buffer/buffer_pool_manager_instance.h @@ -0,0 +1,181 @@ +//===----------------------------------------------------------------------===// +// +// BusTub +// +// buffer_pool_manager_instance.h +// +// Identification: src/include/buffer/buffer_pool_manager.h +// +// Copyright (c) 2015-2021, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include // NOLINT +#include + +#include "buffer/buffer_pool_manager.h" +#include "buffer/lru_k_replacer.h" +#include "common/config.h" +#include "container/hash/extendible_hash_table.h" +#include "recovery/log_manager.h" +#include "storage/disk/disk_manager.h" +#include "storage/page/page.h" + +namespace bustub { + +/** + * BufferPoolManager reads disk pages to and from its internal buffer pool. 
+ */ +class BufferPoolManagerInstance : public BufferPoolManager { + public: + /** + * @brief Creates a new BufferPoolManagerInstance. + * @param pool_size the size of the buffer pool + * @param disk_manager the disk manager + * @param replacer_k the lookback constant k for the LRU-K replacer + * @param log_manager the log manager (for testing only: nullptr = disable logging). Please ignore this for P1. + */ + BufferPoolManagerInstance(size_t pool_size, DiskManager *disk_manager, size_t replacer_k = LRUK_REPLACER_K, + LogManager *log_manager = nullptr); + + /** + * @brief Destroy an existing BufferPoolManagerInstance. + */ + ~BufferPoolManagerInstance() override; + + /** @brief Return the size (number of frames) of the buffer pool. */ + auto GetPoolSize() -> size_t override { return pool_size_; } + + /** @brief Return the pointer to all the pages in the buffer pool. */ + auto GetPages() -> Page * { return pages_; } + + protected: + /** + * TODO(P1): Add implementation + * + * @brief Create a new page in the buffer pool. Set page_id to the new page's id, or nullptr if all frames + * are currently in use and not evictable (in another word, pinned). + * + * You should pick the replacement frame from either the free list or the replacer (always find from the free list + * first), and then call the AllocatePage() method to get a new page id. If the replacement frame has a dirty page, + * you should write it back to the disk first. You also need to reset the memory and metadata for the new page. + * + * Remember to "Pin" the frame by calling replacer.SetEvictable(frame_id, false) + * so that the replacer wouldn't evict the frame before the buffer pool manager "Unpin"s it. + * Also, remember to record the access history of the frame in the replacer for the lru-k algorithm to work. 
+ * + * @param[out] page_id id of created page + * @return nullptr if no new pages could be created, otherwise pointer to new page + */ + auto NewPgImp(page_id_t *page_id) -> Page * override; + + /** + * TODO(P1): Add implementation + * + * @brief Fetch the requested page from the buffer pool. Return nullptr if page_id needs to be fetched from the disk + * but all frames are currently in use and not evictable (in another word, pinned). + * + * First search for page_id in the buffer pool. If not found, pick a replacement frame from either the free list or + * the replacer (always find from the free list first), read the page from disk by calling disk_manager_->ReadPage(), + * and replace the old page in the frame. Similar to NewPgImp(), if the old page is dirty, you need to write it back + * to disk and update the metadata of the new page + * + * In addition, remember to disable eviction and record the access history of the frame like you did for NewPgImp(). + * + * @param page_id id of page to be fetched + * @return nullptr if page_id cannot be fetched, otherwise pointer to the requested page + */ + auto FetchPgImp(page_id_t page_id) -> Page * override; + + /** + * TODO(P1): Add implementation + * + * @brief Unpin the target page from the buffer pool. If page_id is not in the buffer pool or its pin count is already + * 0, return false. + * + * Decrement the pin count of a page. If the pin count reaches 0, the frame should be evictable by the replacer. + * Also, set the dirty flag on the page to indicate if the page was modified. + * + * @param page_id id of page to be unpinned + * @param is_dirty true if the page should be marked as dirty, false otherwise + * @return false if the page is not in the page table or its pin count is <= 0 before this call, true otherwise + */ + auto UnpinPgImp(page_id_t page_id, bool is_dirty) -> bool override; + + /** + * TODO(P1): Add implementation + * + * @brief Flush the target page to disk. 
+ * + * Use the DiskManager::WritePage() method to flush a page to disk, REGARDLESS of the dirty flag. + * Unset the dirty flag of the page after flushing. + * + * @param page_id id of page to be flushed, cannot be INVALID_PAGE_ID + * @return false if the page could not be found in the page table, true otherwise + */ + auto FlushPgImp(page_id_t page_id) -> bool override; + + /** + * TODO(P1): Add implementation + * + * @brief Flush all the pages in the buffer pool to disk. + */ + void FlushAllPgsImp() override; + + /** + * TODO(P1): Add implementation + * + * @brief Delete a page from the buffer pool. If page_id is not in the buffer pool, do nothing and return true. If the + * page is pinned and cannot be deleted, return false immediately. + * + * After deleting the page from the page table, stop tracking the frame in the replacer and add the frame + * back to the free list. Also, reset the page's memory and metadata. Finally, you should call DeallocatePage() to + * imitate freeing the page on the disk. + * + * @param page_id id of page to be deleted + * @return false if the page exists but could not be deleted, true if the page didn't exist or deletion succeeded + */ + auto DeletePgImp(page_id_t page_id) -> bool override; + + /** Number of pages in the buffer pool. */ + const size_t pool_size_; + /** The next page id to be allocated */ + std::atomic next_page_id_ = 0; + /** Bucket size for the extendible hash table */ + const size_t bucket_size_ = 4; + + /** Array of buffer pool pages. */ + Page *pages_; + /** Pointer to the disk manager. */ + DiskManager *disk_manager_ __attribute__((__unused__)); + /** Pointer to the log manager. Please ignore this for P1. */ + LogManager *log_manager_ __attribute__((__unused__)); + /** Page table for keeping track of buffer pool pages. */ + ExtendibleHashTable *page_table_; + /** Replacer to find unpinned pages for replacement. */ + LRUKReplacer *replacer_; + /** List of free frames that don't have any pages on them. 
*/ + std::list free_list_; + /** This latch protects shared data structures. We recommend updating this comment to describe what it protects. */ + std::mutex latch_; + + /** + * @brief Allocate a page on disk. Caller should acquire the latch before calling this function. + * @return the id of the allocated page + */ + auto AllocatePage() -> page_id_t; + + /** + * @brief Deallocate a page on disk. Caller should acquire the latch before calling this function. + * @param page_id id of the page to deallocate + */ + void DeallocatePage(__attribute__((unused)) page_id_t page_id) { + // This is a no-nop right now without a more complex data structure to track deallocated pages + } + + // TODO(student): You may add additional private members and helper functions +}; +} // namespace bustub diff --git a/src/include/buffer/lru_k_replacer.h b/src/include/buffer/lru_k_replacer.h index d24eea3..d6877e7 100644 --- a/src/include/buffer/lru_k_replacer.h +++ b/src/include/buffer/lru_k_replacer.h @@ -23,19 +23,6 @@ namespace bustub { -enum class AccessType { Unknown = 0, Lookup, Scan, Index }; - -class LRUKNode { - private: - /** History of last seen K timestamps of this page. Least recent timestamp stored in front. */ - // Remove maybe_unused if you start using them. Feel free to change the member variables as you want. - - [[maybe_unused]] std::list history_; - [[maybe_unused]] size_t k_; - [[maybe_unused]] frame_id_t fid_; - [[maybe_unused]] bool is_evictable_{false}; -}; - /** * LRUKReplacer implements the LRU-k replacement policy. * @@ -74,8 +61,8 @@ class LRUKReplacer { * that are marked as 'evictable' are candidates for eviction. * * A frame with less than k historical references is given +inf as its backward k-distance. - * If multiple frames have inf backward k-distance, then evict frame with earliest timestamp - * based on LRU. + * If multiple frames have inf backward k-distance, then evict the frame with the earliest + * timestamp overall. 
* * Successful eviction of a frame should decrement the size of replacer and remove the frame's * access history. @@ -95,10 +82,8 @@ class LRUKReplacer { * also use BUSTUB_ASSERT to abort the process if frame id is invalid. * * @param frame_id id of frame that received a new access. - * @param access_type type of access that was received. This parameter is only needed for - * leaderboard tests. */ - void RecordAccess(frame_id_t frame_id, AccessType access_type = AccessType::Unknown); + void RecordAccess(frame_id_t frame_id); /** * TODO(P1): Add implementation @@ -150,12 +135,21 @@ class LRUKReplacer { private: // TODO(student): implement me! You can replace these member variables as you like. // Remove maybe_unused if you start using them. - [[maybe_unused]] std::unordered_map node_store_; [[maybe_unused]] size_t current_timestamp_{0}; - [[maybe_unused]] size_t curr_size_{0}; - [[maybe_unused]] size_t replacer_size_; - [[maybe_unused]] size_t k_; - [[maybe_unused]] std::mutex latch_; + size_t curr_size_{0}; + size_t replacer_size_; + size_t k_; + std::mutex latch_; + + std::unordered_map access_count_; + + std::list history_list_; + std::unordered_map::iterator> history_map_; + + std::list cache_list_; + std::unordered_map::iterator> cache_map_; + + std::unordered_map is_evictable_; }; } // namespace bustub diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index 3591356..3b0a813 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -12,7 +12,6 @@ #pragma once -#include #include #include #include @@ -25,10 +24,7 @@ #include "storage/index/b_plus_tree_index.h" #include "storage/index/extendible_hash_table_index.h" #include "storage/index/index.h" -#include "storage/index/stl_ordered.h" -#include "storage/index/stl_unordered.h" #include "storage/table/table_heap.h" -#include "storage/table/tuple.h" namespace bustub { @@ -39,8 +35,6 @@ using table_oid_t = uint32_t; using column_oid_t = uint32_t; using index_oid_t = 
uint32_t; -enum class IndexType { BPlusTreeIndex, HashTableIndex, STLOrderedIndex, STLUnorderedIndex }; - /** * The TableInfo class maintains metadata about a table. */ @@ -78,15 +72,13 @@ struct IndexInfo { * @param key_size The size of the index key, in bytes */ IndexInfo(Schema key_schema, std::string name, std::unique_ptr &&index, index_oid_t index_oid, - std::string table_name, size_t key_size, bool is_primary_key, IndexType index_type) + std::string table_name, size_t key_size) : key_schema_{std::move(key_schema)}, name_{std::move(name)}, index_{std::move(index)}, index_oid_{index_oid}, table_name_{std::move(table_name)}, - key_size_{key_size}, - is_primary_key_{is_primary_key}, - index_type_(index_type) {} + key_size_{key_size} {} /** The schema for the index key */ Schema key_schema_; /** The name of the index */ @@ -99,10 +91,6 @@ struct IndexInfo { std::string table_name_; /** The size of the index key, in bytes */ const size_t key_size_; - /** Is primary key index? */ - bool is_primary_key_; - /** The index type */ - IndexType index_type_; }; /** @@ -144,13 +132,11 @@ class Catalog { // Construct the table heap std::unique_ptr table = nullptr; + // TODO(Wan,chi): This should be refactored into a private ctor for the binder tests, we shouldn't allow nullptr. // When create_table_heap == false, it means that we're running binder tests (where no txn will be provided) or // we are running shell without buffer pool. We don't need to create TableHeap in this case. 
if (create_table_heap) { - table = std::make_unique(bpm_); - } else { - // Otherwise, create an empty heap only for binder tests - table = TableHeap::CreateEmptyHeap(create_table_heap); + table = std::make_unique(bpm_, lock_manager_, log_manager_, txn); } // Fetch the table OID for the new table @@ -215,8 +201,7 @@ class Catalog { template auto CreateIndex(Transaction *txn, const std::string &index_name, const std::string &table_name, const Schema &schema, const Schema &key_schema, const std::vector &key_attrs, std::size_t keysize, - HashFunction hash_function, bool is_primary_key = false, - IndexType index_type = IndexType::HashTableIndex) -> IndexInfo * { + HashFunction hash_function) -> IndexInfo * { // Reject the creation request for nonexistent table if (table_names_.find(table_name) == table_names_.end()) { return NULL_INDEX_INFO; @@ -233,7 +218,7 @@ class Catalog { } // Construct index metdata - auto meta = std::make_unique(index_name, table_name, &schema, key_attrs, is_primary_key); + auto meta = std::make_unique(index_name, table_name, &schema, key_attrs); // Construct the index, take ownership of metadata // TODO(Kyle): We should update the API for CreateIndex @@ -241,35 +226,21 @@ class Catalog { // just the key, value, and comparator types // TODO(chi): support both hash index and btree index - std::unique_ptr index; - if (index_type == IndexType::HashTableIndex) { - index = std::make_unique>(std::move(meta), bpm_, - hash_function); - } else if (index_type == IndexType::BPlusTreeIndex) { - index = std::make_unique>(std::move(meta), bpm_); - } else if (index_type == IndexType::STLOrderedIndex) { - index = std::make_unique>(std::move(meta), bpm_); - } else if (index_type == IndexType::STLUnorderedIndex) { - index = - std::make_unique>(std::move(meta), bpm_, hash_function); - } else { - UNIMPLEMENTED("Unsupported Index Type"); - } + auto index = std::make_unique>(std::move(meta), bpm_); // Populate the index with all tuples in table heap auto *table_meta = 
GetTable(table_name); - for (auto iter = table_meta->table_->MakeIterator(); !iter.IsEnd(); ++iter) { - auto [meta, tuple] = iter.GetTuple(); - // we have to silently ignore the error here for a lot of reasons... - index->InsertEntry(tuple.KeyFromTuple(schema, key_schema, key_attrs), tuple.GetRid(), txn); + auto *heap = table_meta->table_.get(); + for (auto tuple = heap->Begin(txn); tuple != heap->End(); ++tuple) { + index->InsertEntry(tuple->KeyFromTuple(schema, key_schema, key_attrs), tuple->GetRid(), txn); } // Get the next OID for the new index const auto index_oid = next_index_oid_.fetch_add(1); // Construct index information; IndexInfo takes ownership of the Index itself - auto index_info = std::make_unique(key_schema, index_name, std::move(index), index_oid, table_name, - keysize, is_primary_key, index_type); + auto index_info = + std::make_unique(key_schema, index_name, std::move(index), index_oid, table_name, keysize); auto *tmp = index_info.get(); // Update internal tracking @@ -403,29 +374,3 @@ class Catalog { }; } // namespace bustub - -template <> -struct fmt::formatter : formatter { - template - auto format(bustub::IndexType c, FormatContext &ctx) const { - string_view name; - switch (c) { - case bustub::IndexType::BPlusTreeIndex: - name = "BPlusTree"; - break; - case bustub::IndexType::HashTableIndex: - name = "Hash"; - break; - case bustub::IndexType::STLOrderedIndex: - name = "STLOrdered"; - break; - case bustub::IndexType::STLUnorderedIndex: - name = "STLUnordered"; - break; - default: - name = "Unknown"; - break; - } - return formatter::format(name, ctx); - } -}; diff --git a/src/include/catalog/column.h b/src/include/catalog/column.h index a38ae2c..b645ca2 100644 --- a/src/include/catalog/column.h +++ b/src/include/catalog/column.h @@ -22,7 +22,6 @@ #include "common/exception.h" #include "common/macros.h" #include "type/type.h" -#include "type/type_id.h" namespace bustub { class AbstractExpression; @@ -37,9 +36,8 @@ class Column { * @param type 
type of the column */ Column(std::string column_name, TypeId type) - : column_name_(std::move(column_name)), column_type_(type), length_(TypeSize(type)) { + : column_name_(std::move(column_name)), column_type_(type), fixed_length_(TypeSize(type)) { BUSTUB_ASSERT(type != TypeId::VARCHAR, "Wrong constructor for VARCHAR type."); - BUSTUB_ASSERT(type != TypeId::VECTOR, "Wrong constructor for VECTOR type."); } /** @@ -50,8 +48,11 @@ class Column { * @param expr expression used to create this column */ Column(std::string column_name, TypeId type, uint32_t length) - : column_name_(std::move(column_name)), column_type_(type), length_(TypeSize(type, length)) { - BUSTUB_ASSERT(type == TypeId::VARCHAR || type == TypeId::VECTOR, "Wrong constructor for fixed-size type."); + : column_name_(std::move(column_name)), + column_type_(type), + fixed_length_(TypeSize(type)), + variable_length_(length) { + BUSTUB_ASSERT(type == TypeId::VARCHAR, "Wrong constructor for non-VARCHAR type."); } /** @@ -62,20 +63,26 @@ class Column { Column(std::string column_name, const Column &column) : column_name_(std::move(column_name)), column_type_(column.column_type_), - length_(column.length_), + fixed_length_(column.fixed_length_), + variable_length_(column.variable_length_), column_offset_(column.column_offset_) {} - auto WithColumnName(std::string column_name) -> Column { - Column c = *this; - c.column_name_ = std::move(column_name); - return c; - } - /** @return column name */ auto GetName() const -> std::string { return column_name_; } /** @return column length */ - auto GetStorageSize() const -> uint32_t { return length_; } + auto GetLength() const -> uint32_t { + if (IsInlined()) { + return fixed_length_; + } + return variable_length_; + } + + /** @return column fixed length */ + auto GetFixedLength() const -> uint32_t { return fixed_length_; } + + /** @return column variable length */ + auto GetVariableLength() const -> uint32_t { return variable_length_; } /** @return column's offset in the 
tuple */ auto GetOffset() const -> uint32_t { return column_offset_; } @@ -84,7 +91,7 @@ class Column { auto GetType() const -> TypeId { return column_type_; } /** @return true if column is inlined, false otherwise */ - auto IsInlined() const -> bool { return column_type_ != TypeId::VARCHAR && column_type_ != TypeId::VECTOR; } + auto IsInlined() const -> bool { return column_type_ != TypeId::VARCHAR; } /** @return a string representation of this column */ auto ToString(bool simplified = true) const -> std::string; @@ -95,7 +102,7 @@ class Column { * @param type type whose size is to be determined * @return size in bytes */ - static auto TypeSize(TypeId type, uint32_t length = 0) -> uint8_t { + static auto TypeSize(TypeId type) -> uint8_t { switch (type) { case TypeId::BOOLEAN: case TypeId::TINYINT: @@ -109,9 +116,8 @@ class Column { case TypeId::TIMESTAMP: return 8; case TypeId::VARCHAR: - return length; - case TypeId::VECTOR: - return length * sizeof(double); + // TODO(Amadou): Confirm this. + return 12; default: { UNREACHABLE("Cannot get size of invalid type"); } @@ -124,8 +130,11 @@ class Column { /** Column value's type. */ TypeId column_type_; - /** The size of the column. */ - uint32_t length_; + /** For a non-inlined column, this is the size of a pointer. Otherwise, the size of the fixed length column. */ + uint32_t fixed_length_; + + /** For an inlined column, 0. Otherwise, the length of the variable length column. */ + uint32_t variable_length_{0}; /** Column offset in the tuple. 
*/ uint32_t column_offset_{0}; diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index c85d3f5..6681acd 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -91,7 +91,7 @@ class Schema { auto GetUnlinedColumnCount() const -> uint32_t { return static_cast(uninlined_columns_.size()); } /** @return the number of bytes used by one tuple */ - inline auto GetInlinedStorageSize() const -> uint32_t { return length_; } + inline auto GetLength() const -> uint32_t { return length_; } /** @return true if all columns are inlined, false otherwise */ inline auto IsInlined() const -> bool { return tuple_is_inlined_; } diff --git a/src/include/common/bustub_instance.h b/src/include/common/bustub_instance.h index 3c08d84..ddfdb8f 100644 --- a/src/include/common/bustub_instance.h +++ b/src/include/common/bustub_instance.h @@ -25,7 +25,6 @@ #include "catalog/catalog.h" #include "common/config.h" #include "common/util/string_util.h" -#include "execution/check_options.h" #include "libfort/lib/fort.hpp" #include "type/value.h" @@ -42,13 +41,6 @@ class CheckpointManager; class Catalog; class ExecutionEngine; -class CreateStatement; -class IndexStatement; -class VariableSetStatement; -class VariableShowStatement; -class ExplainStatement; -class TransactionStatement; - class ResultWriter { public: ResultWriter() = default; @@ -62,13 +54,6 @@ class ResultWriter { virtual void EndRow() = 0; virtual void BeginTable(bool simplified_output) = 0; virtual void EndTable() = 0; - virtual void OneCell(const std::string &cell) { - BeginTable(true); - BeginRow(); - WriteCell(cell); - EndRow(); - EndTable(); - } bool simplified_output_{false}; }; @@ -114,20 +99,6 @@ class SimpleStreamWriter : public ResultWriter { std::string separator_; }; -class StringVectorWriter : public ResultWriter { - public: - void WriteCell(const std::string &cell) override { values_.back().push_back(cell); } - void WriteHeaderCell(const std::string &cell) override {} - void 
BeginHeader() override {} - void EndHeader() override {} - void BeginRow() override { values_.emplace_back(); } - void EndRow() override {} - void BeginTable(bool simplified_output) override { values_.clear(); } - void EndTable() override {} - - std::vector> values_; -}; - class HtmlWriter : public ResultWriter { auto Escape(const std::string &data) -> std::string { std::string buffer; @@ -228,7 +199,6 @@ class FortTableWriter : public ResultWriter { tables_.emplace_back(table_.to_string()); table_ = fort::utf8_table{}; } - void OneCell(const std::string &cell) override { tables_.emplace_back(cell + "\n"); } fort::utf8_table table_; std::vector tables_; }; @@ -238,32 +208,24 @@ class BustubInstance { /** * Get the executor context from the BusTub instance. */ - auto MakeExecutorContext(Transaction *txn, bool is_modify) -> std::unique_ptr; + auto MakeExecutorContext(Transaction *txn) -> std::unique_ptr; public: - explicit BustubInstance(const std::string &db_file_name, size_t bpm_size = 128); + explicit BustubInstance(const std::string &db_file_name); - explicit BustubInstance(size_t bpm_size = 128); + BustubInstance(); ~BustubInstance(); /** * Execute a SQL query in the BusTub instance. */ - auto ExecuteSql(const std::string &sql, ResultWriter &writer, std::shared_ptr check_options = nullptr) - -> bool; + auto ExecuteSql(const std::string &sql, ResultWriter &writer) -> bool; /** * Execute a SQL query in the BusTub instance with provided txn. */ - auto ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, Transaction *txn, - std::shared_ptr check_options = nullptr) -> bool; - - /** Enable managed txn mode on this BusTub instance, allowing statements like `BEGIN`. */ - void EnableManagedTxn(); - - /** Get the current transaction. */ - auto CurrentManagedTxn() -> Transaction *; + auto ExecuteSqlTxn(const std::string &sql, ResultWriter &writer, Transaction *txn) -> bool; /** * FOR TEST ONLY. Generate test tables in this BusTub instance. 
@@ -279,19 +241,17 @@ class BustubInstance { */ void GenerateMockTable(); - // Currently the followings are directly referenced by recovery test, so + // TODO(chi): change to unique_ptr. Currently they're directly referenced by recovery test, so // we cannot do anything on them until someone decides to refactor the recovery test. - std::unique_ptr disk_manager_; - std::unique_ptr buffer_pool_manager_; - - std::unique_ptr lock_manager_; - std::unique_ptr txn_manager_; - std::unique_ptr log_manager_; - std::unique_ptr checkpoint_manager_; - std::unique_ptr catalog_; - std::unique_ptr execution_engine_; - /** Coordination for catalog */ + DiskManager *disk_manager_; + BufferPoolManager *buffer_pool_manager_; + LockManager *lock_manager_; + TransactionManager *txn_manager_; + LogManager *log_manager_; + CheckpointManager *checkpoint_manager_; + Catalog *catalog_; + ExecutionEngine *execution_engine_; std::shared_mutex catalog_lock_; auto GetSessionVariable(const std::string &key) -> std::string { @@ -308,22 +268,10 @@ class BustubInstance { private: void CmdDisplayTables(ResultWriter &writer); - void CmdDbgMvcc(const std::vector ¶ms, ResultWriter &writer); - void CmdTxn(const std::vector ¶ms, ResultWriter &writer); void CmdDisplayIndices(ResultWriter &writer); void CmdDisplayHelp(ResultWriter &writer); void WriteOneCell(const std::string &cell, ResultWriter &writer); - - void HandleCreateStatement(Transaction *txn, const CreateStatement &stmt, ResultWriter &writer); - void HandleIndexStatement(Transaction *txn, const IndexStatement &stmt, ResultWriter &writer); - void HandleExplainStatement(Transaction *txn, const ExplainStatement &stmt, ResultWriter &writer); - void HandleTxnStatement(Transaction *txn, const TransactionStatement &stmt, ResultWriter &writer); - void HandleVariableShowStatement(Transaction *txn, const VariableShowStatement &stmt, ResultWriter &writer); - void HandleVariableSetStatement(Transaction *txn, const VariableSetStatement &stmt, ResultWriter 
&writer); - std::unordered_map session_variables_; - Transaction *current_txn_{nullptr}; - bool managed_txn_mode_{false}; }; } // namespace bustub diff --git a/src/include/common/channel.h b/src/include/common/channel.h deleted file mode 100644 index 8539a18..0000000 --- a/src/include/common/channel.h +++ /dev/null @@ -1,59 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// channel.h -// -// Identification: src/include/common/channel.h -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include // NOLINT -#include // NOLINT -#include -#include - -namespace bustub { - -/** - * Channels allow for safe sharing of data between threads. This is a multi-producer multi-consumer channel. - */ -template -class Channel { - public: - Channel() = default; - ~Channel() = default; - - /** - * @brief Inserts an element into a shared queue. - * - * @param element The element to be inserted. - */ - void Put(T element) { - std::unique_lock lk(m_); - q_.push(std::move(element)); - lk.unlock(); - cv_.notify_all(); - } - - /** - * @brief Gets an element from the shared queue. If the queue is empty, blocks until an element is available. - */ - auto Get() -> T { - std::unique_lock lk(m_); - cv_.wait(lk, [&]() { return !q_.empty(); }); - T element = std::move(q_.front()); - q_.pop(); - return element; - } - - private: - std::mutex m_; - std::condition_variable cv_; - std::queue q_; -}; -} // namespace bustub diff --git a/src/include/common/config.h b/src/include/common/config.h index fb004eb..1690320 100644 --- a/src/include/common/config.h +++ b/src/include/common/config.h @@ -16,9 +16,6 @@ #include // NOLINT #include -#define DISABLE_LOCK_MANAGER -#define DISABLE_CHECKPOINT_MANAGER - namespace bustub { /** Cycle detection is performed every CYCLE_DETECTION_INTERVAL milliseconds. 
*/ @@ -42,13 +39,11 @@ static constexpr int LRUK_REPLACER_K = 10; // lookback window for lru-k replace using frame_id_t = int32_t; // frame id type using page_id_t = int32_t; // page id type -using txn_id_t = int64_t; // transaction id type +using txn_id_t = int32_t; // transaction id type using lsn_t = int32_t; // log sequence number type using slot_offset_t = size_t; // slot offset type using oid_t = uint16_t; -const txn_id_t TXN_START_ID = 1LL << 62; // first txn id - static constexpr int VARCHAR_DEFAULT_LENGTH = 128; // default length for varchar when constructing the column } // namespace bustub diff --git a/src/include/common/enums/statement_type.h b/src/include/common/enums/statement_type.h index 74270d3..c915f85 100644 --- a/src/include/common/enums/statement_type.h +++ b/src/include/common/enums/statement_type.h @@ -32,7 +32,6 @@ enum class StatementType : uint8_t { INDEX_STATEMENT, // index statement type VARIABLE_SET_STATEMENT, // set variable statement type VARIABLE_SHOW_STATEMENT, // show variable statement type - TRANSACTION_STATEMENT, // txn statement type }; } // namespace bustub @@ -76,9 +75,6 @@ struct fmt::formatter : formatter { case bustub::StatementType::VARIABLE_SET_STATEMENT: name = "VariableSet"; break; - case bustub::StatementType::TRANSACTION_STATEMENT: - name = "Transaction"; - break; } return formatter::format(name, ctx); } diff --git a/src/include/common/exception.h b/src/include/common/exception.h index 3ac1f31..128ca6c 100644 --- a/src/include/common/exception.h +++ b/src/include/common/exception.h @@ -12,7 +12,6 @@ #pragma once -#include #include #include #include @@ -52,21 +51,16 @@ enum class ExceptionType { EXECUTION = 12, }; -extern std::atomic global_disable_execution_exception_print; - class Exception : public std::runtime_error { public: /** * Construct a new Exception instance. 
* @param message The exception message */ - explicit Exception(const std::string &message, bool print = true) - : std::runtime_error(message), type_(ExceptionType::INVALID) { + explicit Exception(const std::string &message) : std::runtime_error(message), type_(ExceptionType::INVALID) { #ifndef NDEBUG - if (print) { - std::string exception_message = "Message :: " + message + "\n"; - std::cerr << exception_message; - } + std::string exception_message = "Message :: " + message + "\n"; + std::cerr << exception_message; #endif } @@ -75,14 +69,12 @@ class Exception : public std::runtime_error { * @param exception_type The exception type * @param message The exception message */ - Exception(ExceptionType exception_type, const std::string &message, bool print = true) + Exception(ExceptionType exception_type, const std::string &message) : std::runtime_error(message), type_(exception_type) { #ifndef NDEBUG - if (print && !global_disable_execution_exception_print.load()) { - std::string exception_message = - "\nException Type :: " + ExceptionTypeToString(type_) + ", Message :: " + message + "\n\n"; - std::cerr << exception_message; - } + std::string exception_message = + "\nException Type :: " + ExceptionTypeToString(type_) + "\nMessage :: " + message + "\n"; + std::cerr << exception_message; #endif } @@ -112,8 +104,6 @@ class Exception : public std::runtime_error { return "Out of Memory"; case ExceptionType::NOT_IMPLEMENTED: return "Not implemented"; - case ExceptionType::EXECUTION: - return "Execution"; default: return "Unknown"; } @@ -132,7 +122,7 @@ class NotImplementedException : public Exception { class ExecutionException : public Exception { public: ExecutionException() = delete; - explicit ExecutionException(const std::string &msg) : Exception(ExceptionType::EXECUTION, msg, true) {} + explicit ExecutionException(const std::string &msg) : Exception(ExceptionType::EXECUTION, msg) {} }; } // namespace bustub diff --git a/src/include/common/logger.h 
b/src/include/common/logger.h index d68f16b..291a7fa 100644 --- a/src/include/common/logger.h +++ b/src/include/common/logger.h @@ -42,7 +42,7 @@ namespace bustub { using cstr = const char *; static constexpr auto PastLastSlash(cstr a, cstr b) -> cstr { - return *a == '\0' ? b : *a == '/' ? PastLastSlash(a + 1, a + 1) : PastLastSlash(a + 1, b); + return *a == '\0' ? b : *b == '/' ? PastLastSlash(a + 1, a + 1) : PastLastSlash(a + 1, b); } static constexpr auto PastLastSlash(cstr a) -> cstr { return PastLastSlash(a, a); } @@ -54,13 +54,13 @@ static constexpr auto PastLastSlash(cstr a) -> cstr { return PastLastSlash(a, a) }) // Log levels. -#define LOG_LEVEL_OFF 1000 -#define LOG_LEVEL_ERROR 500 -#define LOG_LEVEL_WARN 400 -#define LOG_LEVEL_INFO 300 -#define LOG_LEVEL_DEBUG 200 -#define LOG_LEVEL_TRACE 100 -#define LOG_LEVEL_ALL 0 +static constexpr int LOG_LEVEL_OFF = 1000; +static constexpr int LOG_LEVEL_ERROR = 500; +static constexpr int LOG_LEVEL_WARN = 400; +static constexpr int LOG_LEVEL_INFO = 300; +static constexpr int LOG_LEVEL_DEBUG = 200; +static constexpr int LOG_LEVEL_TRACE = 100; +static constexpr int LOG_LEVEL_ALL = 0; #define LOG_LOG_TIME_FORMAT "%Y-%m-%d %H:%M:%S" #define LOG_OUTPUT_STREAM stdout @@ -71,11 +71,11 @@ static constexpr auto PastLastSlash(cstr a) -> cstr { return PastLastSlash(a, a) // #pragma message("Warning: LOG_LEVEL compile option was not explicitly // given.") #ifndef NDEBUG -#define LOG_LEVEL LOG_LEVEL_DEBUG // #pragma message("LOG_LEVEL_DEBUG is used instead as DEBUG option is on.") +static constexpr int LOG_LEVEL = LOG_LEVEL_DEBUG; #else // #pragma message("LOG_LEVEL_WARN is used instead as DEBUG option is off.") -#define LOG_LEVEL LOG_LEVEL_INFO +static constexpr int LOG_LEVEL = LOG_LEVEL_INFO; #endif // #pragma message("Give LOG_LEVEL compile option to overwrite the default // level.") diff --git a/src/include/common/macros.h b/src/include/common/macros.h index 3db3ba6..4e01a43 100644 --- a/src/include/common/macros.h +++ 
b/src/include/common/macros.h @@ -13,7 +13,6 @@ #pragma once #include -#include #include namespace bustub { @@ -22,10 +21,9 @@ namespace bustub { #define UNIMPLEMENTED(message) throw std::logic_error(message) -#define BUSTUB_ENSURE(expr, message) \ - if (!(expr)) { \ - std::cerr << "ERROR: " << (message) << std::endl; \ - std::terminate(); \ +#define BUSTUB_ENSURE(expr, message) \ + if (!(expr)) { \ + throw std::logic_error(message); \ } #define UNREACHABLE(message) throw std::logic_error(message) diff --git a/src/include/common/util/hash_util.h b/src/include/common/util/hash_util.h index e584a94..469c9ce 100644 --- a/src/include/common/util/hash_util.h +++ b/src/include/common/util/hash_util.h @@ -88,7 +88,7 @@ class HashUtil { } case TypeId::VARCHAR: { auto raw = val->GetData(); - auto len = val->GetStorageSize(); + auto len = val->GetLength(); return HashBytes(raw, len); } case TypeId::TIMESTAMP: { diff --git a/src/include/common/util/string_util.h b/src/include/common/util/string_util.h index 37beffc..39a31e0 100644 --- a/src/include/common/util/string_util.h +++ b/src/include/common/util/string_util.h @@ -25,9 +25,6 @@ class StringUtil { /** @return true if haystack contains needle, false otherwise */ static auto Contains(const std::string &haystack, const std::string &needle) -> bool; - /** @return true if haystack contains needle after keyword, false otherwise */ - static auto ContainsAfter(const std::string &keyword, const std::string &haystack, const std::string &needle) -> bool; - /** @return true if target string starts with given prefix, false otherwise */ static auto StartsWith(const std::string &str, const std::string &prefix) -> bool; diff --git a/src/include/concurrency/lock_manager.h b/src/include/concurrency/lock_manager.h index aba2c2d..6c367cf 100644 --- a/src/include/concurrency/lock_manager.h +++ b/src/include/concurrency/lock_manager.h @@ -17,13 +17,13 @@ #include #include #include // NOLINT +#include #include #include #include #include 
#include "common/config.h" -#include "common/macros.h" #include "common/rid.h" #include "concurrency/transaction.h" @@ -77,28 +77,16 @@ class LockManager { /** * Creates a new lock manager configured for the deadlock detection policy. */ - LockManager() = default; - - void StartDeadlockDetection() { - BUSTUB_ENSURE(txn_manager_ != nullptr, "txn_manager_ is not set.") + LockManager() { enable_cycle_detection_ = true; cycle_detection_thread_ = new std::thread(&LockManager::RunCycleDetection, this); } -#ifndef DISABLE_LOCK_MANAGER ~LockManager() { - UnlockAll(); - enable_cycle_detection_ = false; - - if (cycle_detection_thread_ != nullptr) { - cycle_detection_thread_->join(); - delete cycle_detection_thread_; - } + cycle_detection_thread_->join(); + delete cycle_detection_thread_; } -#else - ~LockManager() = default; -#endif /** * [LOCK_NOTE] @@ -158,12 +146,8 @@ class LockManager { * - If requested lock mode is the same as that of the lock presently held, * Lock() should return true since it already has the lock. * - If requested lock mode is different, Lock() should upgrade the lock held by the transaction. - * - Basically there should be three steps to perform a lock upgrade in general - * - 1. Check the precondition of upgrade - * - 2. Drop the current lock, reserve the upgrade position - * - 3. Wait to get the new lock granted * - * A lock request being upgraded should be prioritized over other waiting lock requests on the same resource. + * A lock request being upgraded should be prioritised over other waiting lock requests on the same resource. 
* * While upgrading, only the following transitions should be allowed: * IS -> [S, X, IX, SIX] @@ -181,9 +165,6 @@ class LockManager { * BOOK KEEPING: * If a lock is granted to a transaction, lock manager should update its * lock sets appropriately (check transaction.h) - * - * You probably want to consider which type of lock to directly apply on table - * when implementing executor later */ /** @@ -280,10 +261,9 @@ class LockManager { * @param rid the RID that is locked by the transaction * @param oid the table_oid_t of the table the row belongs to * @param rid the RID of the row to be unlocked - * @param force unlock the tuple regardless of isolation level, not changing the transaction state * @return true if the unlock is successful, false otherwise */ - auto UnlockRow(Transaction *txn, const table_oid_t &oid, const RID &rid, bool force = false) -> bool; + auto UnlockRow(Transaction *txn, const table_oid_t &oid, const RID &rid) -> bool; /*** Graph API ***/ @@ -318,22 +298,60 @@ class LockManager { */ auto RunCycleDetection() -> void; - TransactionManager *txn_manager_; + auto GrantLock(const std::shared_ptr &lock_request, + const std::shared_ptr &lock_request_queue) -> bool; - private: - /** Spring 2023 */ - /* You are allowed to modify all functions below. 
*/ - auto UpgradeLockTable(Transaction *txn, LockMode lock_mode, const table_oid_t &oid) -> bool; - auto UpgradeLockRow(Transaction *txn, LockMode lock_mode, const table_oid_t &oid, const RID &rid) -> bool; - auto AreLocksCompatible(LockMode l1, LockMode l2) -> bool; - auto CanTxnTakeLock(Transaction *txn, LockMode lock_mode) -> bool; - void GrantNewLocksIfPossible(LockRequestQueue *lock_request_queue); - auto CanLockUpgrade(LockMode curr_lock_mode, LockMode requested_lock_mode) -> bool; - auto CheckAppropriateLockOnTable(Transaction *txn, const table_oid_t &oid, LockMode row_lock_mode) -> bool; - auto FindCycle(txn_id_t source_txn, std::vector &path, std::unordered_set &on_path, - std::unordered_set &visited, txn_id_t *abort_txn_id) -> bool; - void UnlockAll(); + auto InsertOrDeleteTableLockSet(Transaction *txn, const std::shared_ptr &lock_request, bool insert) + -> void; + + auto InsertOrDeleteRowLockSet(Transaction *txn, const std::shared_ptr &lock_request, bool insert) + -> void; + + auto InsertRowLockSet(const std::shared_ptr>> &lock_set, + const table_oid_t &oid, const RID &rid) -> void { + auto row_lock_set = lock_set->find(oid); + if (row_lock_set == lock_set->end()) { + lock_set->emplace(oid, std::unordered_set{}); + row_lock_set = lock_set->find(oid); + } + row_lock_set->second.emplace(rid); + } + + auto DeleteRowLockSet(const std::shared_ptr>> &lock_set, + const table_oid_t &oid, const RID &rid) -> void { + auto row_lock_set = lock_set->find(oid); + if (row_lock_set == lock_set->end()) { + return; + } + row_lock_set->second.erase(rid); + } + auto Dfs(txn_id_t txn_id) -> bool { + if (safe_set_.find(txn_id) != safe_set_.end()) { + return false; + } + active_set_.insert(txn_id); + + std::vector &next_node_vector = waits_for_[txn_id]; + std::sort(next_node_vector.begin(), next_node_vector.end()); + for (txn_id_t const next_node : next_node_vector) { + if (active_set_.find(next_node) != active_set_.end()) { + return true; + } + if (Dfs(next_node)) { + return 
true; + } + } + + active_set_.erase(txn_id); + safe_set_.insert(txn_id); + return false; + } + + auto DeleteNode(txn_id_t txn_id) -> void; + + private: + /** Fall 2022 */ /** Structure that holds lock requests for a given table oid */ std::unordered_map> table_lock_map_; /** Coordination */ @@ -349,33 +367,13 @@ class LockManager { /** Waits-for graph representation. */ std::unordered_map> waits_for_; std::mutex waits_for_latch_; -}; -} // namespace bustub + std::set safe_set_; + std::set txn_set_; + std::unordered_set active_set_; -template <> -struct fmt::formatter : formatter { - // parse is inherited from formatter. - template - auto format(bustub::LockManager::LockMode x, FormatContext &ctx) const { - string_view name = "unknown"; - switch (x) { - case bustub::LockManager::LockMode::EXCLUSIVE: - name = "EXCLUSIVE"; - break; - case bustub::LockManager::LockMode::INTENTION_EXCLUSIVE: - name = "INTENTION_EXCLUSIVE"; - break; - case bustub::LockManager::LockMode::SHARED: - name = "SHARED"; - break; - case bustub::LockManager::LockMode::INTENTION_SHARED: - name = "INTENTION_SHARED"; - break; - case bustub::LockManager::LockMode::SHARED_INTENTION_EXCLUSIVE: - name = "SHARED_INTENTION_EXCLUSIVE"; - break; - } - return formatter::format(name, ctx); - } + std::unordered_map map_txn_rid_; + std::unordered_map map_txn_oid_; }; + +} // namespace bustub diff --git a/src/include/concurrency/transaction.h b/src/include/concurrency/transaction.h index faa486a..cbe2dd1 100644 --- a/src/include/concurrency/transaction.h +++ b/src/include/concurrency/transaction.h @@ -12,75 +12,145 @@ #pragma once -#include #include -#include -#include #include -#include -#include #include -#include // NOLINT #include #include // NOLINT #include #include -#include -#include #include "common/config.h" #include "common/logger.h" -#include "execution/expressions/abstract_expression.h" #include "storage/page/page.h" #include "storage/table/tuple.h" namespace bustub { -class TransactionManager; +/** 
+ * Transaction states for 2PL: + * + * _________________________ + * | v + * GROWING -> SHRINKING -> COMMITTED ABORTED + * |__________|________________________^ + * + * Transaction states for Non-2PL: + * __________ + * | v + * GROWING -> COMMITTED ABORTED + * |_________________________^ + * + **/ +enum class TransactionState { GROWING, SHRINKING, COMMITTED, ABORTED }; /** - * Transaction State. + * Transaction isolation level. */ -enum class TransactionState { RUNNING = 0, TAINTED, COMMITTED = 100, ABORTED }; +enum class IsolationLevel { READ_UNCOMMITTED, REPEATABLE_READ, READ_COMMITTED }; /** - * Transaction isolation level. READ_UNCOMMITTED will NOT be used in project 3/4 as of Fall 2023. + * Type of write operation. */ -enum class IsolationLevel { READ_UNCOMMITTED, SNAPSHOT_ISOLATION, SERIALIZABLE }; +enum class WType { INSERT = 0, DELETE, UPDATE }; class TableHeap; class Catalog; using table_oid_t = uint32_t; using index_oid_t = uint32_t; -/** Represents a link to a previous version of this tuple */ -struct UndoLink { - /* Previous version can be found in which txn */ - txn_id_t prev_txn_{INVALID_TXN_ID}; - /* The log index of the previous version in `prev_txn_` */ - int prev_log_idx_{0}; +/** + * WriteRecord tracks information related to a write. + */ +class TableWriteRecord { + public: + TableWriteRecord(RID rid, WType wtype, const Tuple &tuple, TableHeap *table) + : rid_(rid), wtype_(wtype), tuple_(tuple), table_(table) {} - friend auto operator==(const UndoLink &a, const UndoLink &b) { - return a.prev_txn_ == b.prev_txn_ && a.prev_log_idx_ == b.prev_log_idx_; - } + RID rid_; + WType wtype_; + /** The tuple is only used for the update operation. */ + Tuple tuple_; + /** The table heap specifies which table this write record is for. */ + TableHeap *table_; +}; - friend auto operator!=(const UndoLink &a, const UndoLink &b) { return !(a == b); } +/** + * WriteRecord tracks information related to a write. 
+ */ +class IndexWriteRecord { + public: + IndexWriteRecord(RID rid, table_oid_t table_oid, WType wtype, const Tuple &tuple, index_oid_t index_oid, + Catalog *catalog) + : rid_(rid), table_oid_(table_oid), wtype_(wtype), tuple_(tuple), index_oid_(index_oid), catalog_(catalog) {} + + /** The rid is the value stored in the index. */ + RID rid_; + /** Table oid. */ + table_oid_t table_oid_; + /** Write type. */ + WType wtype_; + /** The tuple is used to construct an index key. */ + Tuple tuple_; + /** The old tuple is only used for the update operation. */ + Tuple old_tuple_; + /** Each table has an index list, this is the identifier of an index into the list. */ + index_oid_t index_oid_; + /** The catalog contains metadata required to locate index. */ + Catalog *catalog_; +}; - /* Checks if the undo link points to something. */ - auto IsValid() const -> bool { return prev_txn_ != INVALID_TXN_ID; } +/** + * Reason to a transaction abortion + */ +enum class AbortReason { + LOCK_ON_SHRINKING, + UPGRADE_CONFLICT, + LOCK_SHARED_ON_READ_UNCOMMITTED, + TABLE_LOCK_NOT_PRESENT, + ATTEMPTED_INTENTION_LOCK_ON_ROW, + TABLE_UNLOCKED_BEFORE_UNLOCKING_ROWS, + INCOMPATIBLE_UPGRADE, + ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD }; -struct UndoLog { - /* Whether this log is a deletion marker */ - bool is_deleted_; - /* The fields modified by this undo log */ - std::vector modified_fields_; - /* The modified fields */ - Tuple tuple_; - /* Timestamp of this undo log */ - timestamp_t ts_{INVALID_TS}; - /* Undo log prev version */ - UndoLink prev_version_{}; +/** + * TransactionAbortException is thrown when state of a transaction is changed to ABORTED + */ +class TransactionAbortException : public std::exception { + txn_id_t txn_id_; + AbortReason abort_reason_; + + public: + explicit TransactionAbortException(txn_id_t txn_id, AbortReason abort_reason) + : txn_id_(txn_id), abort_reason_(abort_reason) {} + auto GetTransactionId() -> txn_id_t { return txn_id_; } + auto GetAbortReason() -> AbortReason 
{ return abort_reason_; } + auto GetInfo() -> std::string { + switch (abort_reason_) { + case AbortReason::LOCK_ON_SHRINKING: + return "Transaction " + std::to_string(txn_id_) + + " aborted because it can not take locks in the shrinking state\n"; + case AbortReason::UPGRADE_CONFLICT: + return "Transaction " + std::to_string(txn_id_) + + " aborted because another transaction is already waiting to upgrade its lock\n"; + case AbortReason::LOCK_SHARED_ON_READ_UNCOMMITTED: + return "Transaction " + std::to_string(txn_id_) + " aborted on lockshared on READ_UNCOMMITTED\n"; + case AbortReason::TABLE_LOCK_NOT_PRESENT: + return "Transaction " + std::to_string(txn_id_) + " aborted because table lock not present\n"; + case AbortReason::ATTEMPTED_INTENTION_LOCK_ON_ROW: + return "Transaction " + std::to_string(txn_id_) + " aborted because intention lock attempted on row\n"; + case AbortReason::TABLE_UNLOCKED_BEFORE_UNLOCKING_ROWS: + return "Transaction " + std::to_string(txn_id_) + + " aborted because table locks dropped before dropping row locks\n"; + case AbortReason::INCOMPATIBLE_UPGRADE: + return "Transaction " + std::to_string(txn_id_) + " aborted because attempted lock upgrade is incompatible\n"; + case AbortReason::ATTEMPTED_UNLOCK_BUT_NO_LOCK_HELD: + return "Transaction " + std::to_string(txn_id_) + " aborted because attempted to unlock but no lock held \n"; + } + // Todo: Should fail with unreachable. 
+ return ""; + } }; /** @@ -88,8 +158,26 @@ struct UndoLog { */ class Transaction { public: - explicit Transaction(txn_id_t txn_id, IsolationLevel isolation_level = IsolationLevel::SNAPSHOT_ISOLATION) - : isolation_level_(isolation_level), thread_id_(std::this_thread::get_id()), txn_id_(txn_id) {} + explicit Transaction(txn_id_t txn_id, IsolationLevel isolation_level = IsolationLevel::REPEATABLE_READ) + : isolation_level_(isolation_level), + thread_id_(std::this_thread::get_id()), + txn_id_(txn_id), + prev_lsn_(INVALID_LSN), + shared_lock_set_{new std::unordered_set}, + exclusive_lock_set_{new std::unordered_set}, + s_table_lock_set_{new std::unordered_set}, + x_table_lock_set_{new std::unordered_set}, + is_table_lock_set_{new std::unordered_set}, + ix_table_lock_set_{new std::unordered_set}, + six_table_lock_set_{new std::unordered_set}, + s_row_lock_set_{new std::unordered_map>}, + x_row_lock_set_{new std::unordered_map>} { + // Initialize the sets that will be tracked. + table_write_set_ = std::make_shared>(); + index_write_set_ = std::make_shared>(); + page_set_ = std::make_shared>(); + deleted_page_set_ = std::make_shared>(); + } ~Transaction() = default; @@ -101,157 +189,179 @@ class Transaction { /** @return the id of this transaction */ inline auto GetTransactionId() const -> txn_id_t { return txn_id_; } - /** @return the id of this transaction, stripping the highest bit. NEVER use/store this value unless for debugging. 
*/ - inline auto GetTransactionIdHumanReadable() const -> txn_id_t { return txn_id_ ^ TXN_START_ID; } - - /** @return the temporary timestamp of this transaction */ - inline auto GetTransactionTempTs() const -> timestamp_t { return txn_id_; } - /** @return the isolation level of this transaction */ inline auto GetIsolationLevel() const -> IsolationLevel { return isolation_level_; } - /** @return the transaction state */ - inline auto GetTransactionState() const -> TransactionState { return state_; } + /** @return the list of table write records of this transaction */ + inline auto GetWriteSet() -> std::shared_ptr> { return table_write_set_; } - /** @return the read ts */ - inline auto GetReadTs() const -> timestamp_t { return read_ts_; } + /** @return the list of index write records of this transaction */ + inline auto GetIndexWriteSet() -> std::shared_ptr> { return index_write_set_; } - /** @return the commit ts */ - inline auto GetCommitTs() const -> timestamp_t { return commit_ts_; } + /** @return the page set */ + inline auto GetPageSet() -> std::shared_ptr> { return page_set_; } - /** Modify an existing undo log. */ - inline auto ModifyUndoLog(int log_idx, UndoLog new_log) { - std::scoped_lock lck(latch_); - undo_logs_[log_idx] = std::move(new_log); + /** + * Adds a tuple write record into the table write set. + * @param write_record write record to be added + */ + inline void AppendTableWriteRecord(const TableWriteRecord &write_record) { + table_write_set_->push_back(write_record); } - /** @return the index of the undo log in this transaction */ - inline auto AppendUndoLog(UndoLog log) -> UndoLink { - std::scoped_lock lck(latch_); - undo_logs_.emplace_back(std::move(log)); - return {txn_id_, static_cast(undo_logs_.size() - 1)}; + /** + * Adds an index write record into the index write set. 
+ * @param write_record write record to be added + */ + inline void AppendIndexWriteRecord(const IndexWriteRecord &write_record) { + index_write_set_->push_back(write_record); } - inline auto AppendWriteSet(table_oid_t t, RID rid) { - std::scoped_lock lck(latch_); - write_set_[t].insert(rid); + /** + * Adds a page into the page set. + * @param page page to be added + */ + inline void AddIntoPageSet(Page *page) { page_set_->push_back(page); } + + /** @return the deleted page set */ + inline auto GetDeletedPageSet() -> std::shared_ptr> { return deleted_page_set_; } + + /** + * Adds a page to the deleted page set. + * @param page_id id of the page to be marked as deleted + */ + inline void AddIntoDeletedPageSet(page_id_t page_id) { deleted_page_set_->insert(page_id); } + + /** @return the set of resources under a shared lock */ + inline auto GetSharedLockSet() -> std::shared_ptr> { return shared_lock_set_; } + + /** @return the set of rows under a shared lock */ + inline auto GetSharedRowLockSet() -> std::shared_ptr>> { + return s_row_lock_set_; } - inline auto GetWriteSets() -> const std::unordered_map> & { return write_set_; } + /** @return the set of resources under an exclusive lock */ + inline auto GetExclusiveLockSet() -> std::shared_ptr> { return exclusive_lock_set_; } - inline auto AppendScanPredicate(table_oid_t t, const AbstractExpressionRef &predicate) { - std::scoped_lock lck(latch_); - scan_predicates_[t].emplace_back(predicate); + /** @return the set of rows in under an exclusive lock */ + inline auto GetExclusiveRowLockSet() -> std::shared_ptr>> { + return x_row_lock_set_; } - inline auto GetScanPredicates() -> const std::unordered_map> & { - return scan_predicates_; + /** @return the set of resources under a shared lock */ + inline auto GetSharedTableLockSet() -> std::shared_ptr> { return s_table_lock_set_; } + inline auto GetExclusiveTableLockSet() -> std::shared_ptr> { + return x_table_lock_set_; + } + inline auto GetIntentionSharedTableLockSet() -> 
std::shared_ptr> { + return is_table_lock_set_; + } + inline auto GetIntentionExclusiveTableLockSet() -> std::shared_ptr> { + return ix_table_lock_set_; + } + inline auto GetSharedIntentionExclusiveTableLockSet() -> std::shared_ptr> { + return six_table_lock_set_; } - inline auto GetUndoLog(size_t log_id) -> UndoLog { - std::scoped_lock lck(latch_); - return undo_logs_[log_id]; + /** @return true if rid (belong to table oid) is shared locked by this transaction */ + auto IsRowSharedLocked(const table_oid_t &oid, const RID &rid) -> bool { + auto row_lock_set = s_row_lock_set_->find(oid); + if (row_lock_set == s_row_lock_set_->end()) { + return false; + } + return row_lock_set->second.find(rid) != row_lock_set->second.end(); } - inline auto GetUndoLogNum() -> size_t { - std::scoped_lock lck(latch_); - return undo_logs_.size(); + /** @return true if rid (belong to table oid) is exclusive locked by this transaction */ + auto IsRowExclusiveLocked(const table_oid_t &oid, const RID &rid) -> bool { + auto row_lock_set = x_row_lock_set_->find(oid); + if (row_lock_set == x_row_lock_set_->end()) { + return false; + } + return row_lock_set->second.find(rid) != row_lock_set->second.end(); } - /** Use this function in leaderboard benchmarks for online garbage collection. For stop-the-world GC, simply remove - * the txn from the txn_map. 
*/ - inline auto ClearUndoLog() -> size_t { - std::scoped_lock lck(latch_); - return undo_logs_.size(); + auto IsTableIntentionSharedLocked(const table_oid_t &oid) -> bool { + return is_table_lock_set_->find(oid) != is_table_lock_set_->end(); } - void SetTainted(); + auto IsTableSharedLocked(const table_oid_t &oid) -> bool { + return s_table_lock_set_->find(oid) != s_table_lock_set_->end(); + } - private: - friend class TransactionManager; + auto IsTableIntentionExclusiveLocked(const table_oid_t &oid) -> bool { + return ix_table_lock_set_->find(oid) != ix_table_lock_set_->end(); + } - // The below fields should be ONLY changed by txn manager (with the txn manager lock held). + auto IsTableExclusiveLocked(const table_oid_t &oid) -> bool { + return x_table_lock_set_->find(oid) != x_table_lock_set_->end(); + } - /** The state of this transaction. */ - std::atomic state_{TransactionState::RUNNING}; + auto IsTableSharedIntentionExclusiveLocked(const table_oid_t &oid) -> bool { + return six_table_lock_set_->find(oid) != six_table_lock_set_->end(); + } - /** The read ts */ - std::atomic read_ts_{0}; + /** @return the current state of the transaction */ + inline auto GetState() -> TransactionState { return state_; } - /** The commit ts */ - std::atomic commit_ts_{INVALID_TS}; + inline auto LockTxn() -> void { latch_.lock(); } - /** The latch for this transaction for accessing txn-level undo logs, protecting all fields below. */ - std::mutex latch_; + inline auto UnlockTxn() -> void { latch_.unlock(); } /** - * @brief Store undo logs. Other undo logs / table heap will store (txn_id, index) pairs, and therefore - * you should only append to this vector or update things in-place without removing anything. + * Set the state of the transaction. 
+ * @param state new state */ - std::vector undo_logs_; + inline void SetState(TransactionState state) { state_ = state; } - /** stores the RID of write tuples */ - std::unordered_map> write_set_; - /** store all scan predicates */ - std::unordered_map> scan_predicates_; + /** @return the previous LSN */ + inline auto GetPrevLSN() -> lsn_t { return prev_lsn_; } - // The below fields are set when a txn is created and will NEVER be changed. + /** + * Set the previous LSN. + * @param prev_lsn new previous lsn + */ + inline void SetPrevLSN(lsn_t prev_lsn) { prev_lsn_ = prev_lsn; } + private: + /** The current transaction state. */ + TransactionState state_{TransactionState::GROWING}; /** The isolation level of the transaction. */ - const IsolationLevel isolation_level_; - - /** The thread ID which the txn starts from. */ - const std::thread::id thread_id_; - + IsolationLevel isolation_level_; + /** The thread ID, used in single-threaded transactions. */ + std::thread::id thread_id_; /** The ID of this transaction. */ - const txn_id_t txn_id_; -}; + txn_id_t txn_id_; -} // namespace bustub + /** The undo set of table tuples. */ + std::shared_ptr> table_write_set_; + /** The undo set of indexes. */ + std::shared_ptr> index_write_set_; + /** The LSN of the last record written by the transaction. */ + lsn_t prev_lsn_; -template <> -struct fmt::formatter : formatter { - // parse is inherited from formatter. - template - auto format(bustub::IsolationLevel x, FormatContext &ctx) const { - using bustub::IsolationLevel; - string_view name = "unknown"; - switch (x) { - case IsolationLevel::READ_UNCOMMITTED: - name = "READ_UNCOMMITTED"; - break; - case IsolationLevel::SNAPSHOT_ISOLATION: - name = "SNAPSHOT_ISOLATION"; - break; - case IsolationLevel::SERIALIZABLE: - name = "SERIALIZABLE"; - break; - } - return formatter::format(name, ctx); - } -}; + std::mutex latch_; -template <> -struct fmt::formatter : formatter { - // parse is inherited from formatter. 
- template - auto format(bustub::TransactionState x, FormatContext &ctx) const { - using bustub::TransactionState; - string_view name = "unknown"; - switch (x) { - case TransactionState::RUNNING: - name = "RUNNING"; - break; - case TransactionState::ABORTED: - name = "ABORTED"; - break; - case TransactionState::COMMITTED: - name = "COMMITTED"; - break; - case TransactionState::TAINTED: - name = "TAINTED"; - break; - } - return formatter::format(name, ctx); - } + /** Concurrent index: the pages that were latched during index operation. */ + std::shared_ptr> page_set_; + /** Concurrent index: the page IDs that were deleted during index operation.*/ + std::shared_ptr> deleted_page_set_; + + /** LockManager: the set of shared-locked tuples held by this transaction. */ + std::shared_ptr> shared_lock_set_; + /** LockManager: the set of exclusive-locked tuples held by this transaction. */ + std::shared_ptr> exclusive_lock_set_; + + /** LockManager: the set of table locks held by this transaction. */ + std::shared_ptr> s_table_lock_set_; + std::shared_ptr> x_table_lock_set_; + std::shared_ptr> is_table_lock_set_; + std::shared_ptr> ix_table_lock_set_; + std::shared_ptr> six_table_lock_set_; + + /** LockManager: the set of row locks held by this transaction. 
*/ + std::shared_ptr>> s_row_lock_set_; + std::shared_ptr>> x_row_lock_set_; }; + +} // namespace bustub diff --git a/src/include/concurrency/transaction_manager.h b/src/include/concurrency/transaction_manager.h index 6a33ffa..aec37c4 100644 --- a/src/include/concurrency/transaction_manager.h +++ b/src/include/concurrency/transaction_manager.h @@ -13,146 +13,133 @@ #pragma once #include -#include -#include -#include // NOLINT -#include #include #include #include -#include "catalog/schema.h" #include "common/config.h" +#include "concurrency/lock_manager.h" #include "concurrency/transaction.h" -#include "concurrency/watermark.h" #include "recovery/log_manager.h" -#include "storage/table/tuple.h" namespace bustub { - -/// The first undo link in the version chain, that links table heap tuple to the undo log. -struct VersionUndoLink { - /** The next version in the version chain. */ - UndoLink prev_; - /** Whether a transaction is modifying the version link. Fall 2023: you do not need to read / write this field until - * task 4.2. */ - bool in_progress_{false}; - - friend auto operator==(const VersionUndoLink &a, const VersionUndoLink &b) { - return a.prev_ == b.prev_ && a.in_progress_ == b.in_progress_; - } - - friend auto operator!=(const VersionUndoLink &a, const VersionUndoLink &b) { return !(a == b); } - - inline static auto FromOptionalUndoLink(std::optional undo_link) -> std::optional { - if (undo_link.has_value()) { - return VersionUndoLink{*undo_link}; - } - return std::nullopt; - } -}; +class LockManager; /** * TransactionManager keeps track of all the transactions running in the system. */ class TransactionManager { public: - TransactionManager() = default; + explicit TransactionManager(LockManager *lock_manager, LogManager *log_manager = nullptr) + : lock_manager_(lock_manager), log_manager_(log_manager) {} + ~TransactionManager() = default; /** * Begins a new transaction. 
+ * @param txn an optional transaction object to be initialized, otherwise a new transaction is created. * @param isolation_level an optional isolation level of the transaction. * @return an initialized transaction */ - auto Begin(IsolationLevel isolation_level = IsolationLevel::SNAPSHOT_ISOLATION) -> Transaction *; + auto Begin(Transaction *txn = nullptr, IsolationLevel isolation_level = IsolationLevel::REPEATABLE_READ) + -> Transaction *; /** * Commits a transaction. - * @param txn the transaction to commit, the txn will be managed by the txn manager so no need to delete it by - * yourself + * @param txn the transaction to commit */ - auto Commit(Transaction *txn) -> bool; + void Commit(Transaction *txn); /** * Aborts a transaction - * @param txn the transaction to abort, the txn will be managed by the txn manager so no need to delete it by yourself + * @param txn the transaction to abort */ void Abort(Transaction *txn); /** - * @brief Use this function before task 4.2. Update an undo link that links table heap tuple to the first undo log. - * Before updating, `check` function will be called to ensure validity. + * Global list of running transactions */ - auto UpdateUndoLink(RID rid, std::optional prev_link, - std::function)> &&check = nullptr) -> bool; + + /** The transaction map is a global list of all the running transactions in the system. */ + static std::unordered_map txn_map; + static std::shared_mutex txn_map_mutex; /** - * @brief Use this function after task 4.2. Update an undo link that links table heap tuple to the first undo log. - * Before updating, `check` function will be called to ensure validity. + * Locates and returns the transaction with the given transaction ID. + * @param txn_id the id of the transaction to be found, it must exist! 
+ * @return the transaction with the given transaction id */ - auto UpdateVersionLink(RID rid, std::optional prev_version, - std::function)> &&check = nullptr) -> bool; - - /** @brief Get the first undo log of a table heap tuple. Use this before task 4.2 */ - auto GetUndoLink(RID rid) -> std::optional; - - /** @brief Get the first undo log of a table heap tuple. Use this after task 4.2 */ - auto GetVersionLink(RID rid) -> std::optional; - - /** @brief Access the transaction undo log buffer and get the undo log. Return nullopt if the txn does not exist. Will - * still throw an exception if the index is out of range. */ - auto GetUndoLogOptional(UndoLink link) -> std::optional; - - /** @brief Access the transaction undo log buffer and get the undo log. Except when accessing the current txn buffer, - * you should always call this function to get the undo log instead of manually retrieve the txn shared_ptr and access - * the buffer. */ - auto GetUndoLog(UndoLink link) -> UndoLog; - - /** @brief Get the lowest read timestamp in the system. */ - auto GetWatermark() -> timestamp_t { return running_txns_.GetWatermark(); } - - /** @brief Stop-the-world garbage collection. Will be called only when all transactions are not accessing the table - * heap. */ - void GarbageCollection(); + static auto GetTransaction(txn_id_t txn_id) -> Transaction * { + std::shared_lock l(TransactionManager::txn_map_mutex); + assert(TransactionManager::txn_map.find(txn_id) != TransactionManager::txn_map.end()); + auto *res = TransactionManager::txn_map[txn_id]; + assert(res != nullptr); + return res; + } - /** protects txn map */ - std::shared_mutex txn_map_mutex_; - /** All transactions, running or committed */ - std::unordered_map> txn_map_; + /** Prevents all transactions from performing operations, used for checkpointing. */ + void BlockAllTransactions(); - struct PageVersionInfo { - /** protects the map */ - std::shared_mutex mutex_; - /** Stores previous version info for all slots. 
Note: DO NOT use `[x]` to access it because - * it will create new elements even if it does not exist. Use `find` instead. - */ - std::unordered_map prev_version_; - }; + /** Resumes all transactions, used for checkpointing. */ + void ResumeTransactions(); - /** protects version info */ - std::shared_mutex version_info_mutex_; - /** Stores the previous version of each tuple in the table heap. Do not directly access this field. Use the helper - * functions in `transaction_manager_impl.cpp`. */ - std::unordered_map> version_info_; + private: + /** + * Releases all the locks held by the given transaction. + * @param txn the transaction whose locks should be released + */ + void ReleaseLocks(Transaction *txn) { + /** Drop all row locks */ + txn->LockTxn(); + std::unordered_map> row_lock_set; + for (const auto &s_row_lock_set : *txn->GetSharedRowLockSet()) { + for (auto rid : s_row_lock_set.second) { + row_lock_set[s_row_lock_set.first].emplace(rid); + } + } + for (const auto &x_row_lock_set : *txn->GetExclusiveRowLockSet()) { + for (auto rid : x_row_lock_set.second) { + row_lock_set[x_row_lock_set.first].emplace(rid); + } + } - /** Stores all the read_ts of running txns so as to facilitate garbage collection. */ - Watermark running_txns_{0}; + /** Drop all table locks */ + std::unordered_set table_lock_set; + for (auto oid : *txn->GetSharedTableLockSet()) { + table_lock_set.emplace(oid); + } + for (table_oid_t oid : *(txn->GetIntentionSharedTableLockSet())) { + table_lock_set.emplace(oid); + } + for (auto oid : *txn->GetExclusiveTableLockSet()) { + table_lock_set.emplace(oid); + } + for (auto oid : *txn->GetIntentionExclusiveTableLockSet()) { + table_lock_set.emplace(oid); + } + for (auto oid : *txn->GetSharedIntentionExclusiveTableLockSet()) { + table_lock_set.emplace(oid); + } + txn->UnlockTxn(); - /** Only one txn is allowed to commit at a time */ - std::mutex commit_mutex_; - /** The last committed timestamp. 
*/ - std::atomic last_commit_ts_{0}; + for (const auto &locked_table_row_set : row_lock_set) { + table_oid_t oid = locked_table_row_set.first; + for (auto rid : locked_table_row_set.second) { + lock_manager_->UnlockRow(txn, oid, rid); + } + } - /** Catalog */ - Catalog *catalog_; + for (auto oid : table_lock_set) { + lock_manager_->UnlockTable(txn, oid); + } + } - std::atomic next_txn_id_{TXN_START_ID}; + std::atomic next_txn_id_{0}; + LockManager *lock_manager_ __attribute__((__unused__)); + LogManager *log_manager_ __attribute__((__unused__)); - private: - /** @brief Verify if a txn satisfies serializability. We will not test this function and you can change / remove it as - * you want. */ - auto VerifyTxn(Transaction *txn) -> bool; + /** The global transaction latch is used for checkpointing. */ + ReaderWriterLatch global_txn_latch_; }; } // namespace bustub diff --git a/src/include/concurrency/watermark.h b/src/include/concurrency/watermark.h deleted file mode 100644 index c171cd6..0000000 --- a/src/include/concurrency/watermark.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include - -#include "concurrency/transaction.h" -#include "storage/table/tuple.h" - -namespace bustub { - -/** - * @brief tracks all the read timestamps. - * - */ -class Watermark { - public: - explicit Watermark(timestamp_t commit_ts) : commit_ts_(commit_ts), watermark_(commit_ts) {} - - auto AddTxn(timestamp_t read_ts) -> void; - - auto RemoveTxn(timestamp_t read_ts) -> void; - - /** The caller should update commit ts before removing the txn from the watermark so that we can track watermark - * correctly. 
*/ - auto UpdateCommitTs(timestamp_t commit_ts) { commit_ts_ = commit_ts; } - - auto GetWatermark() -> timestamp_t { - if (current_reads_.empty()) { - return commit_ts_; - } - return watermark_; - } - - timestamp_t commit_ts_; - - timestamp_t watermark_; - - std::unordered_map current_reads_; -}; - -}; // namespace bustub diff --git a/src/include/container/disk/hash/disk_extendible_hash_table.h b/src/include/container/disk/hash/disk_extendible_hash_table.h index ba83ae7..e0ebcd5 100644 --- a/src/include/container/disk/hash/disk_extendible_hash_table.h +++ b/src/include/container/disk/hash/disk_extendible_hash_table.h @@ -6,99 +6,83 @@ // // Identification: src/include/container/disk/hash/extendible_hash_table.h // -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group +// Copyright (c) 2015-2021, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #pragma once -#include #include #include -#include #include #include "buffer/buffer_pool_manager.h" -#include "common/config.h" #include "concurrency/transaction.h" #include "container/hash/hash_function.h" -#include "storage/page/extendible_htable_bucket_page.h" -#include "storage/page/extendible_htable_directory_page.h" -#include "storage/page/extendible_htable_header_page.h" -#include "storage/page/page_guard.h" +#include "storage/page/hash_table_bucket_page.h" +#include "storage/page/hash_table_directory_page.h" namespace bustub { +#define HASH_TABLE_TYPE DiskExtendibleHashTable + /** * Implementation of extendible hash table that is backed by a buffer pool * manager. Non-unique keys are supported. Supports insert and delete. The * table grows/shrinks dynamically as buckets become full/empty. */ -template +template class DiskExtendibleHashTable { public: /** - * @brief Creates a new DiskExtendibleHashTable. + * Creates a new DiskExtendibleHashTable. 
* - * @param name - * @param bpm buffer pool manager to be used - * @param cmp comparator for keys + * @param buffer_pool_manager buffer pool manager to be used + * @param comparator comparator for keys * @param hash_fn the hash function - * @param header_max_depth the max depth allowed for the header page - * @param directory_max_depth the max depth allowed for the directory page - * @param bucket_max_size the max size allowed for the bucket page array */ - explicit DiskExtendibleHashTable(const std::string &name, BufferPoolManager *bpm, const KC &cmp, - const HashFunction &hash_fn, uint32_t header_max_depth = HTABLE_HEADER_MAX_DEPTH, - uint32_t directory_max_depth = HTABLE_DIRECTORY_MAX_DEPTH, - uint32_t bucket_max_size = HTableBucketArraySize(sizeof(std::pair))); + explicit DiskExtendibleHashTable(const std::string &name, BufferPoolManager *buffer_pool_manager, + const KeyComparator &comparator, HashFunction hash_fn); - /** TODO(P2): Add implementation + /** * Inserts a key-value pair into the hash table. * + * @param transaction the current transaction * @param key the key to create * @param value the value to be associated with the key - * @param transaction the current transaction * @return true if insert succeeded, false otherwise */ - auto Insert(const K &key, const V &value, Transaction *transaction = nullptr) -> bool; + auto Insert(Transaction *transaction, const KeyType &key, const ValueType &value) -> bool; - /** TODO(P2): Add implementation - * Removes a key-value pair from the hash table. + /** + * Deletes the associated value for the given key. 
* + * @param transaction the current transaction * @param key the key to delete * @param value the value to delete - * @param transaction the current transaction * @return true if remove succeeded, false otherwise */ - auto Remove(const K &key, Transaction *transaction = nullptr) -> bool; + auto Remove(Transaction *transaction, const KeyType &key, const ValueType &value) -> bool; - /** TODO(P2): Add implementation - * Get the value associated with a given key in the hash table. - * - * Note(fall2023): This semester you will only need to support unique key-value pairs. + /** + * Performs a point query on the hash table. * + * @param transaction the current transaction * @param key the key to look up * @param[out] result the value(s) associated with a given key - * @param transaction the current transaction * @return the value(s) associated with the given key */ - auto GetValue(const K &key, std::vector *result, Transaction *transaction = nullptr) const -> bool; + auto GetValue(Transaction *transaction, const KeyType &key, std::vector *result) -> bool; /** - * Helper function to verify the integrity of the extendible hash table's directory. + * Returns the global depth */ - void VerifyIntegrity() const; + auto GetGlobalDepth() -> uint32_t; /** - * Helper function to expose the header page id. - */ - auto GetHeaderPageId() const -> page_id_t; - - /** - * Helper function to print out the HashTable. + * Helper function to verify the integrity of the extendible hash table's directory. */ - void PrintHT() const; + void VerifyIntegrity(); private: /** @@ -106,32 +90,85 @@ class DiskExtendibleHashTable { * for extendible hashing. 
* * @param key the key to hash - * @return the down-casted 32-bit hash + * @return the downcasted 32-bit hash */ - auto Hash(K key) const -> uint32_t; + inline auto Hash(KeyType key) -> uint32_t; - auto InsertToNewDirectory(ExtendibleHTableHeaderPage *header, uint32_t directory_idx, uint32_t hash, const K &key, - const V &value) -> bool; + /** + * KeyToDirectoryIndex - maps a key to a directory index + * + * In Extendible Hashing we map a key to a directory index + * using the following hash + mask function. + * + * DirectoryIndex = Hash(key) & GLOBAL_DEPTH_MASK + * + * where GLOBAL_DEPTH_MASK is a mask with exactly GLOBAL_DEPTH 1's from LSB + * upwards. For example, global depth 3 corresponds to 0x00000007 in a 32-bit + * representation. + * + * @param key the key to use for lookup + * @param dir_page to use for lookup of global depth + * @return the directory index + */ + auto KeyToDirectoryIndex(KeyType key, HashTableDirectoryPage *dir_page) -> uint32_t; + + /** + * Get the bucket page_id corresponding to a key. + * + * @param key the key for lookup + * @param dir_page a pointer to the hash table's directory page + * @return the bucket page_id corresponding to the input key + */ + auto KeyToPageId(KeyType key, HashTableDirectoryPage *dir_page) -> page_id_t; - auto InsertToNewBucket(ExtendibleHTableDirectoryPage *directory, uint32_t bucket_idx, const K &key, const V &value) - -> bool; + /** + * Fetches the directory page from the buffer pool manager. + * + * @return a pointer to the directory page + */ + auto FetchDirectoryPage() -> HashTableDirectoryPage *; + + /** + * Fetches the a bucket page from the buffer pool manager using the bucket's page_id. 
+ * + * @param bucket_page_id the page_id to fetch + * @return a pointer to a bucket page + */ + auto FetchBucketPage(page_id_t bucket_page_id) -> HASH_TABLE_BUCKET_TYPE *; - void UpdateDirectoryMapping(ExtendibleHTableDirectoryPage *directory, uint32_t new_bucket_idx, - page_id_t new_bucket_page_id, uint32_t new_local_depth, uint32_t local_depth_mask); + /** + * Performs insertion with an optional bucket splitting. + * + * @param transaction a pointer to the current transaction + * @param key the key to insert + * @param value the value to insert + * @return whether or not the insertion was successful + */ + auto SplitInsert(Transaction *transaction, const KeyType &key, const ValueType &value) -> bool; - void MigrateEntries(ExtendibleHTableBucketPage *old_bucket, - ExtendibleHTableBucketPage *new_bucket, uint32_t new_bucket_idx, - uint32_t local_depth_mask); + /** + * Optionally merges an empty bucket into it's pair. This is called by Remove, + * if Remove makes a bucket empty. + * + * There are three conditions under which we skip the merge: + * 1. The bucket is no longer empty. + * 2. The bucket has local depth 0. + * 3. The bucket's local depth doesn't match its split image's local depth. 
+ * + * @param transaction a pointer to the current transaction + * @param key the key that was removed + * @param value the value that was removed + */ + void Merge(Transaction *transaction, const KeyType &key, const ValueType &value); // member variables - std::string index_name_; - BufferPoolManager *bpm_; - KC cmp_; - HashFunction hash_fn_; - uint32_t header_max_depth_; - uint32_t directory_max_depth_; - uint32_t bucket_max_size_; - page_id_t header_page_id_; + page_id_t directory_page_id_; + BufferPoolManager *buffer_pool_manager_; + KeyComparator comparator_; + + // Readers includes inserts and removes, writers are splits and merges + ReaderWriterLatch table_latch_; + HashFunction hash_fn_; }; } // namespace bustub diff --git a/src/include/container/hash/extendible_hash_table.h b/src/include/container/hash/extendible_hash_table.h new file mode 100644 index 0000000..430dfa5 --- /dev/null +++ b/src/include/container/hash/extendible_hash_table.h @@ -0,0 +1,201 @@ +//===----------------------------------------------------------------------===// +// +// BusTub +// +// extendible_hash_table.h +// +// Identification: src/include/container/hash/extendible_hash_table.h +// +// Copyright (c) 2015-2021, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// +/** + * extendible_hash_table.h + * + * Implementation of in-memory hash table using extendible hashing + */ + +#pragma once + +#include +#include +#include // NOLINT +#include +#include + +#include "container/hash/hash_table.h" + +namespace bustub { + +/** + * ExtendibleHashTable implements a hash table using the extendible hashing algorithm. + * @tparam K key type + * @tparam V value type + */ +template +class ExtendibleHashTable : public HashTable { + public: + /** + * + * TODO(P1): Add implementation + * + * @brief Create a new ExtendibleHashTable. 
+ * @param bucket_size: fixed size for each bucket + */ + explicit ExtendibleHashTable(size_t bucket_size); + + /** + * @brief Get the global depth of the directory. + * @return The global depth of the directory. + */ + auto GetGlobalDepth() const -> int; + + /** + * @brief Get the local depth of the bucket that the given directory index points to. + * @param dir_index The index in the directory. + * @return The local depth of the bucket. + */ + auto GetLocalDepth(int dir_index) const -> int; + + /** + * @brief Get the number of buckets in the directory. + * @return The number of buckets in the directory. + */ + auto GetNumBuckets() const -> int; + + /** + * + * TODO(P1): Add implementation + * + * @brief Find the value associated with the given key. + * + * Use IndexOf(key) to find the directory index the key hashes to. + * + * @param key The key to be searched. + * @param[out] value The value associated with the key. + * @return True if the key is found, false otherwise. + */ + auto Find(const K &key, V &value) -> bool override; + + /** + * + * TODO(P1): Add implementation + * + * @brief Insert the given key-value pair into the hash table. + * If a key already exists, the value should be updated. + * If the bucket is full and can't be inserted, do the following steps before retrying: + * 1. If the local depth of the bucket is equal to the global depth, + * increment the global depth and double the size of the directory. + * 2. Increment the local depth of the bucket. + * 3. Split the bucket and redistribute directory pointers & the kv pairs in the bucket. + * + * @param key The key to be inserted. + * @param value The value to be inserted. + */ + void Insert(const K &key, const V &value) override; + + /** + * + * TODO(P1): Add implementation + * + * @brief Given the key, remove the corresponding key-value pair in the hash table. + * Shrink & Combination is not required for this project + * @param key The key to be deleted. 
+ * @return True if the key exists, false otherwise. + */ + auto Remove(const K &key) -> bool override; + + /** + * Bucket class for each hash table bucket that the directory points to. + */ + class Bucket { + public: + explicit Bucket(size_t size, int depth = 0); + + /** @brief Check if a bucket is full. */ + inline auto IsFull() const -> bool { return list_.size() == size_; } + + /** @brief Get the local depth of the bucket. */ + inline auto GetDepth() const -> int { return depth_; } + + /** @brief Increment the local depth of a bucket. */ + inline void IncrementDepth() { depth_++; } + + inline auto GetItems() -> std::list> & { return list_; } + + /** + * + * TODO(P1): Add implementation + * + * @brief Find the value associated with the given key in the bucket. + * @param key The key to be searched. + * @param[out] value The value associated with the key. + * @return True if the key is found, false otherwise. + */ + auto Find(const K &key, V &value) -> bool; + + /** + * + * TODO(P1): Add implementation + * + * @brief Given the key, remove the corresponding key-value pair in the bucket. + * @param key The key to be deleted. + * @return True if the key exists, false otherwise. + */ + auto Remove(const K &key) -> bool; + + /** + * + * TODO(P1): Add implementation + * + * @brief Insert the given key-value pair into the bucket. + * 1. If a key already exists, the value should be updated. + * 2. If the bucket is full, do nothing and return false. + * @param key The key to be inserted. + * @param value The value to be inserted. + * @return True if the key-value pair is inserted, false otherwise. + */ + auto Insert(const K &key, const V &value) -> bool; + + private: + // TODO(student): You may add additional private members and helper functions + size_t size_; + int depth_; + std::list> list_; + }; + + private: + // TODO(student): You may add additional private members and helper functions and remove the ones + // you don't need. 
+ + int global_depth_; // The global depth of the directory + size_t bucket_size_; // The size of a bucket + int num_buckets_; // The number of buckets in the hash table + mutable std::mutex latch_; + std::vector> dir_; // The directory of the hash table + + // The following functions are completely optional, you can delete them if you have your own ideas. + + /** + * @brief Redistribute the kv pairs in a full bucket. + * @param bucket The bucket to be redistributed. + */ + auto RedistributeBucket(std::shared_ptr bucket) -> void; + + /***************************************************************** + * Must acquire latch_ first before calling the below functions. * + *****************************************************************/ + + /** + * @brief For the given key, return the entry index in the directory where the key hashes to. + * @param key The key to be hashed. + * @return The entry index in the directory. + */ + auto IndexOf(const K &key) -> size_t; + + auto GetGlobalDepthInternal() const -> int; + auto GetLocalDepthInternal(int dir_index) const -> int; + auto GetNumBucketsInternal() const -> int; +}; + +} // namespace bustub diff --git a/src/include/container/hash/hash_function.h b/src/include/container/hash/hash_function.h index 2e1b1cb..88e63bd 100644 --- a/src/include/container/hash/hash_function.h +++ b/src/include/container/hash/hash_function.h @@ -25,7 +25,7 @@ class HashFunction { * @param key the key to be hashed * @return the hashed value */ - virtual auto GetHash(KeyType key) const -> uint64_t { + virtual auto GetHash(KeyType key) -> uint64_t { uint64_t hash[2]; murmur3::MurmurHash3_x64_128(reinterpret_cast(&key), static_cast(sizeof(KeyType)), 0, reinterpret_cast(&hash)); diff --git a/src/include/execution/check_options.h b/src/include/execution/check_options.h deleted file mode 100644 index 8544091..0000000 --- a/src/include/execution/check_options.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2022 RisingLight Project Authors -// -// 
Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#pragma once - -namespace bustub { - -enum class CheckOption : uint8_t { - ENABLE_NLJ_CHECK = 0, - ENABLE_TOPN_CHECK = 1, -}; - -/** - * The CheckOptions class contains the set of check options used for testing - * executor logic. - */ -class CheckOptions { - public: - std::unordered_set check_options_set_; -}; - -}; // namespace bustub diff --git a/src/include/execution/execution_common.h b/src/include/execution/execution_common.h deleted file mode 100644 index 584f8af..0000000 --- a/src/include/execution/execution_common.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include - -#include "catalog/catalog.h" -#include "catalog/schema.h" -#include "concurrency/transaction.h" -#include "storage/table/tuple.h" - -namespace bustub { - -auto ReconstructTuple(const Schema *schema, const Tuple &base_tuple, const TupleMeta &base_meta, - const std::vector &undo_logs) -> std::optional; - -void TxnMgrDbg(const std::string &info, TransactionManager *txn_mgr, const TableInfo *table_info, - TableHeap *table_heap); - -// Add new functions as needed... You are likely need to define some more functions. -// -// To give you a sense of what can be shared across executors / transaction manager, here are the -// list of helper function names that we defined in the reference solution. You should come up with -// your own when you go through the process. 
-// * CollectUndoLogs -// * WalkUndoLogs -// * Modify -// * IsWriteWriteConflict -// * GenerateDiffLog -// * GenerateNullTupleForSchema -// * GetUndoLogSchema -// -// We do not provide the signatures for these functions because it depends on the your implementation -// of other parts of the system. You do not need to define the same set of helper functions in -// your implementation. Please add your own ones as necessary so that you do not need to write -// the same code everywhere. - -} // namespace bustub diff --git a/src/include/execution/execution_engine.h b/src/include/execution/execution_engine.h index b9bedbb..9506b47 100644 --- a/src/include/execution/execution_engine.h +++ b/src/include/execution/execution_engine.h @@ -20,7 +20,6 @@ #include "concurrency/transaction_manager.h" #include "execution/executor_context.h" #include "execution/executor_factory.h" -#include "execution/executors/init_check_executor.h" #include "execution/plans/abstract_plan.h" #include "storage/table/tuple.h" @@ -64,8 +63,10 @@ class ExecutionEngine { try { executor->Init(); PollExecutor(executor.get(), plan, result_set); - PerformChecks(exec_ctx); } catch (const ExecutionException &ex) { +#ifndef NDEBUG + LOG_ERROR("Error Encountered in Executor Execution: %s", ex.what()); +#endif executor_succeeded = false; if (result_set != nullptr) { result_set->clear(); @@ -75,16 +76,6 @@ class ExecutionEngine { return executor_succeeded; } - void PerformChecks(ExecutorContext *exec_ctx) { - for (const auto &[left_executor, right_executor] : exec_ctx->GetNLJCheckExecutorSet()) { - auto casted_left_executor = dynamic_cast(left_executor); - auto casted_right_executor = dynamic_cast(right_executor); - BUSTUB_ASSERT(casted_right_executor->GetInitCount() + 1 >= casted_left_executor->GetNextCount(), - "nlj check failed, are you initialising the right executor every time when there is a left tuple? 
" - "(off-by-one is okay)"); - } - } - private: /** * Poll the executor until exhausted, or exception escapes. diff --git a/src/include/execution/executor_context.h b/src/include/execution/executor_context.h index f61a100..113a768 100644 --- a/src/include/execution/executor_context.h +++ b/src/include/execution/executor_context.h @@ -12,20 +12,15 @@ #pragma once -#include -#include #include #include #include #include "catalog/catalog.h" #include "concurrency/transaction.h" -#include "execution/check_options.h" -#include "execution/executors/abstract_executor.h" #include "storage/page/tmp_tuple_page.h" namespace bustub { -class AbstractExecutor; /** * ExecutorContext stores all the context necessary to run an executor. */ @@ -40,17 +35,8 @@ class ExecutorContext { * @param lock_mgr The lock manager that the executor uses */ ExecutorContext(Transaction *transaction, Catalog *catalog, BufferPoolManager *bpm, TransactionManager *txn_mgr, - LockManager *lock_mgr, bool is_delete) - : transaction_(transaction), - catalog_{catalog}, - bpm_{bpm}, - txn_mgr_(txn_mgr), - lock_mgr_(lock_mgr), - is_delete_(is_delete) { - nlj_check_exec_set_ = std::deque>( - std::deque>{}); - check_options_ = std::make_shared(); - } + LockManager *lock_mgr) + : transaction_(transaction), catalog_{catalog}, bpm_{bpm}, txn_mgr_(txn_mgr), lock_mgr_(lock_mgr) {} ~ExecutorContext() = default; @@ -74,30 +60,10 @@ class ExecutorContext { /** @return the transaction manager */ auto GetTransactionManager() -> TransactionManager * { return txn_mgr_; } - /** @return the set of nlj check executors */ - auto GetNLJCheckExecutorSet() -> std::deque> & { - return nlj_check_exec_set_; - } - - /** @return the check options */ - auto GetCheckOptions() -> std::shared_ptr { return check_options_; } - - void AddCheckExecutor(AbstractExecutor *left_exec, AbstractExecutor *right_exec) { - nlj_check_exec_set_.emplace_back(left_exec, right_exec); - } - - void InitCheckOptions(std::shared_ptr &&check_options) { - 
BUSTUB_ASSERT(check_options, "nullptr"); - check_options_ = std::move(check_options); - } - - /** As of Fall 2023, this function should not be used. */ - auto IsDelete() const -> bool { return is_delete_; } - private: /** The transaction context associated with this executor context */ Transaction *transaction_; - /** The database catalog associated with this executor context */ + /** The database catalog associated with this executor context */ Catalog *catalog_; /** The buffer pool manager associated with this executor context */ BufferPoolManager *bpm_; @@ -105,11 +71,6 @@ class ExecutorContext { TransactionManager *txn_mgr_; /** The lock manager associated with this executor context */ LockManager *lock_mgr_; - /** The set of NLJ check executors associated with this executor context */ - std::deque> nlj_check_exec_set_; - /** The set of check options associated with this executor context */ - std::shared_ptr check_options_; - bool is_delete_; }; } // namespace bustub diff --git a/src/include/execution/executors/abstract_executor.h b/src/include/execution/executors/abstract_executor.h index d4aaf41..8783979 100644 --- a/src/include/execution/executors/abstract_executor.h +++ b/src/include/execution/executors/abstract_executor.h @@ -16,7 +16,6 @@ #include "storage/table/tuple.h" namespace bustub { -class ExecutorContext; /** * The AbstractExecutor implements the Volcano tuple-at-a-time iterator model. 
* This is the base class from which all executors in the BustTub execution diff --git a/src/include/execution/executors/aggregation_executor.h b/src/include/execution/executors/aggregation_executor.h index a6e5a1d..73d989e 100644 --- a/src/include/execution/executors/aggregation_executor.h +++ b/src/include/execution/executors/aggregation_executor.h @@ -42,7 +42,7 @@ class SimpleAggregationHashTable { const std::vector &agg_types) : agg_exprs_{agg_exprs}, agg_types_{agg_types} {} - /** @return The initial aggregate value for this aggregation executor */ + /** @return The initial aggregate value for this aggregation executor */ auto GenerateInitialAggregateValue() -> AggregateValue { std::vector values{}; for (const auto &agg_type : agg_types_) { @@ -74,10 +74,36 @@ class SimpleAggregationHashTable { for (uint32_t i = 0; i < agg_exprs_.size(); i++) { switch (agg_types_[i]) { case AggregationType::CountStarAggregate: + result->aggregates_[i] = result->aggregates_[i].Add(ValueFactory::GetIntegerValue(1)); + break; case AggregationType::CountAggregate: + if (result->aggregates_[i].IsNull()) { + result->aggregates_[i] = ValueFactory::GetIntegerValue(0); + } + if (!input.aggregates_[i].IsNull()) { + result->aggregates_[i] = result->aggregates_[i].Add(ValueFactory::GetIntegerValue(1)); + } + break; case AggregationType::SumAggregate: + if (result->aggregates_[i].IsNull()) { + result->aggregates_[i] = input.aggregates_[i]; + } else if (!input.aggregates_[i].IsNull()) { + result->aggregates_[i] = result->aggregates_[i].Add(input.aggregates_[i]); + } + break; case AggregationType::MinAggregate: + if (result->aggregates_[i].IsNull()) { + result->aggregates_[i] = input.aggregates_[i]; + } else if (!input.aggregates_[i].IsNull()) { + result->aggregates_[i] = result->aggregates_[i].Min(input.aggregates_[i]); + } + break; case AggregationType::MaxAggregate: + if (result->aggregates_[i].IsNull()) { + result->aggregates_[i] = input.aggregates_[i]; + } else if 
(!input.aggregates_[i].IsNull()) { + result->aggregates_[i] = result->aggregates_[i].Max(input.aggregates_[i]); + } break; } } @@ -95,6 +121,8 @@ class SimpleAggregationHashTable { CombineAggregateValues(&ht_[agg_key], agg_val); } + void InsertIntialCombine() { ht_.insert({{std::vector()}, GenerateInitialAggregateValue()}); } + /** * Clear the hash table */ @@ -135,6 +163,8 @@ class SimpleAggregationHashTable { /** @return Iterator to the end of the hash table */ auto End() -> Iterator { return Iterator{ht_.cend()}; } + auto Size() -> size_t { return ht_.size(); } + private: /** The hash table is just a map from aggregate keys to aggregate values */ std::unordered_map ht_{}; @@ -157,7 +187,7 @@ class AggregationExecutor : public AbstractExecutor { * @param child_executor The child executor from which inserted tuples are pulled (may be `nullptr`) */ AggregationExecutor(ExecutorContext *exec_ctx, const AggregationPlanNode *plan, - std::unique_ptr &&child_executor); + std::unique_ptr &&child); /** Initialize the aggregation */ void Init() override; @@ -181,7 +211,7 @@ class AggregationExecutor : public AbstractExecutor { auto MakeAggregateKey(const Tuple *tuple) -> AggregateKey { std::vector keys; for (const auto &expr : plan_->GetGroupBys()) { - keys.emplace_back(expr->Evaluate(tuple, child_executor_->GetOutputSchema())); + keys.emplace_back(expr->Evaluate(tuple, child_->GetOutputSchema())); } return {keys}; } @@ -190,7 +220,7 @@ class AggregationExecutor : public AbstractExecutor { auto MakeAggregateValue(const Tuple *tuple) -> AggregateValue { std::vector vals; for (const auto &expr : plan_->GetAggregates()) { - vals.emplace_back(expr->Evaluate(tuple, child_executor_->GetOutputSchema())); + vals.emplace_back(expr->Evaluate(tuple, child_->GetOutputSchema())); } return {vals}; } @@ -198,14 +228,11 @@ class AggregationExecutor : public AbstractExecutor { private: /** The aggregation plan node */ const AggregationPlanNode *plan_; - /** The child executor that produces 
tuples over which the aggregation is computed */ - std::unique_ptr child_executor_; - + std::unique_ptr child_; /** Simple aggregation hash table */ - // TODO(Student): Uncomment SimpleAggregationHashTable aht_; - + SimpleAggregationHashTable aht_; /** Simple aggregation hash table iterator */ - // TODO(Student): Uncomment SimpleAggregationHashTable::Iterator aht_iterator_; + SimpleAggregationHashTable::Iterator aht_iterator_; }; } // namespace bustub diff --git a/src/include/execution/executors/delete_executor.h b/src/include/execution/executors/delete_executor.h index ba872a0..7d46fe0 100644 --- a/src/include/execution/executors/delete_executor.h +++ b/src/include/execution/executors/delete_executor.h @@ -58,8 +58,12 @@ class DeleteExecutor : public AbstractExecutor { private: /** The delete plan node to be executed */ const DeletePlanNode *plan_; - /** The child executor from which RIDs for deleted tuples are pulled */ std::unique_ptr child_executor_; + + const TableInfo *table_info_; + + std::vector table_indexes_; + bool is_end_{false}; }; } // namespace bustub diff --git a/src/include/execution/executors/hash_join_executor.h b/src/include/execution/executors/hash_join_executor.h index bd0f216..5a74236 100644 --- a/src/include/execution/executors/hash_join_executor.h +++ b/src/include/execution/executors/hash_join_executor.h @@ -13,8 +13,11 @@ #pragma once #include +#include #include +#include +#include "common/util/hash_util.h" #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" #include "execution/plans/hash_join_plan.h" @@ -52,8 +55,16 @@ class HashJoinExecutor : public AbstractExecutor { auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; private: - /** The HashJoin plan node to be executed. */ + /** The HashJoin plan node to be executed. 
*/ const HashJoinPlanNode *plan_; + + std::unique_ptr left_executor_; + std::unique_ptr right_executor_; + + std::unordered_map> hash_join_table_; + + std::vector output_tuples_; + std::vector::const_iterator output_tuples_iter_; }; } // namespace bustub diff --git a/src/include/execution/executors/index_scan_executor.h b/src/include/execution/executors/index_scan_executor.h index 116346d..2493b5a 100644 --- a/src/include/execution/executors/index_scan_executor.h +++ b/src/include/execution/executors/index_scan_executor.h @@ -44,5 +44,11 @@ class IndexScanExecutor : public AbstractExecutor { private: /** The index scan plan node to be executed. */ const IndexScanPlanNode *plan_; + const IndexInfo *index_info_; + const TableInfo *table_info_; + BPlusTreeIndexForOneIntegerColumn *tree_; + BPlusTreeIndexIteratorForOneIntegerColumn iter_; + std::vector rids_; + std::vector::const_iterator rid_iter_{}; }; } // namespace bustub diff --git a/src/include/execution/executors/init_check_executor.h b/src/include/execution/executors/init_check_executor.h deleted file mode 100644 index 20ad99c..0000000 --- a/src/include/execution/executors/init_check_executor.h +++ /dev/null @@ -1,76 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// init_check_executor.h -// -// Identification: src/include/execution/executors/init_check_executor.h -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -#include "execution/executor_context.h" -#include "execution/executors/abstract_executor.h" -#include "execution/plans/abstract_plan.h" - -namespace bustub { - -/** - * InitCheckExecutor counts the number of times the child operator calls init. - */ -class InitCheckExecutor : public AbstractExecutor { - public: - /** - * Construct a new InitCheckExecutor instance. 
- * @param exec_ctx The executor context - * @param plan The init check plan to be executed - * @param child_executor The child executor from which init calls are counted - */ - InitCheckExecutor(ExecutorContext *exec_ctx, AbstractPlanNodeRef plan, - std::unique_ptr &&child_executor); - - /** Initialize the InitCheck */ - void Init() override; - - /** - * Yield the next tuple from the child executor. - * @param[out] tuple The next tuple produced by the child executor - * @param[out] rid The next tuple RID produced by the child executor - * @return `true` if a tuple was produced, `false` if there are no more tuples - */ - auto Next(Tuple *tuple, RID *rid) -> bool override; - - /** @return The output schema for the child executor */ - auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; - - /** @return The number of inits */ - auto GetInitCount() const -> std::size_t { return n_init_; }; - - /** @return The number of nexts */ - auto GetNextCount() const -> std::size_t { return n_next_; }; - - private: - /** InitCheckExecutor returns `true` when it should be polled again */ - constexpr static const bool EXECUTOR_ACTIVE{true}; - - /** InitCheckExecutor returns `false` when child executor is exhausted */ - constexpr static const bool EXECUTOR_EXHAUSTED{false}; - - /** The init check plan node to be executed */ - AbstractPlanNodeRef plan_; - - /** The child executor from which tuples are obtained */ - std::unique_ptr child_executor_; - - /** The number of times init was called */ - std::size_t n_init_{0}; - std::size_t n_next_{0}; -}; - -} // namespace bustub diff --git a/src/include/execution/executors/insert_executor.h b/src/include/execution/executors/insert_executor.h index ca80713..98bda5a 100644 --- a/src/include/execution/executors/insert_executor.h +++ b/src/include/execution/executors/insert_executor.h @@ -14,6 +14,7 @@ #include #include +#include #include "execution/executor_context.h" #include 
"execution/executors/abstract_executor.h" @@ -57,6 +58,12 @@ class InsertExecutor : public AbstractExecutor { private: /** The insert plan node to be executed*/ const InsertPlanNode *plan_; + + const TableInfo *table_info_; + + std::unique_ptr child_executor_; + std::vector table_indexes_; + bool is_end_{false}; }; } // namespace bustub diff --git a/src/include/execution/executors/limit_executor.h b/src/include/execution/executors/limit_executor.h index e49b217..7ee6694 100644 --- a/src/include/execution/executors/limit_executor.h +++ b/src/include/execution/executors/limit_executor.h @@ -51,8 +51,9 @@ class LimitExecutor : public AbstractExecutor { private: /** The limit plan node to be executed */ const LimitPlanNode *plan_; - /** The child executor from which tuples are obtained */ std::unique_ptr child_executor_; + + u_int32_t count_; }; } // namespace bustub diff --git a/src/include/execution/executors/nested_index_join_executor.h b/src/include/execution/executors/nested_index_join_executor.h index c4b6ead..c5e92ca 100644 --- a/src/include/execution/executors/nested_index_join_executor.h +++ b/src/include/execution/executors/nested_index_join_executor.h @@ -34,8 +34,8 @@ class NestIndexJoinExecutor : public AbstractExecutor { public: /** * Creates a new nested index join executor. - * @param exec_ctx the context that the nested index join should be performed in - * @param plan the nested index join plan to be executed + * @param exec_ctx the context that the hash join should be performed in + * @param plan the nested index join plan node * @param child_executor the outer table */ NestIndexJoinExecutor(ExecutorContext *exec_ctx, const NestedIndexJoinPlanNode *plan, @@ -50,5 +50,10 @@ class NestIndexJoinExecutor : public AbstractExecutor { private: /** The nested index join plan node. 
*/ const NestedIndexJoinPlanNode *plan_; + + std::unique_ptr child_; + const IndexInfo *index_info_; + const TableInfo *table_info_; + BPlusTreeIndexForOneIntegerColumn *tree_; }; } // namespace bustub diff --git a/src/include/execution/executors/nested_loop_join_executor.h b/src/include/execution/executors/nested_loop_join_executor.h index 6f2aeaf..ff15d74 100644 --- a/src/include/execution/executors/nested_loop_join_executor.h +++ b/src/include/execution/executors/nested_loop_join_executor.h @@ -14,6 +14,7 @@ #include #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -30,7 +31,7 @@ class NestedLoopJoinExecutor : public AbstractExecutor { /** * Construct a new NestedLoopJoinExecutor instance. * @param exec_ctx The executor context - * @param plan The nested loop join plan to be executed + * @param plan The NestedLoop join plan to be executed * @param left_executor The child executor that produces tuple for the left side of join * @param right_executor The child executor that produces tuple for the right side of join */ @@ -52,9 +53,16 @@ class NestedLoopJoinExecutor : public AbstractExecutor { /** @return The output schema for the insert */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; + auto Matched(Tuple *left_tuple, Tuple *right_tuple) const -> bool; + private: /** The NestedLoopJoin plan node to be executed. 
*/ const NestedLoopJoinPlanNode *plan_; + std::unique_ptr left_executor_; + std::unique_ptr right_executor_; + std::vector right_tuples_; + Tuple left_tuple_; + int32_t right_tuple_idx_ = -1; }; } // namespace bustub diff --git a/src/include/execution/executors/seq_scan_executor.h b/src/include/execution/executors/seq_scan_executor.h index bacbb8d..e4ef42e 100644 --- a/src/include/execution/executors/seq_scan_executor.h +++ b/src/include/execution/executors/seq_scan_executor.h @@ -50,5 +50,7 @@ class SeqScanExecutor : public AbstractExecutor { private: /** The sequential scan plan node to be executed */ const SeqScanPlanNode *plan_; + TableIterator table_iter_ = {nullptr, RID(), nullptr}; + const TableInfo *table_info_; }; } // namespace bustub diff --git a/src/include/execution/executors/sort_executor.h b/src/include/execution/executors/sort_executor.h index 27d9ce0..7efb6c2 100644 --- a/src/include/execution/executors/sort_executor.h +++ b/src/include/execution/executors/sort_executor.h @@ -52,5 +52,9 @@ class SortExecutor : public AbstractExecutor { private: /** The sort plan node to be executed */ const SortPlanNode *plan_; + std::unique_ptr child_; + std::vector child_tuples_; + + std::vector::const_iterator child_iter_; }; } // namespace bustub diff --git a/src/include/execution/executors/topn_check_executor.h b/src/include/execution/executors/topn_check_executor.h deleted file mode 100644 index eeec29c..0000000 --- a/src/include/execution/executors/topn_check_executor.h +++ /dev/null @@ -1,67 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// topn_check_executor.h -// -// Identification: src/include/execution/executors/topn_check_executor.h -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -#include "execution/executors/abstract_executor.h" 
-#include "execution/executors/topn_executor.h" -#include "execution/plans/topn_plan.h" - -namespace bustub { - -/** - * TopNCheckExecutor checks the number of items in TopN executor container - */ -class TopNCheckExecutor : public AbstractExecutor { - public: - /** - * Construct a new TopNCheckExecutor instance. - * @param exec_ctx The executor context - * @param plan The TopN plan to be executed - * @param child_executor The TopN child executor - */ - TopNCheckExecutor(ExecutorContext *exec_ctx, const TopNPlanNode *plan, - std::unique_ptr &&child_executor, TopNExecutor *topn_executor); - - /** Initialize the TopNCheck */ - void Init() override; - - /** - * Yield the next tuple from the child executor. - * @param[out] tuple The next tuple produced by the child executor - * @param[out] rid The next tuple RID produced by the child executor - * @return `true` if a tuple was produced, `false` if there are no more tuples - */ - auto Next(Tuple *tuple, RID *rid) -> bool override; - - /** @return The output schema for the child executor */ - auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; - - private: - /** TopNCheckExecutor returns `false` when child executor is exhausted */ - constexpr static const bool EXECUTOR_EXHAUSTED{false}; - - /** The TopNPlanNode to be executed */ - const TopNPlanNode *plan_; - - std::size_t prev_{0}; - - /** The child executor from which tuples are obtained */ - std::unique_ptr child_executor_; - - TopNExecutor *topn_executor_; -}; - -} // namespace bustub diff --git a/src/include/execution/executors/topn_executor.h b/src/include/execution/executors/topn_executor.h index cbed6c1..d474c6c 100644 --- a/src/include/execution/executors/topn_executor.h +++ b/src/include/execution/executors/topn_executor.h @@ -13,7 +13,7 @@ #pragma once #include -#include +#include #include #include "execution/executor_context.h" @@ -32,36 +32,29 @@ class TopNExecutor : public AbstractExecutor { /** * Construct a new 
TopNExecutor instance. * @param exec_ctx The executor context - * @param plan The TopN plan to be executed + * @param plan The topn plan to be executed */ TopNExecutor(ExecutorContext *exec_ctx, const TopNPlanNode *plan, std::unique_ptr &&child_executor); - /** Initialize the TopN */ + /** Initialize the topn */ void Init() override; /** - * Yield the next tuple from the TopN. - * @param[out] tuple The next tuple produced by the TopN - * @param[out] rid The next tuple RID produced by the TopN + * Yield the next tuple from the topn. + * @param[out] tuple The next tuple produced by the topn + * @param[out] rid The next tuple RID produced by the topn * @return `true` if a tuple was produced, `false` if there are no more tuples */ auto Next(Tuple *tuple, RID *rid) -> bool override; - /** @return The output schema for the TopN */ + /** @return The output schema for the topn */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } - /** Sets new child executor (for testing only) */ - void SetChildExecutor(std::unique_ptr &&child_executor) { - child_executor_ = std::move(child_executor); - } - - /** @return The size of top_entries_ container, which will be called on each child_executor->Next(). 
*/ - auto GetNumInHeap() -> size_t; - private: - /** The TopN plan node to be executed */ + /** The topn plan node to be executed */ const TopNPlanNode *plan_; - /** The child executor from which tuples are obtained */ - std::unique_ptr child_executor_; + std::unique_ptr child_; + + std::stack child_tuples_; }; } // namespace bustub diff --git a/src/include/execution/executors/topn_per_group_executor.h b/src/include/execution/executors/topn_per_group_executor.h deleted file mode 100644 index 75642cf..0000000 --- a/src/include/execution/executors/topn_per_group_executor.h +++ /dev/null @@ -1,59 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// topn_per_group_executor.h -// -// Identification: src/include/execution/executors/topn_per_group_executor.h -// -// Copyright (c) 2015-2022, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include - -#include "execution/executor_context.h" -#include "execution/executors/abstract_executor.h" -#include "execution/plans/seq_scan_plan.h" -#include "execution/plans/topn_per_group_plan.h" -#include "storage/table/tuple.h" - -namespace bustub { - -/** - * The TopNPerGroupExecutor executor executes a topn. - */ -class TopNPerGroupExecutor : public AbstractExecutor { - public: - /** - * Construct a new TopNPerGroupExecutor instance. - * @param exec_ctx The executor context - * @param plan The TopNPerGroup plan to be executed - */ - TopNPerGroupExecutor(ExecutorContext *exec_ctx, const TopNPerGroupPlanNode *plan, - std::unique_ptr &&child_executor); - - /** Initialize the TopNPerGroup */ - void Init() override; - - /** - * Yield the next tuple from the TopNPerGroup. 
- * @param[out] tuple The next tuple produced by the TopNPerGroup - * @param[out] rid The next tuple RID produced by the TopNPerGroup - * @return `true` if a tuple was produced, `false` if there are no more tuples - */ - auto Next(Tuple *tuple, RID *rid) -> bool override; - - auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } - - private: - /** The TopNPerGroup plan node to be executed */ - [[maybe_unused]] const TopNPerGroupPlanNode *plan_; - /** The child executor from which tuples are obtained */ - std::unique_ptr child_executor_; -}; -} // namespace bustub diff --git a/src/include/execution/executors/update_executor.h b/src/include/execution/executors/update_executor.h index ca357b1..5ae8de7 100644 --- a/src/include/execution/executors/update_executor.h +++ b/src/include/execution/executors/update_executor.h @@ -45,7 +45,7 @@ class UpdateExecutor : public AbstractExecutor { void Init() override; /** - * Yield the next tuple from the update. + * Yield the next tuple from the update. 
* @param[out] tuple The next tuple produced by the update * @param[out] rid The next tuple RID produced by the update (ignore this) * @return `true` if a tuple was produced, `false` if there are no more tuples @@ -60,11 +60,12 @@ class UpdateExecutor : public AbstractExecutor { private: /** The update plan node to be executed */ const UpdatePlanNode *plan_; - /** Metadata identifying the table that should be updated */ const TableInfo *table_info_; - /** The child executor to obtain value from */ std::unique_ptr child_executor_; + + std::vector table_indexes_; + bool is_end_{false}; }; } // namespace bustub diff --git a/src/include/execution/executors/window_function_executor.h b/src/include/execution/executors/window_function_executor.h deleted file mode 100644 index d8d033f..0000000 --- a/src/include/execution/executors/window_function_executor.h +++ /dev/null @@ -1,94 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// window_function_executor.h -// -// Identification: src/include/execution/executors/window_function_executor.h -// -// Copyright (c) 2015-2022, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -#include "execution/executor_context.h" -#include "execution/executors/abstract_executor.h" -#include "execution/plans/window_plan.h" -#include "storage/table/tuple.h" - -namespace bustub { - -/** - * The WindowFunctionExecutor executor executes a window function for columns using window function. - * - * Window function is different from normal aggregation as it outputs one row for each inputing rows, - * and can be combined with normal selected columns. The columns in WindowFunctionPlanNode contains both - * normal selected columns and placeholder columns for window functions. 
- * - * For example, if we have a query like: - * SELECT 0.1, 0.2, SUM(0.3) OVER (PARTITION BY 0.2 ORDER BY 0.3), SUM(0.4) OVER (PARTITION BY 0.1 ORDER BY 0.2,0.3) - * FROM table; - * - * The WindowFunctionPlanNode contains following structure: - * columns: std::vector{0.1, 0.2, 0.-1(placeholder), 0.-1(placeholder)} - * window_functions_: { - * 3: { - * partition_by: std::vector{0.2} - * order_by: std::vector{0.3} - * functions: std::vector{0.3} - * window_func_type: WindowFunctionType::SumAggregate - * } - * 4: { - * partition_by: std::vector{0.1} - * order_by: std::vector{0.2,0.3} - * functions: std::vector{0.4} - * window_func_type: WindowFunctionType::SumAggregate - * } - * } - * - * Your executor should use child executor and exprs in columns to produce selected columns except for window - * function columns, and use window_agg_indexes, partition_bys, order_bys, functionss and window_agg_types to - * generate window function columns results. Directly use placeholders for window function columns in columns is - * not allowed, as it contains invalid column id. - * - * Your WindowFunctionExecutor does not need to support specified window frames (eg: 1 preceding and 1 following). - * You can assume that all window frames are UNBOUNDED FOLLOWING AND CURRENT ROW when there is ORDER BY clause, and - * UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING when there is no ORDER BY clause. - * - */ -class WindowFunctionExecutor : public AbstractExecutor { - public: - /** - * Construct a new WindowFunctionExecutor instance. - * @param exec_ctx The executor context - * @param plan The window aggregation plan to be executed - */ - WindowFunctionExecutor(ExecutorContext *exec_ctx, const WindowFunctionPlanNode *plan, - std::unique_ptr &&child_executor); - - /** Initialize the window aggregation */ - void Init() override; - - /** - * Yield the next tuple from the window aggregation. 
- * @param[out] tuple The next tuple produced by the window aggregation - * @param[out] rid The next tuple RID produced by the window aggregation - * @return `true` if a tuple was produced, `false` if there are no more tuples - */ - auto Next(Tuple *tuple, RID *rid) -> bool override; - - /** @return The output schema for the window aggregation plan */ - auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } - - private: - /** The window aggregation plan node to be executed */ - const WindowFunctionPlanNode *plan_; - - /** The child executor from which tuples are obtained */ - std::unique_ptr child_executor_; -}; -} // namespace bustub diff --git a/src/include/execution/expressions/abstract_expression.h b/src/include/execution/expressions/abstract_expression.h index 96d84fb..97f6dfb 100644 --- a/src/include/execution/expressions/abstract_expression.h +++ b/src/include/execution/expressions/abstract_expression.h @@ -17,11 +17,9 @@ #include #include -#include "catalog/column.h" #include "catalog/schema.h" #include "fmt/format.h" #include "storage/table/tuple.h" -#include "type/type.h" #define BUSTUB_EXPR_CLONE_WITH_CHILDREN(cname) \ auto CloneWithChildren(std::vector children) const->std::unique_ptr \ @@ -47,8 +45,8 @@ class AbstractExpression { * @param children the children of this abstract expression * @param ret_type the return type of this abstract expression when it is evaluated */ - AbstractExpression(std::vector children, Column ret_type) - : children_{std::move(children)}, ret_type_{std::move(ret_type)} {} + AbstractExpression(std::vector children, TypeId ret_type) + : children_{std::move(children)}, ret_type_{ret_type} {} /** Virtual destructor. 
*/ virtual ~AbstractExpression() = default; @@ -74,7 +72,7 @@ class AbstractExpression { auto GetChildren() const -> const std::vector & { return children_; } /** @return the type of this expression if it were to be evaluated */ - virtual auto GetReturnType() const -> Column { return ret_type_; } + virtual auto GetReturnType() const -> TypeId { return ret_type_; } /** @return the string representation of the plan node and its children */ virtual auto ToString() const -> std::string { return ""; } @@ -88,7 +86,7 @@ class AbstractExpression { private: /** The return type of this expression. */ - Column ret_type_; + TypeId ret_type_; }; } // namespace bustub diff --git a/src/include/execution/expressions/arithmetic_expression.h b/src/include/execution/expressions/arithmetic_expression.h index cae3aab..9d99a8c 100644 --- a/src/include/execution/expressions/arithmetic_expression.h +++ b/src/include/execution/expressions/arithmetic_expression.h @@ -23,7 +23,6 @@ #include "execution/expressions/abstract_expression.h" #include "fmt/format.h" #include "storage/table/tuple.h" -#include "type/type.h" #include "type/type_id.h" #include "type/value_factory.h" @@ -39,10 +38,8 @@ class ArithmeticExpression : public AbstractExpression { public: /** Creates a new comparison expression representing (left comp_type right). 
*/ ArithmeticExpression(AbstractExpressionRef left, AbstractExpressionRef right, ArithmeticType compute_type) - : AbstractExpression({std::move(left), std::move(right)}, Column{"", TypeId::INTEGER}), - compute_type_{compute_type} { - if (GetChildAt(0)->GetReturnType().GetType() != TypeId::INTEGER || - GetChildAt(1)->GetReturnType().GetType() != TypeId::INTEGER) { + : AbstractExpression({std::move(left), std::move(right)}, TypeId::INTEGER), compute_type_{compute_type} { + if (GetChildAt(0)->GetReturnType() != TypeId::INTEGER || GetChildAt(1)->GetReturnType() != TypeId::INTEGER) { throw bustub::NotImplementedException("only support integer for now"); } } diff --git a/src/include/execution/expressions/array_expression.h b/src/include/execution/expressions/array_expression.h deleted file mode 100644 index b3965cb..0000000 --- a/src/include/execution/expressions/array_expression.h +++ /dev/null @@ -1,65 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// constant_value_expression.h -// -// Identification: src/include/expression/constant_value_expression.h -// -// Copyright (c) 2015-19, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include - -#include "common/exception.h" -#include "execution/expressions/abstract_expression.h" -#include "type/value_factory.h" - -namespace bustub { -/** - * ArrayExpression represents arrays. - */ -class ArrayExpression : public AbstractExpression { - public: - /** Creates a new constant value expression wrapping the given value. 
*/ - explicit ArrayExpression(const std::vector &children) - : AbstractExpression(children, Column{"", TypeId::VECTOR, static_cast(children.size())}) {} - - auto Evaluate(const Tuple *tuple, const Schema &schema) const -> Value override { - std::vector values; - values.reserve(children_.size()); - for (const auto &expr : children_) { - auto val = expr->Evaluate(tuple, schema); - if (val.GetTypeId() != TypeId::DECIMAL) { - throw Exception("vector value can only be constructed from decimal type"); - } - values.emplace_back(val.GetAs()); - } - return ValueFactory::GetVectorValue(values); - } - - auto EvaluateJoin(const Tuple *left_tuple, const Schema &left_schema, const Tuple *right_tuple, - const Schema &right_schema) const -> Value override { - std::vector values; - values.resize(children_.size()); - for (const auto &expr : children_) { - auto val = expr->EvaluateJoin(left_tuple, left_schema, right_tuple, right_schema); - if (val.GetTypeId() != TypeId::DECIMAL) { - throw Exception("vector value can only be constructed from decimal type"); - } - values.emplace_back(val.GetAs()); - } - return ValueFactory::GetVectorValue(values); - } - - /** @return the string representation of the plan node and its children */ - auto ToString() const -> std::string override { return fmt::format("[{}]", fmt::join(children_, ",")); } - - BUSTUB_EXPR_CLONE_WITH_CHILDREN(ArrayExpression); -}; -} // namespace bustub diff --git a/src/include/execution/expressions/column_value_expression.h b/src/include/execution/expressions/column_value_expression.h index 710e7f5..3969dcb 100644 --- a/src/include/execution/expressions/column_value_expression.h +++ b/src/include/execution/expressions/column_value_expression.h @@ -14,7 +14,6 @@ #include #include -#include #include #include "catalog/schema.h" @@ -33,8 +32,8 @@ class ColumnValueExpression : public AbstractExpression { * @param col_idx the index of the column in the schema * @param ret_type the return type of the expression */ - 
ColumnValueExpression(uint32_t tuple_idx, uint32_t col_idx, Column ret_type) - : AbstractExpression({}, std::move(ret_type)), tuple_idx_{tuple_idx}, col_idx_{col_idx} {} + ColumnValueExpression(uint32_t tuple_idx, uint32_t col_idx, TypeId ret_type) + : AbstractExpression({}, ret_type), tuple_idx_{tuple_idx}, col_idx_{col_idx} {} auto Evaluate(const Tuple *tuple, const Schema &schema) const -> Value override { return tuple->GetValue(&schema, col_idx_); diff --git a/src/include/execution/expressions/comparison_expression.h b/src/include/execution/expressions/comparison_expression.h index 1a38988..84f5f09 100644 --- a/src/include/execution/expressions/comparison_expression.h +++ b/src/include/execution/expressions/comparison_expression.h @@ -34,8 +34,7 @@ class ComparisonExpression : public AbstractExpression { public: /** Creates a new comparison expression representing (left comp_type right). */ ComparisonExpression(AbstractExpressionRef left, AbstractExpressionRef right, ComparisonType comp_type) - : AbstractExpression({std::move(left), std::move(right)}, Column{"", TypeId::BOOLEAN}), - comp_type_{comp_type} {} + : AbstractExpression({std::move(left), std::move(right)}, TypeId::BOOLEAN), comp_type_{comp_type} {} auto Evaluate(const Tuple *tuple, const Schema &schema) const -> Value override { Value lhs = GetChildAt(0)->Evaluate(tuple, schema); diff --git a/src/include/execution/expressions/constant_value_expression.h b/src/include/execution/expressions/constant_value_expression.h index 08f4a03..ebea3c6 100644 --- a/src/include/execution/expressions/constant_value_expression.h +++ b/src/include/execution/expressions/constant_value_expression.h @@ -25,7 +25,7 @@ namespace bustub { class ConstantValueExpression : public AbstractExpression { public: /** Creates a new constant value expression wrapping the given value. 
*/ - explicit ConstantValueExpression(const Value &val) : AbstractExpression({}, val.GetColumn()), val_(val) {} + explicit ConstantValueExpression(const Value &val) : AbstractExpression({}, val.GetTypeId()), val_(val) {} auto Evaluate(const Tuple *tuple, const Schema &schema) const -> Value override { return val_; } diff --git a/src/include/execution/expressions/logic_expression.h b/src/include/execution/expressions/logic_expression.h index bc82ae8..c4636d6 100644 --- a/src/include/execution/expressions/logic_expression.h +++ b/src/include/execution/expressions/logic_expression.h @@ -16,7 +16,6 @@ #include #include -#include "catalog/column.h" #include "catalog/schema.h" #include "common/exception.h" #include "common/macros.h" @@ -39,10 +38,8 @@ class LogicExpression : public AbstractExpression { public: /** Creates a new comparison expression representing (left comp_type right). */ LogicExpression(AbstractExpressionRef left, AbstractExpressionRef right, LogicType logic_type) - : AbstractExpression({std::move(left), std::move(right)}, Column{"", TypeId::BOOLEAN}), - logic_type_{logic_type} { - if (GetChildAt(0)->GetReturnType().GetType() != TypeId::BOOLEAN || - GetChildAt(1)->GetReturnType().GetType() != TypeId::BOOLEAN) { + : AbstractExpression({std::move(left), std::move(right)}, TypeId::BOOLEAN), logic_type_{logic_type} { + if (GetChildAt(0)->GetReturnType() != TypeId::BOOLEAN || GetChildAt(1)->GetReturnType() != TypeId::BOOLEAN) { throw bustub::NotImplementedException("expect boolean from either side"); } } diff --git a/src/include/execution/expressions/string_expression.h b/src/include/execution/expressions/string_expression.h deleted file mode 100644 index ce3d874..0000000 --- a/src/include/execution/expressions/string_expression.h +++ /dev/null @@ -1,95 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// string_expression.h -// -// Identification: src/include/expression/string_expression.h -// -// 
Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include -#include -#include - -#include "catalog/schema.h" -#include "common/exception.h" -#include "common/macros.h" -#include "execution/expressions/abstract_expression.h" -#include "fmt/format.h" -#include "storage/table/tuple.h" -#include "type/type.h" -#include "type/type_id.h" -#include "type/value_factory.h" - -namespace bustub { - -enum class StringExpressionType { Lower, Upper }; - -/** - * StringExpression represents two expressions being computed. - */ -class StringExpression : public AbstractExpression { - public: - StringExpression(AbstractExpressionRef arg, StringExpressionType expr_type) - : AbstractExpression({std::move(arg)}, Column{"", TypeId::VARCHAR, 256 /* hardcode max length */}), - expr_type_{expr_type} { - if (GetChildAt(0)->GetReturnType().GetType() != TypeId::VARCHAR) { - BUSTUB_ENSURE(GetChildAt(0)->GetReturnType().GetType() == TypeId::VARCHAR, "unexpected arg"); - } - } - - auto Compute(const std::string &val) const -> std::string { - // TODO(student): implement upper / lower. 
- return {}; - } - - auto Evaluate(const Tuple *tuple, const Schema &schema) const -> Value override { - Value val = GetChildAt(0)->Evaluate(tuple, schema); - auto str = val.GetAs(); - return ValueFactory::GetVarcharValue(Compute(str)); - } - - auto EvaluateJoin(const Tuple *left_tuple, const Schema &left_schema, const Tuple *right_tuple, - const Schema &right_schema) const -> Value override { - Value val = GetChildAt(0)->EvaluateJoin(left_tuple, left_schema, right_tuple, right_schema); - auto str = val.GetAs(); - return ValueFactory::GetVarcharValue(Compute(str)); - } - - /** @return the string representation of the expression node and its children */ - auto ToString() const -> std::string override { return fmt::format("{}({})", expr_type_, *GetChildAt(0)); } - - BUSTUB_EXPR_CLONE_WITH_CHILDREN(StringExpression); - - StringExpressionType expr_type_; - - private: -}; -} // namespace bustub - -template <> -struct fmt::formatter : formatter { - template - auto format(bustub::StringExpressionType c, FormatContext &ctx) const { - string_view name; - switch (c) { - case bustub::StringExpressionType::Upper: - name = "upper"; - break; - case bustub::StringExpressionType::Lower: - name = "lower"; - break; - default: - name = "Unknown"; - break; - } - return formatter::format(name, ctx); - } -}; diff --git a/src/include/execution/plans/abstract_plan.h b/src/include/execution/plans/abstract_plan.h index 369b12a..64b8536 100644 --- a/src/include/execution/plans/abstract_plan.h +++ b/src/include/execution/plans/abstract_plan.h @@ -47,10 +47,7 @@ enum class PlanType { Projection, Sort, TopN, - TopNPerGroup, - MockScan, - InitCheck, - Window + MockScan }; class AbstractPlanNode; @@ -66,8 +63,8 @@ class AbstractPlanNode { public: /** * Create a new AbstractPlanNode with the specified output schema and children. 
- * @param output_schema The schema for the output of this plan node - * @param children The children of this plan node + * @param output_schema the schema for the output of this plan node + * @param children the children of this plan node */ AbstractPlanNode(SchemaRef output_schema, std::vector children) : output_schema_(std::move(output_schema)), children_(std::move(children)) {} diff --git a/src/include/execution/plans/delete_plan.h b/src/include/execution/plans/delete_plan.h index 9631698..094bf63 100644 --- a/src/include/execution/plans/delete_plan.h +++ b/src/include/execution/plans/delete_plan.h @@ -41,7 +41,7 @@ class DeletePlanNode : public AbstractPlanNode { auto GetType() const -> PlanType override { return PlanType::Delete; } /** @return The identifier of the table from which tuples are deleted*/ - auto GetTableOid() const -> table_oid_t { return table_oid_; } + auto TableOid() const -> table_oid_t { return table_oid_; } /** @return The child plan providing tuples to be deleted */ auto GetChildPlan() const -> AbstractPlanNodeRef { diff --git a/src/include/execution/plans/hash_join_plan.h b/src/include/execution/plans/hash_join_plan.h index 6500f56..4199453 100644 --- a/src/include/execution/plans/hash_join_plan.h +++ b/src/include/execution/plans/hash_join_plan.h @@ -35,21 +35,21 @@ class HashJoinPlanNode : public AbstractPlanNode { * @param right_key_expression The expression for the right JOIN key */ HashJoinPlanNode(SchemaRef output_schema, AbstractPlanNodeRef left, AbstractPlanNodeRef right, - std::vector left_key_expressions, - std::vector right_key_expressions, JoinType join_type) + AbstractExpressionRef left_key_expression, AbstractExpressionRef right_key_expression, + JoinType join_type) : AbstractPlanNode(std::move(output_schema), {std::move(left), std::move(right)}), - left_key_expressions_{std::move(left_key_expressions)}, - right_key_expressions_{std::move(right_key_expressions)}, + left_key_expression_{std::move(left_key_expression)}, + 
right_key_expression_{std::move(right_key_expression)}, join_type_(join_type) {} /** @return The type of the plan node */ auto GetType() const -> PlanType override { return PlanType::HashJoin; } /** @return The expression to compute the left join key */ - auto LeftJoinKeyExpressions() const -> const std::vector & { return left_key_expressions_; } + auto LeftJoinKeyExpression() const -> const AbstractExpression & { return *left_key_expression_; } /** @return The expression to compute the right join key */ - auto RightJoinKeyExpressions() const -> const std::vector & { return right_key_expressions_; } + auto RightJoinKeyExpression() const -> const AbstractExpression & { return *right_key_expression_; } /** @return The left plan node of the hash join */ auto GetLeftPlan() const -> AbstractPlanNodeRef { @@ -69,15 +69,18 @@ class HashJoinPlanNode : public AbstractPlanNode { BUSTUB_PLAN_NODE_CLONE_WITH_CHILDREN(HashJoinPlanNode); /** The expression to compute the left JOIN key */ - std::vector left_key_expressions_; + AbstractExpressionRef left_key_expression_; /** The expression to compute the right JOIN key */ - std::vector right_key_expressions_; + AbstractExpressionRef right_key_expression_; /** The join type */ JoinType join_type_; protected: - auto PlanNodeToString() const -> std::string override; + auto PlanNodeToString() const -> std::string override { + return fmt::format("HashJoin {{ type={}, left_key={}, right_key={} }}", join_type_, left_key_expression_, + right_key_expression_); + } }; } // namespace bustub diff --git a/src/include/execution/plans/index_scan_plan.h b/src/include/execution/plans/index_scan_plan.h index 14121fc..da27bd8 100644 --- a/src/include/execution/plans/index_scan_plan.h +++ b/src/include/execution/plans/index_scan_plan.h @@ -16,9 +16,7 @@ #include #include "catalog/catalog.h" -#include "concurrency/transaction.h" #include "execution/expressions/abstract_expression.h" -#include "execution/expressions/constant_value_expression.h" 
#include "execution/plans/abstract_plan.h" namespace bustub { @@ -28,19 +26,14 @@ namespace bustub { class IndexScanPlanNode : public AbstractPlanNode { public: /** - * Creates a new index scan plan node with filter predicate. - * @param output The output format of this scan plan node - * @param table_oid The identifier of table to be scanned - * @param filter_predicate The predicate pushed down to index scan. - * @param pred_key The key for point lookup + * Creates a new index scan plan node. + * @param output the output format of this scan plan node + * @param index_oid the identifier of the index to be scanned */ - IndexScanPlanNode(SchemaRef output, table_oid_t table_oid, index_oid_t index_oid, - AbstractExpressionRef filter_predicate = nullptr, ConstantValueExpression *pred_key = nullptr) + IndexScanPlanNode(SchemaRef output, index_oid_t index_oid, AbstractExpressionRef filter_predicate = nullptr) : AbstractPlanNode(std::move(output), {}), - table_oid_(table_oid), index_oid_(index_oid), - filter_predicate_(std::move(filter_predicate)), - pred_key_(pred_key) {} + filter_predicate_(std::move(filter_predicate)) {} auto GetType() const -> PlanType override { return PlanType::IndexScan; } @@ -49,26 +42,13 @@ class IndexScanPlanNode : public AbstractPlanNode { BUSTUB_PLAN_NODE_CLONE_WITH_CHILDREN(IndexScanPlanNode); - /** The table which the index is created on. */ - table_oid_t table_oid_; - - /** The index whose tuples should be scanned. */ + /** The index whose tuples should be scanned. */ index_oid_t index_oid_; - /** The predicate to filter in index scan. - * For Fall 2023, after you implemented seqscan to indexscan optimizer rule, - * we can use this predicate to do index point lookup - */ - AbstractExpressionRef filter_predicate_; - - /** - * The constant value key to lookup. 
- * For example when dealing "WHERE v = 1" we could store the constant value 1 here - */ - const ConstantValueExpression *pred_key_; - // Add anything you want here for index lookup + AbstractExpressionRef filter_predicate_; + protected: auto PlanNodeToString() const -> std::string override { if (filter_predicate_) { diff --git a/src/include/execution/plans/insert_plan.h b/src/include/execution/plans/insert_plan.h index efb944a..b48bc4c 100644 --- a/src/include/execution/plans/insert_plan.h +++ b/src/include/execution/plans/insert_plan.h @@ -31,8 +31,8 @@ class InsertPlanNode : public AbstractPlanNode { public: /** * Creates a new insert plan node for inserting values from a child plan. - * @param child The child plan to obtain values from - * @param table_oid The identifier of the table that should be inserted into + * @param child the child plan to obtain values from + * @param table_oid the identifier of the table that should be inserted into */ InsertPlanNode(SchemaRef output, AbstractPlanNodeRef child, table_oid_t table_oid) : AbstractPlanNode(std::move(output), {std::move(child)}), table_oid_(table_oid) {} @@ -41,7 +41,7 @@ class InsertPlanNode : public AbstractPlanNode { auto GetType() const -> PlanType override { return PlanType::Insert; } /** @return The identifier of the table into which tuples are inserted */ - auto GetTableOid() const -> table_oid_t { return table_oid_; } + auto TableOid() const -> table_oid_t { return table_oid_; } /** @return the child plan providing tuples to be inserted */ auto GetChildPlan() const -> AbstractPlanNodeRef { diff --git a/src/include/execution/plans/nested_index_join_plan.h b/src/include/execution/plans/nested_index_join_plan.h index 9693ca8..45271be 100644 --- a/src/include/execution/plans/nested_index_join_plan.h +++ b/src/include/execution/plans/nested_index_join_plan.h @@ -28,7 +28,7 @@ namespace bustub { /** * NestedIndexJoinPlanNode is used to represent performing a nested index join between two tables - * The 
outer table tuples are propagated using a child executor, but the inner table tuples should be + * The outer table tuples are propagated using a child executor, but the inner table tuples should be * obtained using the outer table tuples as well as the index from the catalog. */ class NestedIndexJoinPlanNode : public AbstractPlanNode { @@ -47,22 +47,22 @@ class NestedIndexJoinPlanNode : public AbstractPlanNode { auto GetType() const -> PlanType override { return PlanType::NestedIndexJoin; } - /** @return The predicate to be used to extract the join key from the child */ + /** @return the predicate to be used to extract the join key from the child */ auto KeyPredicate() const -> const AbstractExpressionRef & { return key_predicate_; } /** @return The join type used in the nested index join */ auto GetJoinType() const -> JoinType { return join_type_; }; - /** @return The plan node for the outer table of the nested index join */ + /** @return the plan node for the outer table of the nested index join */ auto GetChildPlan() const -> AbstractPlanNodeRef { return GetChildAt(0); } - /** @return The table oid for the inner table of the nested index join */ + /** @return the table oid for the inner table of the nested index join */ auto GetInnerTableOid() const -> table_oid_t { return inner_table_oid_; } - /** @return The index associated with the nested index join */ + /** @return the index associated with the nested index join */ auto GetIndexName() const -> std::string { return index_name_; } - /** @return The index oid associated with the nested index join */ + /** @return the index oid associated with the nested index join */ auto GetIndexOid() const -> index_oid_t { return index_oid_; } /** @return Schema with needed columns in from the inner table */ diff --git a/src/include/execution/plans/nested_loop_join_plan.h b/src/include/execution/plans/nested_loop_join_plan.h index 12d36fa..026ad04 100644 --- a/src/include/execution/plans/nested_loop_join_plan.h +++ 
b/src/include/execution/plans/nested_loop_join_plan.h @@ -46,7 +46,7 @@ class NestedLoopJoinPlanNode : public AbstractPlanNode { auto GetType() const -> PlanType override { return PlanType::NestedLoopJoin; } /** @return The predicate to be used in the nested loop join */ - auto Predicate() const -> const AbstractExpressionRef & { return predicate_; } + auto Predicate() const -> const AbstractExpression & { return *predicate_; } /** @return The join type used in the nested loop join */ auto GetJoinType() const -> JoinType { return join_type_; }; diff --git a/src/include/execution/plans/seq_scan_plan.h b/src/include/execution/plans/seq_scan_plan.h index 9caf460..30174a9 100644 --- a/src/include/execution/plans/seq_scan_plan.h +++ b/src/include/execution/plans/seq_scan_plan.h @@ -57,9 +57,9 @@ class SeqScanPlanNode : public AbstractPlanNode { /** The table name */ std::string table_name_; - /** The predicate to filter in seqscan. - * For Fall 2023, We'll enable the MergeFilterScan rule, so we can further support index point lookup - */ + /** The predicate to filter in seqscan. It will ALWAYS be nullptr until you enable the MergeFilterScan rule. + You don't need to handle it to get a perfect score as of Fall 2022. 
+ */ AbstractExpressionRef filter_predicate_; protected: diff --git a/src/include/execution/plans/topn_per_group_plan.h b/src/include/execution/plans/topn_per_group_plan.h deleted file mode 100644 index 29c4c61..0000000 --- a/src/include/execution/plans/topn_per_group_plan.h +++ /dev/null @@ -1,75 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// topn_per_group_plan.h -// -// Identification: src/include/execution/plans/topn_per_group_plan.h -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include -#include - -#include "binder/bound_order_by.h" -#include "catalog/catalog.h" -#include "execution/expressions/abstract_expression.h" -#include "execution/plans/abstract_plan.h" - -namespace bustub { - -/** - * The TopNPerGroupPlanNode represents a top-n operation. It will gather the n extreme rows based on - * limit and order expressions. - */ -class TopNPerGroupPlanNode : public AbstractPlanNode { - public: - /** - * Construct a new TopNPerGroupPlanNode instance. - * @param output The output schema of this TopNPerGroup plan node - * @param child The child plan node - * @param order_bys The sort expressions and their order by types. - * @param n Retain n elements. 
- */ - TopNPerGroupPlanNode(SchemaRef output, AbstractPlanNodeRef child, std::vector group_bys, - std::vector> order_bys, std::size_t n) - : AbstractPlanNode(std::move(output), {std::move(child)}), - order_bys_(std::move(order_bys)), - group_bys_(std::move(group_bys)), - n_{n} {} - - /** @return The type of the plan node */ - auto GetType() const -> PlanType override { return PlanType::TopNPerGroup; } - - /** @return The N (limit) */ - auto GetN() const -> size_t { return n_; } - - /** @return Get order by expressions */ - auto GetOrderBy() const -> const std::vector> & { return order_bys_; } - - /** @return Get group by expressions */ - auto GetGroupBy() const -> const std::vector & { return group_bys_; } - - /** @return The child plan node */ - auto GetChildPlan() const -> AbstractPlanNodeRef { - BUSTUB_ASSERT(GetChildren().size() == 1, "TopNPerGroup should have exactly one child plan."); - return GetChildAt(0); - } - - BUSTUB_PLAN_NODE_CLONE_WITH_CHILDREN(TopNPerGroupPlanNode); - - std::vector> order_bys_; - std::vector group_bys_; - std::size_t n_; - - protected: - auto PlanNodeToString() const -> std::string override { return "TopNPerGroupPlan PlanNodeToString Not Implemented"; }; -}; - -} // namespace bustub diff --git a/src/include/execution/plans/topn_plan.h b/src/include/execution/plans/topn_plan.h index 6adc042..cc1fd2c 100644 --- a/src/include/execution/plans/topn_plan.h +++ b/src/include/execution/plans/topn_plan.h @@ -32,7 +32,7 @@ class TopNPlanNode : public AbstractPlanNode { public: /** * Construct a new TopNPlanNode instance. - * @param output The output schema of this TopN plan node + * @param output The output schema of this topN plan node * @param child The child plan node * @param order_bys The sort expressions and their order by types. * @param n Retain n elements. 
diff --git a/src/include/execution/plans/update_plan.h b/src/include/execution/plans/update_plan.h index 628eab2..e2e9e8e 100644 --- a/src/include/execution/plans/update_plan.h +++ b/src/include/execution/plans/update_plan.h @@ -30,9 +30,9 @@ class UpdatePlanNode : public AbstractPlanNode { public: /** * Construct a new UpdatePlanNode instance. - * @param child The child plan to obtain tuple from - * @param table_oid The identifier of the table that should be updated - * @param target_expressions The target expressions for new tuples + * @param child the child plan to obtain tuple from + * @param table_oid the identifier of the table that should be updated + * @param target_expressions the target expressions for new tuples */ UpdatePlanNode(SchemaRef output, AbstractPlanNodeRef child, table_oid_t table_oid, std::vector target_expressions) @@ -44,7 +44,7 @@ class UpdatePlanNode : public AbstractPlanNode { auto GetType() const -> PlanType override { return PlanType::Update; } /** @return The identifier of the table that should be updated */ - auto GetTableOid() const -> table_oid_t { return table_oid_; } + auto TableOid() const -> table_oid_t { return table_oid_; } /** @return The child plan providing tuples to be inserted */ auto GetChildPlan() const -> AbstractPlanNodeRef { diff --git a/src/include/execution/plans/window_plan.h b/src/include/execution/plans/window_plan.h deleted file mode 100644 index b67cf23..0000000 --- a/src/include/execution/plans/window_plan.h +++ /dev/null @@ -1,143 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// aggregation_plan.h -// -// Identification: src/include/execution/plans/aggregation_plan.h -// -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include -#include -#include - -#include "binder/bound_order_by.h" -#include 
"common/util/hash_util.h" -#include "execution/expressions/abstract_expression.h" -#include "execution/plans/abstract_plan.h" -#include "fmt/format.h" -#include "storage/table/tuple.h" - -namespace bustub { - -/** WindowFunctionType enumerates all the possible window functions in our system */ -enum class WindowFunctionType { CountStarAggregate, CountAggregate, SumAggregate, MinAggregate, MaxAggregate, Rank }; - -class WindowFunctionPlanNode : public AbstractPlanNode { - public: - /** - * Construct a new WindowFunctionPlanNode. - * @param output_schema The output format of this plan node - * @param child The child plan to aggregate data over - * @param window_func_indexes The indexes of the window functions - * @param columns All columns include the placeholder for window functions - * @param partition_bys The partition by clause of the window functions - * @param order_bys The order by clause of the window functions - * @param funcions The expressions that we are aggregating - * @param window_func_types The types that we are aggregating - * - * Window Aggregation is different from normal aggregation as it outputs one row for each inputing rows, - * and can be combined with normal selected columns. The columns in WindowFunctionPlanNode contains both - * normal selected columns and placeholder columns for window aggregations. 
- * - * For example, if we have a query like: - * SELECT 0.1, 0.2, SUM(0.3) OVER (PARTITION BY 0.2 ORDER BY 0.3), SUM(0.4) OVER (PARTITION BY 0.1 ORDER BY - * 0.2,0.3) FROM table; - * - * The WindowFunctionPlanNode should contains following structure: - * columns: std::vector{0.1, 0.2, 0.-1(placeholder), 0.-1(placeholder)} - * partition_bys: std::vector>{{0.2}, {0.1}} - * order_bys: std::vector>{{0.3}, {0.2,0.3}} - * functions: std::vector{0.3, 0.4} - * window_func_types: std::vector{SumAggregate, SumAggregate} - */ - WindowFunctionPlanNode(SchemaRef output_schema, AbstractPlanNodeRef child, std::vector window_func_indexes, - std::vector columns, - std::vector> partition_bys, - std::vector>> order_bys, - std::vector functions, - std::vector window_func_types) - : AbstractPlanNode(std::move(output_schema), {std::move(child)}), columns_(std::move(columns)) { - for (uint32_t i = 0; i < window_func_indexes.size(); i++) { - window_functions_[window_func_indexes[i]] = - WindowFunction{functions[i], window_func_types[i], partition_bys[i], order_bys[i]}; - } - } - - /** @return The type of the plan node */ - auto GetType() const -> PlanType override { return PlanType::Window; } - - /** @return the child of this aggregation plan node */ - auto GetChildPlan() const -> AbstractPlanNodeRef { - BUSTUB_ASSERT(GetChildren().size() == 1, "Window Aggregation expected to only have one child."); - return GetChildAt(0); - } - - static auto InferWindowSchema(const std::vector &columns) -> Schema; - - BUSTUB_PLAN_NODE_CLONE_WITH_CHILDREN(WindowFunctionPlanNode); - - struct WindowFunction { - AbstractExpressionRef function_; - WindowFunctionType type_; - std::vector partition_by_; - std::vector> order_by_; - }; - - /** all columns expressions */ - std::vector columns_; - - std::unordered_map window_functions_; - - protected: - auto PlanNodeToString() const -> std::string override; -}; - -} // namespace bustub - -template <> -struct fmt::formatter : formatter { - template - auto 
format(const bustub::WindowFunctionPlanNode::WindowFunction &x, FormatContext &ctx) const { - return formatter::format(fmt::format("{{ function_arg={}, type={}, partition_by={}, order_by={} }}", - x.function_, x.type_, x.partition_by_, x.order_by_), - ctx); - } -}; - -template <> -struct fmt::formatter : formatter { - template - auto format(bustub::WindowFunctionType c, FormatContext &ctx) const { - using bustub::WindowFunctionType; - std::string name = "unknown"; - switch (c) { - case WindowFunctionType::CountStarAggregate: - name = "count_star"; - break; - case WindowFunctionType::CountAggregate: - name = "count"; - break; - case WindowFunctionType::SumAggregate: - name = "sum"; - break; - case WindowFunctionType::MinAggregate: - name = "min"; - break; - case WindowFunctionType::MaxAggregate: - name = "max"; - break; - case WindowFunctionType::Rank: - name = "rank"; - break; - } - return formatter::format(name, ctx); - } -}; diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index d625f89..57eb085 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -12,6 +12,8 @@ #include "execution/expressions/abstract_expression.h" #include "execution/plans/abstract_plan.h" +#define BUSTUB_OPTIMIZER_HACK_REMOVE_AFTER_2022_FALL + namespace bustub { /** @@ -76,19 +78,15 @@ class Optimizer { -> AbstractExpressionRef; /** @brief check if the predicate is true::boolean */ - auto IsPredicateTrue(const AbstractExpressionRef &expr) -> bool; + auto IsPredicateTrue(const AbstractExpression &expr) -> bool; + + auto IsPredicateFalse(const AbstractExpression &expr) -> bool; /** * @brief optimize order by as index scan if there's an index on a table */ auto OptimizeOrderByAsIndexScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; - /** - * @brief optimize seq scan as index scan if there's an index on a table - * @note Fall 2023 only: using hash index and only support point lookup - */ - auto 
OptimizeSeqScanAsIndexScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; - /** @brief check if the index can be matched */ auto MatchIndex(const std::string &table_name, uint32_t index_key_idx) -> std::optional>; @@ -98,6 +96,18 @@ class Optimizer { */ auto OptimizeSortLimitAsTopN(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; + auto OptimizeReorderJoinUseIndex(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; + + auto OptimizePredicatePushDown(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; + + auto OptimizeFalseFilter(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; + + auto OptimizeRemoveJoin(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; + + auto OptimizeRemoveColumn(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; + + auto OptimizeMergeFilterIndexScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef; + /** * @brief get the estimated cardinality for a table based on the table name. Useful when join reordering. BusTub * doesn't support statistics for now, so it's the only way for you to get the table size :( diff --git a/src/include/optimizer/optimizer_internal.h b/src/include/optimizer/optimizer_internal.h deleted file mode 100644 index 5bcb010..0000000 --- a/src/include/optimizer/optimizer_internal.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -namespace bustub { - -// Note: You can define your optimizer helper functions here -void OptimizerHelperFunction(); - -} // namespace bustub diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index ac6d829..d217b7c 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -14,7 +14,6 @@ #include "common/exception.h" #include "common/macros.h" #include "execution/plans/aggregation_plan.h" -#include "execution/plans/window_plan.h" namespace bustub { @@ -36,7 +35,6 @@ class BoundJoinRef; class BoundExpressionListRef; class BoundAggCall; class BoundCTERef; -class BoundFuncCall; class ColumnValueExpression; 
/** @@ -63,7 +61,7 @@ class PlannerContext { /** * In the second phase of aggregation planning, we plan agg calls from `aggregations_`, and generate - * an aggregation plan node. The expressions in the vector should be used over the output from the + * an aggregation plan node. The expressions in thie vector should be used over the output from the * aggregation plan node. */ std::vector expr_in_agg_; @@ -120,9 +118,6 @@ class Planner { auto PlanBinaryOp(const BoundBinaryOp &expr, const std::vector &children) -> AbstractExpressionRef; - auto PlanFuncCall(const BoundFuncCall &expr, const std::vector &children) - -> AbstractExpressionRef; - auto PlanColumnRef(const BoundColumnRef &expr, const std::vector &children) -> std::tuple>; @@ -131,23 +126,15 @@ class Planner { auto PlanSelectAgg(const SelectStatement &statement, AbstractPlanNodeRef child) -> AbstractPlanNodeRef; - auto PlanSelectWindow(const SelectStatement &statement, AbstractPlanNodeRef child) -> AbstractPlanNodeRef; - auto PlanAggCall(const BoundAggCall &agg_call, const std::vector &children) -> std::tuple>; auto GetAggCallFromFactory(const std::string &func_name, std::vector args) -> std::tuple>; - auto GetWindowAggCallFromFactory(const std::string &func_name, std::vector args) - -> std::tuple>; - auto GetBinaryExpressionFromFactory(const std::string &op_name, AbstractExpressionRef left, AbstractExpressionRef right) -> AbstractExpressionRef; - auto GetFuncCallFromFactory(const std::string &func_name, std::vector args) - -> AbstractExpressionRef; - auto PlanInsert(const InsertStatement &statement) -> AbstractPlanNodeRef; auto PlanDelete(const DeleteStatement &statement) -> AbstractPlanNodeRef; diff --git a/src/include/primer/orset.h b/src/include/primer/orset.h deleted file mode 100644 index f602baa..0000000 --- a/src/include/primer/orset.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include -#include - -namespace bustub { - -/** @brief Unique ID type. 
*/ -using uid_t = int64_t; - -/** @brief The observed remove set datatype. */ -template -class ORSet { - public: - ORSet() = default; - - /** - * @brief Checks if an element is in the set. - * - * @param elem the element to check - * @return true if the element is in the set, and false otherwise. - */ - auto Contains(const T &elem) const -> bool; - - /** - * @brief Adds an element to the set. - * - * @param elem the element to add - * @param uid unique token associated with the add operation. - */ - void Add(const T &elem, uid_t uid); - - /** - * @brief Removes an element from the set if it exists. - * - * @param elem the element to remove. - */ - void Remove(const T &elem); - - /** - * @brief Merge changes from another ORSet. - * - * @param other another ORSet - */ - void Merge(const ORSet &other); - - /** - * @brief Gets all the elements in the set. - * - * @return all the elements in the set. - */ - auto Elements() const -> std::vector; - - /** - * @brief Gets a string representation of the set. - * - * @return a string representation of the set. - */ - auto ToString() const -> std::string; - - private: - // TODO(student): Add your private memeber variables to represent ORSet. -}; - -} // namespace bustub diff --git a/src/include/primer/orset_driver.h b/src/include/primer/orset_driver.h deleted file mode 100644 index b575515..0000000 --- a/src/include/primer/orset_driver.h +++ /dev/null @@ -1,134 +0,0 @@ -#pragma once - -#include -#include -#include "primer/orset.h" - -namespace bustub { - -/** @brief Unique ID type. */ -using uid_t = int64_t; - -template -class ORSetDriver; - -template -class ORSetNode { - public: - ORSetNode() = delete; - - explicit ORSetNode(ORSetDriver *driver, size_t node_id, size_t n) - : driver_(driver), node_id_(node_id), peer_size_(n), last_read_version_(n, 0) {} - - /** - * @brief Adds an element to the local ORSet. 
- * - * @param elem the element to add - */ - inline void Add(const T &elem) { orset_.Add(elem, driver_->GenerateUid()); } - - /** - * @brief Removes an element from the local ORSet. - * - * @param elem the element to remove. - */ - inline void Remove(const T &elem) { orset_.Remove(elem); } - - /** - * @brief Checks if an element is in the local ORSet. - * - * @param elem the element to check - * @return true if the element is in the set, and false otherwise. - */ - inline auto Contains(const T &elem) -> bool { return orset_.Contains(elem); } - - /** - * @brief Merges another ORSet to the local ORSet. - * - * @param to_be_merged the ORSet to be merged. - */ - inline void Merge(const ORSet to_be_merged) { orset_.Merge(to_be_merged); } - - /** - * @brief Saves all local changes to the driver. - */ - void Save(); - - /** - * @brief Loads all the remote changes to the local ORSet. - */ - void Load(); - - /** - * @brief Gets a copy of the local ORSet. - * - * @return the local ORSet. - */ - inline auto GetORSet() -> ORSet { return orset_; } - - private: - /** @brief The local ORSet. */ - ORSet orset_; - - /** @brief ORSet Driver. */ - ORSetDriver *driver_; - - /** @brief node id */ - size_t node_id_; - - /** @brief total number of nodes in the same network */ - size_t peer_size_; - - /** @brief last read version number of each peer's copy */ - std::vector last_read_version_; -}; - -/** @brief A driver class for managing ORSets. */ -template -class ORSetDriver { - friend class ORSetNode; - - public: - explicit ORSetDriver(size_t num_orset_node); - - /** - * @brief Gets the ORSetNode at index. - */ - inline auto operator[](size_t index) -> std::unique_ptr> & { return orset_nodes_[index]; } - auto operator[](size_t index) const -> const std::unique_ptr> & { return orset_nodes_[index]; } - - /** - * @brief Gets the ORSet node at index. - * - * @param index index of the ORSet node. - * @return the ORSet node associated with the index. 
- */ - inline auto At(size_t index) -> std::unique_ptr> & { return orset_nodes_[index]; } - - /** - * @brief Saves changes in all nodes and then load all the changes. - */ - void Sync(); - - private: - /** - * @brief Generates a unique id. - * - * @return a unique id. - */ - inline auto GenerateUid() -> uid_t { return next_uid_++; } - - /** @brief A list of ORSet nodes. */ - std::vector>> orset_nodes_; - - /** @brief List of saved copies of ORSet. */ - std::vector> saved_copies_; - - /** @brief latest version number of each node */ - std::vector version_counter_; - - /** @brief Monotonically increasing unique id for the elements. */ - uid_t next_uid_ = 0; -}; - -} // namespace bustub diff --git a/src/include/primer/p0_trie.h b/src/include/primer/p0_trie.h new file mode 100644 index 0000000..9097547 --- /dev/null +++ b/src/include/primer/p0_trie.h @@ -0,0 +1,444 @@ +//===----------------------------------------------------------------------===// +// +// BusTub +// +// p0_trie.h +// +// Identification: src/include/primer/p0_trie.h +// +// Copyright (c) 2015-2022, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/exception.h" +#include "common/rwlatch.h" + +namespace bustub { + +/** + * TrieNode is a generic container for any node in Trie. + */ +class TrieNode { + public: + /** + * TODO(P0): Add implementation + * + * @brief Construct a new Trie Node object with the given key char. + * is_end_ flag should be initialized to false in this constructor. + * + * @param key_char Key character of this trie node + */ + explicit TrieNode(char key_char) { + key_char_ = key_char; + is_end_ = false; + children_.clear(); + } + + /** + * TODO(P0): Add implementation + * + * @brief Move constructor for trie node object. 
The unique pointers stored + * in children_ should be moved from other_trie_node to new trie node. + * + * @param other_trie_node Old trie node. + */ + TrieNode(TrieNode &&other_trie_node) noexcept { + key_char_ = other_trie_node.key_char_; + is_end_ = other_trie_node.is_end_; + children_.swap(other_trie_node.children_); + } + + /** + * @brief Destroy the TrieNode object. + */ + virtual ~TrieNode() = default; + + /** + * TODO(P0): Add implementation + * + * @brief Whether this trie node has a child node with specified key char. + * + * @param key_char Key char of child node. + * @return True if this trie node has a child with given key, false otherwise. + */ + bool HasChild(char key_char) const { return children_.find(key_char) != children_.end(); } + + /** + * TODO(P0): Add implementation + * + * @brief Whether this trie node has any children at all. This is useful + * when implementing 'Remove' functionality. + * + * @return True if this trie node has any child node, false if it has no child node. + */ + bool HasChildren() const { return !children_.empty(); } + + /** + * TODO(P0): Add implementation + * + * @brief Whether this trie node is the ending character of a key string. + * + * @return True if is_end_ flag is true, false if is_end_ is false. + */ + bool IsEndNode() const { return is_end_; } + + /** + * TODO(P0): Add implementation + * + * @brief Return key char of this trie node. + * + * @return key_char_ of this trie node. + */ + char GetKeyChar() const { return key_char_; } + + /** + * TODO(P0): Add implementation + * + * @brief Insert a child node for this trie node into children_ map, given the key char and + * unique_ptr of the child node. If specified key_char already exists in children_, + * return nullptr. If parameter `child`'s key char is different than parameter + * `key_char`, return nullptr. + * + * Note that parameter `child` is rvalue and should be moved when it is + * inserted into children_map. 
+ * + * The return value is a pointer to unique_ptr because pointer to unique_ptr can access the + * underlying data without taking ownership of the unique_ptr. Further, we can set the return + * value to nullptr when error occurs. + * + * @param key Key of child node + * @param child Unique pointer created for the child node. This should be added to children_ map. + * @return Pointer to unique_ptr of the inserted child node. If insertion fails, return nullptr. + */ + std::unique_ptr *InsertChildNode(char key_char, std::unique_ptr &&child) { + if (HasChild(key_char) || key_char != child->key_char_) { + return nullptr; + } + children_[key_char] = std::forward>(child); + return &children_[key_char]; + } + + /** + * TODO(P0): Add implementation + * + * @brief Get the child node given its key char. If child node for given key char does + * not exist, return nullptr. + * + * @param key Key of child node + * @return Pointer to unique_ptr of the child node, nullptr if child + * node does not exist. + */ + std::unique_ptr *GetChildNode(char key_char) { + auto node = children_.find(key_char); + if (node != children_.end()) { + return &(node->second); + } + return nullptr; + } + + /** + * TODO(P0): Add implementation + * + * @brief Remove child node from children_ map. + * If key_char does not exist in children_, return immediately. + * + * @param key_char Key char of child node to be removed + */ + void RemoveChildNode(char key_char) { + auto node = children_.find(key_char); + if (node != children_.end()) { + children_.erase(key_char); + } + } + + /** + * TODO(P0): Add implementation + * + * @brief Set the is_end_ flag to true or false. 
+ * + * @param is_end Whether this trie node is ending char of a key string + */ + void SetEndNode(bool is_end) { is_end_ = is_end; } + + protected: + /** Key character of this trie node */ + char key_char_; + /** whether this node marks the end of a key */ + bool is_end_{false}; + /** A map of all child nodes of this trie node, which can be accessed by each + * child node's key char. */ + std::unordered_map> children_; +}; + +/** + * TrieNodeWithValue is a node that marks the ending of a key, and it can + * hold a value of any type T. + */ +template +class TrieNodeWithValue : public TrieNode { + private: + /* Value held by this trie node. */ + T value_; + + public: + /** + * TODO(P0): Add implementation + * + * @brief Construct a new TrieNodeWithValue object from a TrieNode object and specify its value. + * This is used when a non-terminal TrieNode is converted to terminal TrieNodeWithValue. + * + * The children_ map of TrieNode should be moved to the new TrieNodeWithValue object. + * Since it contains unique pointers, the first parameter is a rvalue reference. + * + * You should: + * 1) invoke TrieNode's move constructor to move data from TrieNode to + * TrieNodeWithValue. + * 2) set value_ member variable of this node to parameter `value`. + * 3) set is_end_ to true + * + * @param trieNode TrieNode whose data is to be moved to TrieNodeWithValue + * @param value + */ + TrieNodeWithValue(TrieNode &&trieNode, T value) : TrieNode(std::forward(trieNode)) { + value_ = value; + SetEndNode(true); + } + + /** + * TODO(P0): Add implementation + * + * @brief Construct a new TrieNodeWithValue. This is used when a new terminal node is constructed. + * + * You should: + * 1) Invoke the constructor for TrieNode with the given key_char. + * 2) Set value_ for this node. + * 3) set is_end_ to true. 
+ * + * @param key_char Key char of this node + * @param value Value of this node + */ + TrieNodeWithValue(char key_char, T value) : TrieNode(key_char) { + value_ = value; + SetEndNode(true); + } + + /** + * @brief Destroy the Trie Node With Value object + */ + ~TrieNodeWithValue() override = default; + + /** + * @brief Get the stored value_. + * + * @return Value of type T stored in this node + */ + T GetValue() const { return value_; } +}; + +/** + * Trie is a concurrent key-value store. Each key is a string and its corresponding + * value can be any type. + */ +class Trie { + private: + /* Root node of the trie */ + std::unique_ptr root_; + /* Read-write lock for the trie */ + ReaderWriterLatch latch_; + + public: + /** + * TODO(P0): Add implementation + * + * @brief Construct a new Trie object. Initialize the root node with '\0' + * character. + */ + Trie() { + auto *root = new TrieNode('\0'); + root_.reset(root); + } + + /** + * TODO(P0): Add implementation + * + * @brief Insert key-value pair into the trie. + * + * If the key is an empty string, return false immediately. + * + * If the key already exists, return false. Duplicated keys are not allowed and + * you should never overwrite value of an existing key. + * + * When you reach the ending character of a key: + * 1. If TrieNode with this ending character does not exist, create new TrieNodeWithValue + * and add it to parent node's children_ map. + * 2. If the terminal node is a TrieNode, then convert it into TrieNodeWithValue by + * invoking the appropriate constructor. + * 3. If it is already a TrieNodeWithValue, + * then insertion fails and returns false. Do not overwrite existing data with new data. + * + * You can quickly check whether a TrieNode pointer holds TrieNode or TrieNodeWithValue + * by checking the is_end_ flag. If is_end_ == false, then it points to TrieNode. If + * is_end_ == true, it points to TrieNodeWithValue. 
+ * + * @param key Key used to traverse the trie and find the correct node + * @param value Value to be inserted + * @return True if insertion succeeds, false if the key already exists + */ + template + bool Insert(const std::string &key, T value) { + if (key.empty()) { + return false; + } + latch_.WLock(); + auto c = key.begin(); + auto pre_child = &root_; + while (c != key.end()) { + auto cur = c++; + if (c == key.end()) { + break; + } + + if (!pre_child->get()->HasChild(*cur)) { + pre_child = pre_child->get()->InsertChildNode(*cur, std::make_unique(*cur)); + } else { + pre_child = pre_child->get()->GetChildNode(*cur); + } + } + + c--; + + auto end_node = pre_child->get()->GetChildNode(*c); + if (end_node != nullptr && end_node->get()->IsEndNode()) { + latch_.WUnlock(); + return false; + } + + if (end_node != nullptr) { + auto new_node = new TrieNodeWithValue(std::move(**end_node), value); + end_node->reset(new_node); + latch_.WUnlock(); + return true; + } + + pre_child = pre_child->get()->InsertChildNode(*c, std::make_unique(*c)); + auto new_node = new TrieNodeWithValue(std::move(**pre_child), value); + pre_child->reset(new_node); + latch_.WUnlock(); + return true; + } + + /** + * TODO(P0): Add implementation + * + * @brief Remove key value pair from the trie. + * This function should also remove nodes that are no longer part of another + * key. If key is empty or not found, return false. + * + * You should: + * 1) Find the terminal node for the given key. + * 2) If this terminal node does not have any children, remove it from its + * parent's children_ map. + * 3) Recursively remove nodes that have no children and are not terminal node + * of another key. 
+ * + * @param key Key used to traverse the trie and find the correct node + * @return True if the key exists and is removed, false otherwise + */ + bool Remove(const std::string &key) { + if (key.empty()) { + return false; + } + + latch_.WLock(); + + std::stack *>> s; + auto c = key.begin(); + auto pre_child = &root_; + while (c != key.end()) { + auto cur = c++; + if (pre_child->get()->HasChild(*cur)) { + s.push(std::make_tuple(*cur, pre_child)); + pre_child = pre_child->get()->GetChildNode(*cur); + continue; + } + latch_.WUnlock(); + return false; + } + + while (!s.empty()) { + auto t = s.top(); + s.pop(); + auto key = std::get<0>(t); + auto node = std::get<1>(t); + auto flag = (*node)->GetChildNode(key); + if (flag != nullptr && (*flag)->HasChildren()) { + continue; + } + (*node)->RemoveChildNode(key); + } + latch_.WUnlock(); + return true; + } + + /** + * TODO(P0): Add implementation + * + * @brief Get the corresponding value of type T given its key. + * If key is empty, set success to false. + * If key does not exist in trie, set success to false. + * If the given type T is not the same as the value type stored in TrieNodeWithValue + * (ie. GetValue is called but terminal node holds std::string), + * set success to false. + * + * To check whether the two types are the same, dynamic_cast + * the terminal TrieNode to TrieNodeWithValue. If the casted result + * is not nullptr, then type T is the correct type. 
+ * + * @param key Key used to traverse the trie and find the correct node + * @param success Whether GetValue is successful or not + * @return Value of type T if type matches + */ + template + T GetValue(const std::string &key, bool *success) { + *success = false; + latch_.RLock(); + + auto pre_child = &root_; + auto c = key.begin(); + while (c != key.end()) { + auto cur = c++; + auto next_node = pre_child->get()->GetChildNode(*cur); + + if (!next_node) { + *success = false; + break; + } + + if (next_node->get()->IsEndNode() && c == key.end()) { + auto flag_node = dynamic_cast *>(next_node->get()); + if (!flag_node) { + *success = false; + break; + } + *success = true; + latch_.RUnlock(); + return flag_node->GetValue(); + } + pre_child = next_node; + } + latch_.RUnlock(); + return {}; + } +}; +} // namespace bustub diff --git a/src/include/primer/trie.h b/src/include/primer/trie.h deleted file mode 100644 index 2206b34..0000000 --- a/src/include/primer/trie.h +++ /dev/null @@ -1,136 +0,0 @@ -#pragma once - -#include -#include -#include // NOLINT -#include -#include -#include -#include -#include -#include -#include -#include - -namespace bustub { - -/// A special type that will block the move constructor and move assignment operator. Used in TrieStore tests. -class MoveBlocked { - public: - explicit MoveBlocked(std::future wait) : wait_(std::move(wait)) {} - - MoveBlocked(const MoveBlocked &) = delete; - MoveBlocked(MoveBlocked &&that) noexcept { - if (!that.waited_) { - that.wait_.get(); - } - that.waited_ = waited_ = true; - } - - auto operator=(const MoveBlocked &) -> MoveBlocked & = delete; - auto operator=(MoveBlocked &&that) noexcept -> MoveBlocked & { - if (!that.waited_) { - that.wait_.get(); - } - that.waited_ = waited_ = true; - return *this; - } - - bool waited_{false}; - std::future wait_; -}; - -// A TrieNode is a node in a Trie. -class TrieNode { - public: - // Create a TrieNode with no children. 
- TrieNode() = default; - - // Create a TrieNode with some children. - explicit TrieNode(std::map> children) : children_(std::move(children)) {} - - virtual ~TrieNode() = default; - - // Clone returns a copy of this TrieNode. If the TrieNode has a value, the value is copied. The return - // type of this function is a unique_ptr to a TrieNode. - // - // You cannot use the copy constructor to clone the node because it doesn't know whether a `TrieNode` - // contains a value or not. - // - // Note: if you want to convert `unique_ptr` into `shared_ptr`, you can use `std::shared_ptr(std::move(ptr))`. - virtual auto Clone() const -> std::unique_ptr { return std::make_unique(children_); } - - // A map of children, where the key is the next character in the key, and the value is the next TrieNode. - // You MUST store the children information in this structure. You are NOT allowed to remove the `const` from - // the structure. - std::map> children_; - - // Indicates if the node is the terminal node. - bool is_value_node_{false}; - - // You can add additional fields and methods here except storing children. But in general, you don't need to add extra - // fields to complete this project. -}; - -// A TrieNodeWithValue is a TrieNode that also has a value of type T associated with it. -template -class TrieNodeWithValue : public TrieNode { - public: - // Create a trie node with no children and a value. - explicit TrieNodeWithValue(std::shared_ptr value) : value_(std::move(value)) { this->is_value_node_ = true; } - - // Create a trie node with children and a value. - TrieNodeWithValue(std::map> children, std::shared_ptr value) - : TrieNode(std::move(children)), value_(std::move(value)) { - this->is_value_node_ = true; - } - - // Override the Clone method to also clone the value. - // - // Note: if you want to convert `unique_ptr` into `shared_ptr`, you can use `std::shared_ptr(std::move(ptr))`. 
- auto Clone() const -> std::unique_ptr override { - return std::make_unique>(children_, value_); - } - - // The value associated with this trie node. - std::shared_ptr value_; -}; - -// A Trie is a data structure that maps strings to values of type T. All operations on a Trie should not -// modify the trie itself. It should reuse the existing nodes as much as possible, and create new nodes to -// represent the new trie. -// -// You are NOT allowed to remove any `const` in this project, or use `mutable` to bypass the const checks. -class Trie { - private: - // The root of the trie. - std::shared_ptr root_{nullptr}; - - // Create a new trie with the given root. - explicit Trie(std::shared_ptr root) : root_(std::move(root)) {} - - public: - // Create an empty trie. - Trie() = default; - - // Get the value associated with the given key. - // 1. If the key is not in the trie, return nullptr. - // 2. If the key is in the trie but the type is mismatched, return nullptr. - // 3. Otherwise, return the value. - template - auto Get(std::string_view key) const -> const T *; - - // Put a new key-value pair into the trie. If the key already exists, overwrite the value. - // Returns the new trie. - template - auto Put(std::string_view key, T value) const -> Trie; - - // Remove the key from the trie. If the key does not exist, return the original trie. - // Otherwise, returns the new trie. - auto Remove(std::string_view key) const -> Trie; - - // Get the root of the trie, should only be used in test cases. 
- auto GetRoot() const -> std::shared_ptr { return root_; } -}; - -} // namespace bustub diff --git a/src/include/primer/trie_answer.h b/src/include/primer/trie_answer.h deleted file mode 100644 index aab3a2c..0000000 --- a/src/include/primer/trie_answer.h +++ /dev/null @@ -1,7 +0,0 @@ -#include "primer/trie.h" - -// TODO(student): fill your answer here - -const uint32_t CASE_1_YOUR_ANSWER = 0; -const uint32_t CASE_2_YOUR_ANSWER = 0; -const uint32_t CASE_3_YOUR_ANSWER = 0; diff --git a/src/include/primer/trie_store.h b/src/include/primer/trie_store.h deleted file mode 100644 index 3a8a597..0000000 --- a/src/include/primer/trie_store.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "primer/trie.h" - -namespace bustub { - -// This class is used to guard the value returned by the trie. It holds a reference to the root so -// that the reference to the value will not be invalidated. -template -class ValueGuard { - public: - ValueGuard(Trie root, const T &value) : root_(std::move(root)), value_(value) {} - auto operator*() const -> const T & { return value_; } - - private: - Trie root_; - const T &value_; -}; - -// This class is a thread-safe wrapper around the Trie class. It provides a simple interface for -// accessing the trie. It should allow concurrent reads and a single write operation at the same -// time. -class TrieStore { - public: - // This function returns a ValueGuard object that holds a reference to the value in the trie. If - // the key does not exist in the trie, it will return std::nullopt. - template - auto Get(std::string_view key) -> std::optional>; - - // This function will insert the key-value pair into the trie. If the key already exists in the - // trie, it will overwrite the value. - template - void Put(std::string_view key, T value); - - // This function will remove the key-value pair from the trie. - void Remove(std::string_view key); - - private: - // This mutex protects the root. 
Every time you want to access the trie root or modify it, you - // will need to take this lock. - std::mutex root_lock_; - - // This mutex sequences all writes operations and allows only one write operation at a time. - std::mutex write_lock_; - - // Stores the current root for the trie. - Trie root_; -}; - -} // namespace bustub diff --git a/src/include/storage/disk/disk_manager_memory.h b/src/include/storage/disk/disk_manager_memory.h index 057dc61..1ba22da 100644 --- a/src/include/storage/disk/disk_manager_memory.h +++ b/src/include/storage/disk/disk_manager_memory.h @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// #include -#include // NOLINT #include #include #include // NOLINT @@ -19,14 +18,12 @@ #include #include #include -#include // NOLINT #include #include #include "common/config.h" #include "common/exception.h" #include "common/logger.h" -#include "fmt/core.h" #include "storage/disk/disk_manager.h" namespace bustub { @@ -65,7 +62,7 @@ class DiskManagerMemory : public DiskManager { */ class DiskManagerUnlimitedMemory : public DiskManager { public: - DiskManagerUnlimitedMemory() { std::fill(recent_access_.begin(), recent_access_.end(), -1); } + DiskManagerUnlimitedMemory() = default; /** * Write a page to the database file. 
@@ -73,12 +70,7 @@ class DiskManagerUnlimitedMemory : public DiskManager { * @param page_data raw page data */ void WritePage(page_id_t page_id, const char *page_data) override { - ProcessLatency(page_id); - std::unique_lock l(mutex_); - if (!thread_id_.has_value()) { - thread_id_ = std::this_thread::get_id(); - } if (page_id >= static_cast(data_.size())) { data_.resize(page_id + 1); } @@ -90,8 +82,6 @@ class DiskManagerUnlimitedMemory : public DiskManager { l.unlock(); memcpy(ptr->first.data(), page_data, BUSTUB_PAGE_SIZE); - - PostProcessLatency(page_id); } /** @@ -100,20 +90,13 @@ class DiskManagerUnlimitedMemory : public DiskManager { * @param[out] page_data output buffer */ void ReadPage(page_id_t page_id, char *page_data) override { - ProcessLatency(page_id); - std::unique_lock l(mutex_); - if (!thread_id_.has_value()) { - thread_id_ = std::this_thread::get_id(); - } if (page_id >= static_cast(data_.size()) || page_id < 0) { - fmt::println(stderr, "page {} not in range", page_id); - std::terminate(); + LOG_WARN("page not exist"); return; } if (data_[page_id] == nullptr) { - fmt::println(stderr, "page {} not exist", page_id); - std::terminate(); + LOG_WARN("page not exist"); return; } std::shared_ptr ptr = data_[page_id]; @@ -121,58 +104,12 @@ class DiskManagerUnlimitedMemory : public DiskManager { l.unlock(); memcpy(page_data, ptr->first.data(), BUSTUB_PAGE_SIZE); - - PostProcessLatency(page_id); - } - - void ProcessLatency(page_id_t page_id) { - uint64_t sleep_micro_sec = 1000; // for random access, 1ms latency - if (latency_simulator_enabled_) { - std::unique_lock lck(latency_processor_mutex_); - for (auto &recent_page_id : recent_access_) { - if ((recent_page_id & (~0x3)) == (page_id & (~0x3))) { - sleep_micro_sec = 100; // for access in the same "block", 0.1ms latency - break; - } - if (page_id >= recent_page_id && page_id <= recent_page_id + 3) { - sleep_micro_sec = 100; // for sequential access, 0.1ms latency - break; - } - } - lck.unlock(); - 
std::this_thread::sleep_for(std::chrono::microseconds(sleep_micro_sec)); - } - } - - void PostProcessLatency(page_id_t page_id) { - if (latency_simulator_enabled_) { - std::scoped_lock lck(latency_processor_mutex_); - recent_access_[access_ptr_] = page_id; - access_ptr_ = (access_ptr_ + 1) % recent_access_.size(); - } - } - - void EnableLatencySimulator(bool enabled) { latency_simulator_enabled_ = enabled; } - - auto GetLastReadThreadAndClear() -> std::optional { - std::unique_lock lck(mutex_); - auto t = thread_id_; - thread_id_ = std::nullopt; - return t; } private: - bool latency_simulator_enabled_{false}; - - std::mutex latency_processor_mutex_; - std::array recent_access_; - uint64_t access_ptr_{0}; - + std::mutex mutex_; using Page = std::array; using ProtectedPage = std::pair; - - std::mutex mutex_; - std::optional thread_id_; std::vector> data_; }; diff --git a/src/include/storage/disk/disk_scheduler.h b/src/include/storage/disk/disk_scheduler.h deleted file mode 100644 index eee9f0f..0000000 --- a/src/include/storage/disk/disk_scheduler.h +++ /dev/null @@ -1,95 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// disk_scheduler.h -// -// Identification: src/include/storage/disk/disk_scheduler.h -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include // NOLINT -#include -#include // NOLINT - -#include "common/channel.h" -#include "storage/disk/disk_manager.h" - -namespace bustub { - -/** - * @brief Represents a Write or Read request for the DiskManager to execute. - */ -struct DiskRequest { - /** Flag indicating whether the request is a write or a read. */ - bool is_write_; - - /** - * Pointer to the start of the memory location where a page is either: - * 1. being read into from disk (on a read). - * 2. being written out to disk (on a write). 
- */ - char *data_; - - /** ID of the page being read from / written to disk. */ - page_id_t page_id_; - - /** Callback used to signal to the request issuer when the request has been completed. */ - std::promise callback_; -}; - -/** - * @brief The DiskScheduler schedules disk read and write operations. - * - * A request is scheduled by calling DiskScheduler::Schedule() with an appropriate DiskRequest object. The scheduler - * maintains a background worker thread that processes the scheduled requests using the disk manager. The background - * thread is created in the DiskScheduler constructor and joined in its destructor. - */ -class DiskScheduler { - public: - explicit DiskScheduler(DiskManager *disk_manager); - ~DiskScheduler(); - - /** - * TODO(P1): Add implementation - * - * @brief Schedules a request for the DiskManager to execute. - * - * @param r The request to be scheduled. - */ - void Schedule(DiskRequest r); - - /** - * TODO(P1): Add implementation - * - * @brief Background worker thread function that processes scheduled requests. - * - * The background thread needs to process requests while the DiskScheduler exists, i.e., this function should not - * return until ~DiskScheduler() is called. At that point you need to make sure that the function does return. - */ - void StartWorkerThread(); - - using DiskSchedulerPromise = std::promise; - - /** - * @brief Create a Promise object. If you want to implement your own version of promise, you can change this function - * so that our test cases can use your promise implementation. - * - * @return std::promise - */ - auto CreatePromise() -> DiskSchedulerPromise { return {}; }; - - private: - /** Pointer to the disk manager. */ - DiskManager *disk_manager_ __attribute__((__unused__)); - /** A shared queue to concurrently schedule and process requests. When the DiskScheduler's destructor is called, - * `std::nullopt` is put into the queue to signal to the background thread to stop execution. 
*/ - Channel> request_queue_; - /** The background thread responsible for issuing scheduled requests to the disk manager. */ - std::optional background_thread_; -}; -} // namespace bustub diff --git a/src/include/storage/disk/write_back_cache.h b/src/include/storage/disk/write_back_cache.h deleted file mode 100644 index 1a09b59..0000000 --- a/src/include/storage/disk/write_back_cache.h +++ /dev/null @@ -1,97 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// write_back_cache.h -// -// Identification: src/include/storage/disk/write_back_cache.h -// -// Copyright (c) 2015-2024, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include "common/config.h" -#include "common/macros.h" -#include "storage/page/page.h" - -namespace bustub { - -/** - * WriteBackCache provides extra memory space other than the buffer pool to store the pages. It's purpose - * is to gather the copy of pages that are about to be written back to disk, so that the bpm doesn't have - * to incur IO penality and wait for the write to be completed when evicting. - * Spring 24: The cache is limited to store a constant number of pages in total (8). - * !! ANY ATTEMPTS TO ADD ANOTHER IN-MEMORY CACHE WILL BE REVIEWED MANUALLY AS PER LEADERBOARD POLICY !! - */ -class WriteBackCache { - public: - WriteBackCache() : write_back_pages_{new Page[8]} {} - ~WriteBackCache() { delete[] write_back_pages_; } - DISALLOW_COPY_AND_MOVE(WriteBackCache); - - /** - * @brief Adds a new page to the write back cache. - * @param page the page pointer from the bpm that is about to be evicted. - * @return pointer to the copied page in the cache, or nullptr if the cache is full. 
- */ - auto Add(Page *page) -> Page * { - if ((page == nullptr) || IsFull()) { - return nullptr; - } - - uint32_t slot = FindFreeSlot(); - memcpy(write_back_pages_[slot].GetData(), page->GetData(), BUSTUB_PAGE_SIZE); - MarkSlotUsed(slot); - - return write_back_pages_ + slot; - } - - /** - * @brief Removes a page from the write back cache. - * @param page the pointer previously returned by Add. - */ - auto Remove(Page *page) -> void { - if (page != nullptr) { - MarkSlotFree(page - write_back_pages_); - } - } - - private: - /** @brief Whether the cache is full. */ - auto IsFull() -> bool { return free_slot_bitmap_ == 0xFFU; } - - /** @brief Finds a free slot in the cache, if not full. */ - auto FindFreeSlot() -> uint32_t { - BUSTUB_ASSERT(!IsFull(), "no free slot in write back cache"); - uint32_t i = 0; - uint8_t bitmap = free_slot_bitmap_; - while ((bitmap & 1U) != 0) { - bitmap >>= 1; - i++; - } - return i; - } - - /** @brief Marks a free slot as used. */ - void MarkSlotUsed(uint32_t slot) { - BUSTUB_ASSERT(((free_slot_bitmap_ >> slot) & 1U) == 0, "slot has already been used"); - free_slot_bitmap_ |= (1U << slot); - } - - /** @brief Marks a used slot as free. */ - void MarkSlotFree(uint32_t slot) { - BUSTUB_ASSERT(((free_slot_bitmap_ >> slot) & 1U) == 1, "slot is already free"); - free_slot_bitmap_ &= ~(1U << slot); - } - - /** The array of write back cache pages. */ - Page *write_back_pages_; - /** The bitmap that records which slots are free. */ - uint8_t free_slot_bitmap_{0}; -}; - -} // namespace bustub diff --git a/src/include/storage/index/b_plus_tree.h b/src/include/storage/index/b_plus_tree.h index 09e2cbd..bf1a3b8 100644 --- a/src/include/storage/index/b_plus_tree.h +++ b/src/include/storage/index/b_plus_tree.h @@ -1,190 +1,125 @@ -/** - * b_plus_tree.h - * - * Implementation of simple b+ tree data structure where internal pages direct - * the search and leaf pages contain actual data. 
- * (1) We only support unique key - * (2) support insert & remove - * (3) The structure should shrink and grow dynamically - * (4) Implement index iterator for range scan - */ +//===----------------------------------------------------------------------===// +// +// CMU-DB Project (15-445/645) +// ***DO NO SHARE PUBLICLY*** +// +// Identification: src/include/index/b_plus_tree.h +// +// Copyright (c) 2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// #pragma once -#include -#include -#include -#include #include -#include #include #include -#include "common/config.h" -#include "common/macros.h" #include "concurrency/transaction.h" #include "storage/index/index_iterator.h" -#include "storage/page/b_plus_tree_header_page.h" #include "storage/page/b_plus_tree_internal_page.h" #include "storage/page/b_plus_tree_leaf_page.h" -#include "storage/page/page_guard.h" + +#include "common/rwlatch.h" namespace bustub { -struct PrintableBPlusTree; +#define BPLUSTREE_TYPE BPlusTree + +enum class Operation { SEARCH, INSERT, DELETE }; /** - * @brief Definition of the Context class. + * Main class providing the API for the Interactive B+ Tree. * - * Hint: This class is designed to help you keep track of the pages - * that you're modifying or accessing. + * Implementation of simple b+ tree data structure where internal pages direct + * the search and leaf pages contain actual data. + * (1) We only support unique key + * (2) support insert & remove + * (3) The structure should shrink and grow dynamically + * (4) Implement index iterator for range scan */ -class Context { - public: - // When you insert into / remove from the B+ tree, store the write guard of header page here. - // Remember to drop the header page guard and set it to nullopt when you want to unlock all. 
- std::optional header_page_{std::nullopt}; - - // Save the root page id here so that it's easier to know if the current page is the root page. - page_id_t root_page_id_{INVALID_PAGE_ID}; - - // Store the write guards of the pages that you're modifying here. - std::deque write_set_; - - // You may want to use this when getting value, but not necessary. - std::deque read_set_; - - auto IsRootPage(page_id_t page_id) -> bool { return page_id == root_page_id_; } -}; - -#define BPLUSTREE_TYPE BPlusTree - -// Main class providing the API for the Interactive B+ Tree. INDEX_TEMPLATE_ARGUMENTS class BPlusTree { using InternalPage = BPlusTreeInternalPage; using LeafPage = BPlusTreeLeafPage; public: - explicit BPlusTree(std::string name, page_id_t header_page_id, BufferPoolManager *buffer_pool_manager, - const KeyComparator &comparator, int leaf_max_size = LEAF_PAGE_SIZE, - int internal_max_size = INTERNAL_PAGE_SIZE); + explicit BPlusTree(std::string name, BufferPoolManager *buffer_pool_manager, const KeyComparator &comparator, + int leaf_max_size = LEAF_PAGE_SIZE, int internal_max_size = INTERNAL_PAGE_SIZE); // Returns true if this B+ tree has no keys and values. auto IsEmpty() const -> bool; // Insert a key-value pair into this B+ tree. - auto Insert(const KeyType &key, const ValueType &value, Transaction *txn = nullptr) -> bool; + auto Insert(const KeyType &key, const ValueType &value, Transaction *transaction = nullptr) -> bool; // Remove a key and its value from this B+ tree. 
- void Remove(const KeyType &key, Transaction *txn); + void Remove(const KeyType &key, Transaction *transaction = nullptr); - // Return the value associated with a given key - auto GetValue(const KeyType &key, std::vector *result, Transaction *txn = nullptr) -> bool; + // return the value associated with a given key + auto GetValue(const KeyType &key, std::vector *result, Transaction *transaction = nullptr) -> bool; - // Return the page id of the root node + // return the page id of the root node auto GetRootPageId() -> page_id_t; - // Index iterator + // index iterator auto Begin() -> INDEXITERATOR_TYPE; - - auto End() -> INDEXITERATOR_TYPE; - auto Begin(const KeyType &key) -> INDEXITERATOR_TYPE; + auto End() -> INDEXITERATOR_TYPE; - // Print the B+ tree + // print the B+ tree void Print(BufferPoolManager *bpm); - // Draw the B+ tree + // draw the B+ tree void Draw(BufferPoolManager *bpm, const std::string &outf); - /** - * @brief draw a B+ tree, below is a printed - * B+ tree(3 max leaf, 4 max internal) after inserting key: - * {1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 18, 19, 20} - * - * (25) - * (9,17,19) (33) - * (1,5) (9,13) (17,18) (19,20,21) (25,29) (33,37) - * - * @return std::string - */ - auto DrawBPlusTree() -> std::string; - // read data from file and insert one by one - void InsertFromFile(const std::string &file_name, Transaction *txn = nullptr); + void InsertFromFile(const std::string &file_name, Transaction *transaction = nullptr); // read data from file and remove one by one - void RemoveFromFile(const std::string &file_name, Transaction *txn = nullptr); - - /** - * @brief Read batch operations from input file, below is a sample file format - * insert some keys and delete 8, 9 from the tree with one step. 
- * { i1 i2 i3 i4 i5 i6 i7 i8 i9 i10 i30 d8 d9 } // batch.txt - * B+ Tree(4 max leaf, 4 max internal) after processing: - * (5) - * (3) (7) - * (1,2) (3,4) (5,6) (7,10,30) // The output tree example - */ - void BatchOpsFromFile(const std::string &file_name, Transaction *txn = nullptr); + void RemoveFromFile(const std::string &file_name, Transaction *transaction = nullptr); + + auto FindLeaf(const KeyType &key, Operation operation, Transaction *transaction = nullptr, bool leftMost = false, + bool rightMost = false) -> Page *; + void ReleaseLatchFromQueue(Transaction *transaction); private: + void UpdateRootPageId(int insert_record = 0); + /* Debug Routines for FREE!! */ - void ToGraph(page_id_t page_id, const BPlusTreePage *page, std::ofstream &out); + void ToGraph(BPlusTreePage *page, BufferPoolManager *bpm, std::ofstream &out) const; + + void ToString(BPlusTreePage *page, BufferPoolManager *bpm) const; + + void StartNewTree(const KeyType &key, const ValueType &value); + + auto InsertIntoLeaf(const KeyType &key, const ValueType &value, Transaction *transaction = nullptr) -> bool; + + void InsertIntoParent(BPlusTreePage *old_node, const KeyType &key, BPlusTreePage *new_node, + Transaction *transaction = nullptr); + + template + auto Split(N *node) -> N *; - void PrintTree(page_id_t page_id, const BPlusTreePage *page); + template + auto CoalesceOrRedistribute(N *node, Transaction *transaction = nullptr) -> bool; - /** - * @brief Convert A B+ tree into a Printable B+ tree - * - * @param root_id - * @return PrintableNode - */ - auto ToPrintableBPlusTree(page_id_t root_id) -> PrintableBPlusTree; + template + auto Coalesce(N *neighbor_node, N *node, BPlusTreeInternalPage *parent, int index, + Transaction *transaction = nullptr) -> bool; + template + void Redistribute(N *neighbor_node, N *node, BPlusTreeInternalPage *parent, + int index, bool from_prev); + + auto AdjustRoot(BPlusTreePage *node) -> bool; // member variable std::string index_name_; - BufferPoolManager 
*bpm_; + page_id_t root_page_id_; + BufferPoolManager *buffer_pool_manager_; KeyComparator comparator_; - std::vector log; // NOLINT int leaf_max_size_; int internal_max_size_; - page_id_t header_page_id_; -}; - -/** - * @brief for test only. PrintableBPlusTree is a printable B+ tree. - * We first convert B+ tree into a printable B+ tree and the print it. - */ -struct PrintableBPlusTree { - int size_; - std::string keys_; - std::vector children_; - - /** - * @brief BFS traverse a printable B+ tree and print it into - * into out_buf - * - * @param out_buf - */ - void Print(std::ostream &out_buf) { - std::vector que = {this}; - while (!que.empty()) { - std::vector new_que; - - for (auto &t : que) { - int padding = (t->size_ - t->keys_.size()) / 2; - out_buf << std::string(padding, ' '); - out_buf << t->keys_; - out_buf << std::string(padding, ' '); - - for (auto &c : t->children_) { - new_que.push_back(&c); - } - } - out_buf << "\n"; - que = new_que; - } - } + ReaderWriterLatch root_page_id_latch_; }; } // namespace bustub diff --git a/src/include/storage/index/b_plus_tree_index.h b/src/include/storage/index/b_plus_tree_index.h index fefcb7d..010dde3 100644 --- a/src/include/storage/index/b_plus_tree_index.h +++ b/src/include/storage/index/b_plus_tree_index.h @@ -29,7 +29,7 @@ class BPlusTreeIndex : public Index { public: BPlusTreeIndex(std::unique_ptr &&metadata, BufferPoolManager *buffer_pool_manager); - auto InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool override; + void InsertEntry(const Tuple &key, RID rid, Transaction *transaction) override; void DeleteEntry(const Tuple &key, RID rid, Transaction *transaction) override; @@ -45,19 +45,18 @@ class BPlusTreeIndex : public Index { // comparator for key KeyComparator comparator_; // container - std::shared_ptr> container_; + BPlusTree container_; }; /** We only support index table with one integer key for now in BusTub. Hardcode everything here. 
*/ -constexpr static const auto TWO_INTEGER_SIZE_B_TREE = 8; -using IntegerKeyType_BTree = GenericKey; -using IntegerValueType_BTree = RID; -using IntegerComparatorType_BTree = GenericComparator; -using BPlusTreeIndexForTwoIntegerColumn = - BPlusTreeIndex; -using BPlusTreeIndexIteratorForTwoIntegerColumn = - IndexIterator; -using IntegerHashFunctionType = HashFunction; +constexpr static const auto INTEGER_SIZE = 4; +using IntegerKeyType = GenericKey; +using IntegerValueType = RID; +using IntegerComparatorType = GenericComparator; +using BPlusTreeIndexForOneIntegerColumn = BPlusTreeIndex; +using BPlusTreeIndexIteratorForOneIntegerColumn = + IndexIterator; +using IntegerHashFunctionType = HashFunction; } // namespace bustub diff --git a/src/include/storage/index/extendible_hash_table_index.h b/src/include/storage/index/extendible_hash_table_index.h index 4fd736b..b2e3515 100644 --- a/src/include/storage/index/extendible_hash_table_index.h +++ b/src/include/storage/index/extendible_hash_table_index.h @@ -33,7 +33,7 @@ class ExtendibleHashTableIndex : public Index { ~ExtendibleHashTableIndex() override = default; - auto InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool override; + void InsertEntry(const Tuple &key, RID rid, Transaction *transaction) override; void DeleteEntry(const Tuple &key, RID rid, Transaction *transaction) override; @@ -46,11 +46,4 @@ class ExtendibleHashTableIndex : public Index { DiskExtendibleHashTable container_; }; -constexpr static const auto TWO_INTEGER_SIZE = 8; -using IntegerKeyType = GenericKey; -using IntegerValueType = RID; -using IntegerComparatorType = GenericComparator; -using HashTableIndexForTwoIntegerColumn = - ExtendibleHashTableIndex; - } // namespace bustub diff --git a/src/include/storage/index/generic_key.h b/src/include/storage/index/generic_key.h index a20f5bb..497814d 100644 --- a/src/include/storage/index/generic_key.h +++ b/src/include/storage/index/generic_key.h @@ -30,7 +30,7 @@ template class 
GenericKey { public: inline void SetFromKey(const Tuple &tuple) { - // initialize to 0 + // intialize to 0 memset(data_, 0, KeySize); memcpy(data_, tuple.GetData(), tuple.GetLength()); } diff --git a/src/include/storage/index/index.h b/src/include/storage/index/index.h index 60277d0..1d7676c 100644 --- a/src/include/storage/index/index.h +++ b/src/include/storage/index/index.h @@ -45,11 +45,8 @@ class IndexMetadata { * @param key_attrs The mapping from indexed columns to base table columns */ IndexMetadata(std::string index_name, std::string table_name, const Schema *tuple_schema, - std::vector key_attrs, bool is_primary_key) - : name_(std::move(index_name)), - table_name_(std::move(table_name)), - key_attrs_(std::move(key_attrs)), - is_primary_key_(is_primary_key) { + std::vector key_attrs) + : name_(std::move(index_name)), table_name_(std::move(table_name)), key_attrs_(std::move(key_attrs)) { key_schema_ = std::make_shared(Schema::CopySchema(tuple_schema, key_attrs_)); } @@ -75,9 +72,6 @@ class IndexMetadata { /** @return The mapping relation between indexed columns and base table columns */ inline auto GetKeyAttrs() const -> const std::vector & { return key_attrs_; } - /** @return is primary key */ - inline auto IsPrimaryKey() const -> bool { return is_primary_key_; } - /** @return A string representation for debugging */ auto ToString() const -> std::string { std::stringstream os; @@ -100,8 +94,6 @@ class IndexMetadata { const std::vector key_attrs_; /** The schema of the indexed key */ std::shared_ptr key_schema_; - /** Is primary key? */ - bool is_primary_key_; }; ///////////////////////////////////////////////////////////////////// @@ -126,7 +118,7 @@ class Index { public: /** * Construct a new Index instance. 
- * @param metadata An owning pointer to the index metadata + * @param metdata An owning pointer to the index metadata */ explicit Index(std::unique_ptr &&metadata) : metadata_{std::move(metadata)} {} @@ -164,9 +156,8 @@ class Index { * @param key The index key * @param rid The RID associated with the key * @param transaction The transaction context - * @returns whether insertion is successful */ - virtual auto InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool = 0; + virtual void InsertEntry(const Tuple &key, RID rid, Transaction *transaction) = 0; /** * Delete an index entry by key. @@ -184,7 +175,7 @@ class Index { */ virtual void ScanKey(const Tuple &key, std::vector *result, Transaction *transaction) = 0; - protected: + private: /** The Index structure owns its metadata */ std::unique_ptr metadata_; }; diff --git a/src/include/storage/index/index_iterator.h b/src/include/storage/index/index_iterator.h index 7618823..5ef80d6 100644 --- a/src/include/storage/index/index_iterator.h +++ b/src/include/storage/index/index_iterator.h @@ -22,9 +22,11 @@ namespace bustub { INDEX_TEMPLATE_ARGUMENTS class IndexIterator { public: + using LeafPage = BPlusTreeLeafPage; + // you may define your own constructor based on your member variables - IndexIterator(); - ~IndexIterator(); // NOLINT + IndexIterator(BufferPoolManager *bpm, Page *page, int index = 0); + ~IndexIterator(); auto IsEnd() -> bool; @@ -32,12 +34,16 @@ class IndexIterator { auto operator++() -> IndexIterator &; - auto operator==(const IndexIterator &itr) const -> bool { throw std::runtime_error("unimplemented"); } + auto operator==(const IndexIterator &itr) const -> bool; - auto operator!=(const IndexIterator &itr) const -> bool { throw std::runtime_error("unimplemented"); } + auto operator!=(const IndexIterator &itr) const -> bool; private: // add your own private member variables here + BufferPoolManager *buffer_pool_manager_; + Page *page_; + LeafPage *leaf_ = nullptr; + int index_ = 0; 
}; } // namespace bustub diff --git a/src/include/storage/index/linear_probe_hash_table_index.h b/src/include/storage/index/linear_probe_hash_table_index.h index 330d8b6..4be36ff 100644 --- a/src/include/storage/index/linear_probe_hash_table_index.h +++ b/src/include/storage/index/linear_probe_hash_table_index.h @@ -33,7 +33,7 @@ class LinearProbeHashTableIndex : public Index { ~LinearProbeHashTableIndex() override = default; - auto InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool override; + void InsertEntry(const Tuple &key, RID rid, Transaction *transaction) override; void DeleteEntry(const Tuple &key, RID rid, Transaction *transaction) override; diff --git a/src/include/storage/index/stl_equal_wrapper.h b/src/include/storage/index/stl_equal_wrapper.h deleted file mode 100644 index 74badf1..0000000 --- a/src/include/storage/index/stl_equal_wrapper.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -namespace bustub { - -template -class StlEqualWrapper { - public: - explicit StlEqualWrapper(const KC &cmp) : cmp_{cmp} {} - - inline auto operator()(const K &lhs, const K &rhs) const -> bool { return cmp_(lhs, rhs) == 0; } - - KC cmp_; -}; - -} // namespace bustub diff --git a/src/include/storage/index/stl_hasher_wrapper.h b/src/include/storage/index/stl_hasher_wrapper.h deleted file mode 100644 index 881bf87..0000000 --- a/src/include/storage/index/stl_hasher_wrapper.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include "container/hash/hash_function.h" - -namespace bustub { - -template -class StlHasherWrapper { - public: - explicit StlHasherWrapper(const HashFunction &hash_fn) : hash_fn_{hash_fn} {} - - inline auto operator()(const K &key) const -> std::size_t { return hash_fn_.GetHash(key); } - - HashFunction hash_fn_; -}; - -} // namespace bustub diff --git a/src/include/storage/index/stl_ordered.h b/src/include/storage/index/stl_ordered.h deleted file mode 100644 index 5260368..0000000 --- a/src/include/storage/index/stl_ordered.h +++ 
/dev/null @@ -1,102 +0,0 @@ -#pragma once - -#include -#include -#include // NOLINT -#include -#include -#include - -#include "common/rid.h" -#include "container/hash/hash_function.h" -#include "storage/index/b_plus_tree.h" -#include "storage/index/index.h" -#include "storage/index/stl_comparator_wrapper.h" - -namespace bustub { - -template -class STLOrderedIndexIterator { - public: - STLOrderedIndexIterator(const std::map> *map, - typename std::map>::const_iterator iter) - : map_(map), iter_(std::move(iter)) {} - - ~STLOrderedIndexIterator() = default; - - auto IsEnd() -> bool { return iter_ == map_->cend(); } - - auto operator*() -> const std::pair & { - ret_val_ = *iter_; - return ret_val_; - } - - auto operator++() -> STLOrderedIndexIterator & { - iter_++; - return *this; - } - - inline auto operator==(const STLOrderedIndexIterator &itr) const -> bool { return itr.iter_ == iter_; } - - inline auto operator!=(const STLOrderedIndexIterator &itr) const -> bool { return !(*this == itr); } - - private: - const std::map> *map_; - typename std::map>::const_iterator iter_; - std::pair ret_val_; -}; - -template -class STLOrderedIndex : public Index { - public: - STLOrderedIndex(std::unique_ptr &&metadata, BufferPoolManager *buffer_pool_manager) - : Index(std::move(metadata)), - comparator_(StlComparatorWrapper(Cmp(metadata_->GetKeySchema()))), - data_(comparator_) {} - - auto InsertEntry(const Tuple &key, VT rid, Transaction *transaction) -> bool override { - KT index_key; - index_key.SetFromKey(key); - std::scoped_lock lck(lock_); - if (data_.count(index_key) == 1) { - return false; - } - data_.emplace(index_key, rid); - return true; - } - - void DeleteEntry(const Tuple &key, VT rid, Transaction *transaction) override { - KT index_key; - index_key.SetFromKey(key); - std::scoped_lock lck(lock_); - data_.erase(index_key); - } - - void ScanKey(const Tuple &key, std::vector *result, Transaction *transaction) override { - KT index_key; - index_key.SetFromKey(key); - 
std::scoped_lock lck(lock_); - if (data_.count(index_key) == 1) { - *result = std::vector{data_[index_key]}; - return; - } - *result = {}; - } - - auto GetBeginIterator() -> STLOrderedIndexIterator { return {&data_, data_.cbegin()}; } - - auto GetBeginIterator(const KT &key) -> STLOrderedIndexIterator { - return {&data_, data_.lower_bound(key)}; - } - - auto GetEndIterator() -> STLOrderedIndexIterator { return {&data_, data_.cend()}; } - - protected: - std::mutex lock_; - StlComparatorWrapper comparator_; - std::map> data_; -}; - -using STLOrderedIndexForTwoIntegerColumn = STLOrderedIndex, RID, GenericComparator<8>>; - -} // namespace bustub diff --git a/src/include/storage/index/stl_unordered.h b/src/include/storage/index/stl_unordered.h deleted file mode 100644 index fee68f3..0000000 --- a/src/include/storage/index/stl_unordered.h +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include -#include -#include // NOLINT -#include -#include -#include -#include - -#include "common/rid.h" -#include "container/hash/hash_function.h" -#include "storage/index/b_plus_tree.h" -#include "storage/index/index.h" -#include "storage/index/stl_comparator_wrapper.h" -#include "storage/index/stl_equal_wrapper.h" -#include "storage/index/stl_hasher_wrapper.h" - -namespace bustub { - -template -class STLUnorderedIndex : public Index { - public: - STLUnorderedIndex(std::unique_ptr &&metadata, BufferPoolManager *buffer_pool_manager, - const HashFunction &hash_fn) - : Index(std::move(metadata)), - comparator_(StlComparatorWrapper(Cmp(metadata_->GetKeySchema()))), - hash_fn_(StlHasherWrapper(hash_fn)), - eq_(StlEqualWrapper(Cmp(metadata_->GetKeySchema()))), - data_(0, hash_fn_, eq_) {} - - auto InsertEntry(const Tuple &key, VT rid, Transaction *transaction) -> bool override { - KT index_key; - index_key.SetFromKey(key); - std::scoped_lock lck(lock_); - if (data_.find(index_key) != data_.end()) { - return false; - } - data_.emplace(index_key, rid); - return true; - } - - void 
DeleteEntry(const Tuple &key, VT rid, Transaction *transaction) override { - KT index_key; - index_key.SetFromKey(key); - std::scoped_lock lck(lock_); - if (auto it = data_.find(index_key); it != data_.end()) { - data_.erase(it); - return; - } - } - - void ScanKey(const Tuple &key, std::vector *result, Transaction *transaction) override { - KT index_key; - index_key.SetFromKey(key); - std::scoped_lock lck(lock_); - if (auto it = data_.find(index_key); it != data_.end()) { - *result = std::vector{it->second}; - return; - } - *result = {}; - } - - protected: - std::mutex lock_; - StlComparatorWrapper comparator_; - StlHasherWrapper hash_fn_; - StlEqualWrapper eq_; - std::unordered_map, StlEqualWrapper> data_; -}; - -using STLUnorderedIndexForTwoIntegerColumn = STLUnorderedIndex, RID, GenericComparator<8>>; - -} // namespace bustub diff --git a/src/include/storage/page/b_plus_tree_header_page.h b/src/include/storage/page/b_plus_tree_header_page.h deleted file mode 100644 index 67e59c2..0000000 --- a/src/include/storage/page/b_plus_tree_header_page.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include "common/config.h" - -namespace bustub { - -/** - * The header page is just used to retrieve the root page, - * preventing potential race condition under concurrent environment. 
- */ -class BPlusTreeHeaderPage { - public: - // Delete all constructor / destructor to ensure memory safety - BPlusTreeHeaderPage() = delete; - BPlusTreeHeaderPage(const BPlusTreeHeaderPage &other) = delete; - - page_id_t root_page_id_; -}; - -} // namespace bustub diff --git a/src/include/storage/page/b_plus_tree_internal_page.h b/src/include/storage/page/b_plus_tree_internal_page.h index 29639b9..114a240 100644 --- a/src/include/storage/page/b_plus_tree_internal_page.h +++ b/src/include/storage/page/b_plus_tree_internal_page.h @@ -11,96 +11,57 @@ #pragma once #include -#include #include "storage/page/b_plus_tree_page.h" namespace bustub { #define B_PLUS_TREE_INTERNAL_PAGE_TYPE BPlusTreeInternalPage -#define INTERNAL_PAGE_HEADER_SIZE 12 +#define INTERNAL_PAGE_HEADER_SIZE 24 #define INTERNAL_PAGE_SIZE ((BUSTUB_PAGE_SIZE - INTERNAL_PAGE_HEADER_SIZE) / (sizeof(MappingType))) - /** - * Store `n` indexed keys and `n + 1` child pointers (page_id) within internal page. + * Store n indexed keys and n+1 child pointers (page_id) within internal page. * Pointer PAGE_ID(i) points to a subtree in which all keys K satisfy: * K(i) <= K < K(i+1). - * NOTE: Since the number of keys does not equal to number of child pointers, - * the first key always remains invalid. That is to say, any search / lookup + * NOTE: since the number of keys does not equal to number of child pointers, + * the first key always remains invalid. That is to say, any search/lookup * should ignore the first key. * * Internal page format (keys are stored in increasing order): - * ---------------------------------------------------------------------------------- - * | HEADER | KEY(1) + PAGE_ID(1) | KEY(2) + PAGE_ID(2) | ... | KEY(n) + PAGE_ID(n) | - * ---------------------------------------------------------------------------------- + * -------------------------------------------------------------------------- + * | HEADER | KEY(1)+PAGE_ID(1) | KEY(2)+PAGE_ID(2) | ... 
| KEY(n)+PAGE_ID(n) | + * -------------------------------------------------------------------------- */ INDEX_TEMPLATE_ARGUMENTS class BPlusTreeInternalPage : public BPlusTreePage { public: - // Delete all constructor / destructor to ensure memory safety - BPlusTreeInternalPage() = delete; - BPlusTreeInternalPage(const BPlusTreeInternalPage &other) = delete; - - /** - * Writes the necessary header information to a newly created page, must be called after - * the creation of a new page to make a valid `BPlusTreeInternalPage` - * @param max_size Maximal size of the page - */ - void Init(int max_size = INTERNAL_PAGE_SIZE); + // must call initialize method after "create" a new node + void Init(page_id_t page_id, page_id_t parent_id = INVALID_PAGE_ID, int max_size = INTERNAL_PAGE_SIZE); - /** - * @param index The index of the key to get. Index must be non-zero. - * @return Key at index - */ auto KeyAt(int index) const -> KeyType; - - /** - * @param index The index of the key to set. Index must be non-zero. 
- * @param key The new value for key - */ void SetKeyAt(int index, const KeyType &key); - - /** - * @param value The value to search for - * @return The index that corresponds to the specified value - */ - auto ValueIndex(const ValueType &value) const -> int; - - /** - * @param index The index to search for - * @return The value at the index - */ auto ValueAt(int index) const -> ValueType; + void SetValueAt(int index, const ValueType &value); + auto ValueIndex(const ValueType &value) const -> int; - /** - * @brief For test only, return a string representing all keys in - * this internal page, formatted as "(key1,key2,key3,...)" - * - * @return The string representation of all keys in the current internal page - */ - auto ToString() const -> std::string { - std::string kstr = "("; - bool first = true; - - // First key of internal page is always invalid - for (int i = 1; i < GetSize(); i++) { - KeyType key = KeyAt(i); - if (first) { - first = false; - } else { - kstr.append(","); - } - - kstr.append(std::to_string(key.ToString())); - } - kstr.append(")"); + auto Lookup(const KeyType &key, const KeyComparator &comparator) const -> ValueType; + void PopulateNewRoot(const ValueType &old_value, const KeyType &new_key, const ValueType &new_value); + auto InsertNodeAfter(const ValueType &old_value, const KeyType &new_key, const ValueType &new_value) -> int; + void Remove(int index); + auto RemoveAndReturnOnlyChild() -> ValueType; - return kstr; - } + void MoveAllTo(BPlusTreeInternalPage *recipient, const KeyType &middle_key, BufferPoolManager *buffer_pool_manager); + void MoveHalfTo(BPlusTreeInternalPage *recipient, BufferPoolManager *buffer_pool_manager); + void MoveFirstToEndOf(BPlusTreeInternalPage *recipient, const KeyType &middle_key, + BufferPoolManager *buffer_pool_manager); + void MoveLastToFrontOf(BPlusTreeInternalPage *recipient, const KeyType &middle_key, + BufferPoolManager *buffer_pool_manager); private: // Flexible array member for page data. 
- MappingType array_[0]; + MappingType array_[1]; + void CopyNFrom(MappingType *items, int size, BufferPoolManager *buffer_pool_manager); + void CopyLastFrom(const MappingType &pair, BufferPoolManager *buffer_pool_manager); + void CopyFirstFrom(const MappingType &pair, BufferPoolManager *buffer_pool_manager); }; - } // namespace bustub diff --git a/src/include/storage/page/b_plus_tree_leaf_page.h b/src/include/storage/page/b_plus_tree_leaf_page.h index d952647..2ae82f7 100644 --- a/src/include/storage/page/b_plus_tree_leaf_page.h +++ b/src/include/storage/page/b_plus_tree_leaf_page.h @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include @@ -19,72 +18,54 @@ namespace bustub { #define B_PLUS_TREE_LEAF_PAGE_TYPE BPlusTreeLeafPage -#define LEAF_PAGE_HEADER_SIZE 16 +#define LEAF_PAGE_HEADER_SIZE 28 #define LEAF_PAGE_SIZE ((BUSTUB_PAGE_SIZE - LEAF_PAGE_HEADER_SIZE) / sizeof(MappingType)) /** - * Store indexed key and record id (record id = page id combined with slot id, - * see `include/common/rid.h` for detailed implementation) together within leaf + * Store indexed key and record id(record id = page id combined with slot id, + * see include/common/rid.h for detailed implementation) together within leaf * page. Only support unique key. * * Leaf page format (keys are stored in order): - * ----------------------------------------------------------------------- - * | HEADER | KEY(1) + RID(1) | KEY(2) + RID(2) | ... | KEY(n) + RID(n) | - * ----------------------------------------------------------------------- + * ---------------------------------------------------------------------- + * | HEADER | KEY(1) + RID(1) | KEY(2) + RID(2) | ... 
| KEY(n) + RID(n) + * ---------------------------------------------------------------------- * - * Header format (size in byte, 16 bytes in total): - * ----------------------------------------------------------------------- - * | PageType (4) | CurrentSize (4) | MaxSize (4) | NextPageId (4) | ... | - * ----------------------------------------------------------------------- + * Header format (size in byte, 28 bytes in total): + * --------------------------------------------------------------------- + * | PageType (4) | LSN (4) | CurrentSize (4) | MaxSize (4) | + * --------------------------------------------------------------------- + * ----------------------------------------------- + * | ParentPageId (4) | PageId (4) | NextPageId (4) + * ----------------------------------------------- */ INDEX_TEMPLATE_ARGUMENTS class BPlusTreeLeafPage : public BPlusTreePage { public: - // Delete all constructor / destructor to ensure memory safety - BPlusTreeLeafPage() = delete; - BPlusTreeLeafPage(const BPlusTreeLeafPage &other) = delete; - - /** - * After creating a new leaf page from buffer pool, must call initialize - * method to set default values - * @param max_size Max size of the leaf node - */ - void Init(int max_size = LEAF_PAGE_SIZE); - - // Helper methods + // After creating a new leaf page from buffer pool, must call initialize + // method to set default values + void Init(page_id_t page_id, page_id_t parent_id = INVALID_PAGE_ID, int max_size = LEAF_PAGE_SIZE); + // helper methods auto GetNextPageId() const -> page_id_t; void SetNextPageId(page_id_t next_page_id); auto KeyAt(int index) const -> KeyType; + auto GetItem(int index) -> const MappingType &; + auto KeyIndex(const KeyType &key, const KeyComparator &comparator) const -> int; + auto Insert(const KeyType &key, const ValueType &value, const KeyComparator &keyComparator) -> int; + auto Lookup(const KeyType &key, ValueType *value, const KeyComparator &keyComparator) const -> bool; + auto 
RemoveAndDeleteRecord(const KeyType &key, const KeyComparator &keyComparator) -> int; - /** - * @brief For test only return a string representing all keys in - * this leaf page formatted as "(key1,key2,key3,...)" - * - * @return The string representation of all keys in the current internal page - */ - auto ToString() const -> std::string { - std::string kstr = "("; - bool first = true; - - for (int i = 0; i < GetSize(); i++) { - KeyType key = KeyAt(i); - if (first) { - first = false; - } else { - kstr.append(","); - } - - kstr.append(std::to_string(key.ToString())); - } - kstr.append(")"); - - return kstr; - } + void MoveHalfTo(BPlusTreeLeafPage *recipient); + void MoveAllTo(BPlusTreeLeafPage *recipient); + void MoveFirstToEndOf(BPlusTreeLeafPage *recipient); + void MoveLastToFrontOf(BPlusTreeLeafPage *recipient); private: page_id_t next_page_id_; // Flexible array member for page data. - MappingType array_[0]; + MappingType array_[1]; + void CopyNFrom(MappingType *items, int size); + void CopyLastFrom(const MappingType &item); + void CopyFirstFrom(const MappingType &item); }; - } // namespace bustub diff --git a/src/include/storage/page/b_plus_tree_page.h b/src/include/storage/page/b_plus_tree_page.h index 884e2e6..8119d01 100644 --- a/src/include/storage/page/b_plus_tree_page.h +++ b/src/include/storage/page/b_plus_tree_page.h @@ -33,20 +33,19 @@ enum class IndexPageType { INVALID_INDEX_PAGE = 0, LEAF_PAGE, INTERNAL_PAGE }; * It actually serves as a header part for each B+ tree page and * contains information shared by both leaf page and internal page. * - * Header format (size in byte, 12 bytes in total): - * --------------------------------------------------------- - * | PageType (4) | CurrentSize (4) | MaxSize (4) | ... 
| - * --------------------------------------------------------- + * Header format (size in byte, 24 bytes in total): + * ---------------------------------------------------------------------------- + * | PageType (4) | LSN (4) | CurrentSize (4) | MaxSize (4) | + * ---------------------------------------------------------------------------- + * | ParentPageId (4) | PageId(4) | + * ---------------------------------------------------------------------------- */ class BPlusTreePage { public: - // Delete all constructor / destructor to ensure memory safety - BPlusTreePage() = delete; - BPlusTreePage(const BPlusTreePage &other) = delete; - ~BPlusTreePage() = delete; - auto IsLeafPage() const -> bool; + auto IsRootPage() const -> bool; void SetPageType(IndexPageType page_type); + auto GetPageType() -> IndexPageType; auto GetSize() const -> int; void SetSize(int size); @@ -56,11 +55,22 @@ class BPlusTreePage { void SetMaxSize(int max_size); auto GetMinSize() const -> int; + auto GetParentPageId() const -> page_id_t; + void SetParentPageId(page_id_t parent_page_id); + + auto GetPageId() const -> page_id_t; + void SetPageId(page_id_t page_id); + + void SetLSN(lsn_t lsn = INVALID_LSN); + private: - // Member variables, attributes that both internal and leaf page share - IndexPageType page_type_ __attribute__((__unused__)); - int size_ __attribute__((__unused__)); - int max_size_ __attribute__((__unused__)); + // member variable, attributes that both internal and leaf page share + IndexPageType page_type_; + lsn_t lsn_; + int size_; + int max_size_; + page_id_t parent_page_id_; + page_id_t page_id_; }; } // namespace bustub diff --git a/src/include/storage/page/extendible_htable_bucket_page.h b/src/include/storage/page/extendible_htable_bucket_page.h deleted file mode 100644 index 9f034d0..0000000 --- a/src/include/storage/page/extendible_htable_bucket_page.h +++ /dev/null @@ -1,140 +0,0 @@ -//===----------------------------------------------------------------------===// -// 
-// BusTub -// -// extendible_htable_bucket_page.h -// -// Identification: src/include/storage/page/extendible_htable_bucket_page.h -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -/** - * Bucket page format: - * ---------------------------------------------------------------------------- - * | METADATA | KEY(1) + VALUE(1) | KEY(2) + VALUE(2) | ... | KEY(n) + VALUE(n) - * ---------------------------------------------------------------------------- - * - * Metadata format (size in byte, 8 bytes in total): - * -------------------------------- - * | CurrentSize (4) | MaxSize (4) - * -------------------------------- - */ -#pragma once - -#include -#include -#include - -#include "common/config.h" -#include "common/macros.h" -#include "storage/index/int_comparator.h" -#include "storage/page/b_plus_tree_page.h" -#include "type/value.h" - -namespace bustub { - -static constexpr uint64_t HTABLE_BUCKET_PAGE_METADATA_SIZE = sizeof(uint32_t) * 2; - -constexpr auto HTableBucketArraySize(uint64_t mapping_type_size) -> uint64_t { - return (BUSTUB_PAGE_SIZE - HTABLE_BUCKET_PAGE_METADATA_SIZE) / mapping_type_size; -}; - -/** - * Bucket page for extendible hash table. - */ -template -class ExtendibleHTableBucketPage { - public: - // Delete all constructor / destructor to ensure memory safety - ExtendibleHTableBucketPage() = delete; - DISALLOW_COPY_AND_MOVE(ExtendibleHTableBucketPage); - - /** - * After creating a new bucket page from buffer pool, must call initialize - * method to set default values - * @param max_size Max size of the bucket array - */ - void Init(uint32_t max_size = HTableBucketArraySize(sizeof(MappingType))); - - /** - * Lookup a key - * - * @param key key to lookup - * @param[out] value value to set - * @param cmp the comparator - * @return true if the key and value are present, false if not found. 
- */ - auto Lookup(const KeyType &key, ValueType &value, const KeyComparator &cmp) const -> bool; - - /** - * Attempts to insert a key and value in the bucket. - * - * @param key key to insert - * @param value value to insert - * @param cmp the comparator to use - * @return true if inserted, false if bucket is full or the same key is already present - */ - auto Insert(const KeyType &key, const ValueType &value, const KeyComparator &cmp) -> bool; - - /** - * Removes a key and value. - * - * @return true if removed, false if not found - */ - auto Remove(const KeyType &key, const KeyComparator &cmp) -> bool; - - void RemoveAt(uint32_t bucket_idx); - - /** - * @brief Gets the key at an index in the bucket. - * - * @param bucket_idx the index in the bucket to get the key at - * @return key at index bucket_idx of the bucket - */ - auto KeyAt(uint32_t bucket_idx) const -> KeyType; - - /** - * Gets the value at an index in the bucket. - * - * @param bucket_idx the index in the bucket to get the value at - * @return value at index bucket_idx of the bucket - */ - auto ValueAt(uint32_t bucket_idx) const -> ValueType; - - /** - * Gets the entry at an index in the bucket. 
- * - * @param bucket_idx the index in the bucket to get the entry at - * @return entry at index bucket_idx of the bucket - */ - auto EntryAt(uint32_t bucket_idx) const -> const std::pair &; - - /** - * @return number of entries in the bucket - */ - auto Size() const -> uint32_t; - - /** - * @return whether the bucket is full - */ - auto IsFull() const -> bool; - - /** - * @return whether the bucket is empty - */ - auto IsEmpty() const -> bool; - - /** - * Prints the bucket's occupancy information - */ - void PrintBucket() const; - - private: - uint32_t size_; - uint32_t max_size_; - MappingType array_[HTableBucketArraySize(sizeof(MappingType))]; -}; - -} // namespace bustub diff --git a/src/include/storage/page/extendible_htable_directory_page.h b/src/include/storage/page/extendible_htable_directory_page.h deleted file mode 100644 index ec73601..0000000 --- a/src/include/storage/page/extendible_htable_directory_page.h +++ /dev/null @@ -1,203 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// extendible_htable_directory_page.h -// -// Identification: src/include/storage/page/extendible_htable_directory_page.h -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -/** - * Directory page format: - * -------------------------------------------------------------------------------------- - * | MaxDepth (4) | GlobalDepth (4) | LocalDepths (512) | BucketPageIds(2048) | Free(1528) - * -------------------------------------------------------------------------------------- - */ - -#pragma once - -#include -#include -#include -#include - -#include "common/config.h" -#include "storage/index/generic_key.h" - -namespace bustub { - -static constexpr uint64_t HTABLE_DIRECTORY_PAGE_METADATA_SIZE = sizeof(uint32_t) * 2; - -/** - * HTABLE_DIRECTORY_ARRAY_SIZE is the number of page_ids that can fit in the directory 
page of an extendible hash index. - * This is 512 because the directory array must grow in powers of 2, and 1024 page_ids leaves zero room for - * storage of the other member variables. - */ -static constexpr uint64_t HTABLE_DIRECTORY_MAX_DEPTH = 9; -static constexpr uint64_t HTABLE_DIRECTORY_ARRAY_SIZE = 1 << HTABLE_DIRECTORY_MAX_DEPTH; - -/** - * Directory Page for extendible hash table. - */ -class ExtendibleHTableDirectoryPage { - public: - // Delete all constructor / destructor to ensure memory safety - ExtendibleHTableDirectoryPage() = delete; - DISALLOW_COPY_AND_MOVE(ExtendibleHTableDirectoryPage); - - /** - * After creating a new directory page from buffer pool, must call initialize - * method to set default values - * @param max_depth Max depth in the directory page - */ - void Init(uint32_t max_depth = HTABLE_DIRECTORY_MAX_DEPTH); - - /** - * Get the bucket index that the key is hashed to - * - * @param hash the hash of the key - * @return bucket index current key is hashed to - */ - auto HashToBucketIndex(uint32_t hash) const -> uint32_t; - - /** - * Lookup a bucket page using a directory index - * - * @param bucket_idx the index in the directory to lookup - * @return bucket page_id corresponding to bucket_idx - */ - auto GetBucketPageId(uint32_t bucket_idx) const -> page_id_t; - - /** - * Updates the directory index using a bucket index and page_id - * - * @param bucket_idx directory index at which to insert page_id - * @param bucket_page_id page_id to insert - */ - void SetBucketPageId(uint32_t bucket_idx, page_id_t bucket_page_id); - - /** - * Gets the split image of an index - * - * @param bucket_idx the directory index for which to find the split image - * @return the directory index of the split image - **/ - auto GetSplitImageIndex(uint32_t bucket_idx) const -> uint32_t; - - /** - * GetGlobalDepthMask - returns a mask of global_depth 1's and the rest 0's. 
- * - * In Extendible Hashing we map a key to a directory index - * using the following hash + mask function. - * - * DirectoryIndex = Hash(key) & GLOBAL_DEPTH_MASK - * - * where GLOBAL_DEPTH_MASK is a mask with exactly GLOBAL_DEPTH 1's from LSB - * upwards. For example, global depth 3 corresponds to 0x00000007 in a 32-bit - * representation. - * - * @return mask of global_depth 1's and the rest 0's (with 1's from LSB upwards) - */ - auto GetGlobalDepthMask() const -> uint32_t; - - /** - * GetLocalDepthMask - same as global depth mask, except it - * uses the local depth of the bucket located at bucket_idx - * - * @param bucket_idx the index to use for looking up local depth - * @return mask of local 1's and the rest 0's (with 1's from LSB upwards) - */ - auto GetLocalDepthMask(uint32_t bucket_idx) const -> uint32_t; - - /** - * Get the global depth of the hash table directory - * - * @return the global depth of the directory - */ - auto GetGlobalDepth() const -> uint32_t; - - auto GetMaxDepth() const -> uint32_t; - - /** - * Increment the global depth of the directory - */ - void IncrGlobalDepth(); - - /** - * Decrement the global depth of the directory - */ - void DecrGlobalDepth(); - - /** - * @return true if the directory can be shrunk - */ - auto CanShrink() -> bool; - - /** - * @return the current directory size - */ - auto Size() const -> uint32_t; - - /** - * @return the max directory size - */ - auto MaxSize() const -> uint32_t; - - /** - * Gets the local depth of the bucket at bucket_idx - * - * @param bucket_idx the bucket index to lookup - * @return the local depth of the bucket at bucket_idx - */ - auto GetLocalDepth(uint32_t bucket_idx) const -> uint32_t; - - /** - * Set the local depth of the bucket at bucket_idx to local_depth - * - * @param bucket_idx bucket index to update - * @param local_depth new local depth - */ - void SetLocalDepth(uint32_t bucket_idx, uint8_t local_depth); - - /** - * Increment the local depth of the bucket at bucket_idx - * 
@param bucket_idx bucket index to increment - */ - void IncrLocalDepth(uint32_t bucket_idx); - - /** - * Decrement the local depth of the bucket at bucket_idx - * @param bucket_idx bucket index to decrement - */ - void DecrLocalDepth(uint32_t bucket_idx); - - /** - * VerifyIntegrity - * - * Verify the following invariants: - * (1) All LD <= GD. - * (2) Each bucket has precisely 2^(GD - LD) pointers pointing to it. - * (3) The LD is the same at each index with the same bucket_page_id - */ - void VerifyIntegrity() const; - - /** - * Prints the current directory - */ - void PrintDirectory() const; - - private: - uint32_t max_depth_; - uint32_t global_depth_; - uint8_t local_depths_[HTABLE_DIRECTORY_ARRAY_SIZE]; - page_id_t bucket_page_ids_[HTABLE_DIRECTORY_ARRAY_SIZE]; -}; - -static_assert(sizeof(page_id_t) == 4); - -static_assert(sizeof(ExtendibleHTableDirectoryPage) <= BUSTUB_PAGE_SIZE); - -} // namespace bustub diff --git a/src/include/storage/page/extendible_htable_header_page.h b/src/include/storage/page/extendible_htable_header_page.h deleted file mode 100644 index bdecafc..0000000 --- a/src/include/storage/page/extendible_htable_header_page.h +++ /dev/null @@ -1,88 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// extendible_htable_header_page.h -// -// Identification: src/include/storage/page/extendible_htable_header_page.h -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -/** - * Header page format: - * --------------------------------------------------- - * | DirectoryPageIds(2048) | MaxDepth (4) | Free(2044) - * --------------------------------------------------- - */ - -#pragma once - -#include -#include "common/config.h" -#include "common/macros.h" - -namespace bustub { - -static constexpr uint64_t HTABLE_HEADER_PAGE_METADATA_SIZE = sizeof(uint32_t); -static constexpr uint64_t 
HTABLE_HEADER_MAX_DEPTH = 9; -static constexpr uint64_t HTABLE_HEADER_ARRAY_SIZE = 1 << HTABLE_HEADER_MAX_DEPTH; - -class ExtendibleHTableHeaderPage { - public: - // Delete all constructor / destructor to ensure memory safety - ExtendibleHTableHeaderPage() = delete; - DISALLOW_COPY_AND_MOVE(ExtendibleHTableHeaderPage); - - /** - * After creating a new header page from buffer pool, must call initialize - * method to set default values - * @param max_depth Max depth in the header page - */ - void Init(uint32_t max_depth = HTABLE_HEADER_MAX_DEPTH); - - /** - * Get the directory index that the key is hashed to - * - * @param hash the hash of the key - * @return directory index the key is hashed to - */ - auto HashToDirectoryIndex(uint32_t hash) const -> uint32_t; - - /** - * Get the directory page id at an index - * - * @param directory_idx index in the directory page id array - * @return directory page_id at index - */ - auto GetDirectoryPageId(uint32_t directory_idx) const -> uint32_t; - - /** - * @brief Set the directory page id at an index - * - * @param directory_idx index in the directory page id array - * @param directory_page_id page id of the directory - */ - void SetDirectoryPageId(uint32_t directory_idx, page_id_t directory_page_id); - - /** - * @brief Get the maximum number of directory page ids the header page could handle - */ - auto MaxSize() const -> uint32_t; - - /** - * Prints the header's occupancy information - */ - void PrintHeader() const; - - private: - page_id_t directory_page_ids_[HTABLE_HEADER_ARRAY_SIZE]; - uint32_t max_depth_; -}; - -static_assert(sizeof(page_id_t) == 4); - -static_assert(sizeof(ExtendibleHTableHeaderPage) <= BUSTUB_PAGE_SIZE); - -} // namespace bustub diff --git a/src/include/storage/page/hash_table_page_defs.h b/src/include/storage/page/hash_table_page_defs.h index e92cebb..c3d9668 100644 --- a/src/include/storage/page/hash_table_page_defs.h +++ b/src/include/storage/page/hash_table_page_defs.h @@ -14,8 +14,6 @@ #define 
MappingType std::pair -#define INDEX_TEMPLATE_ARGUMENTS template - /** * Linear Probe Hashing Definitions */ diff --git a/src/include/storage/page/header_page.h b/src/include/storage/page/header_page.h new file mode 100644 index 0000000..b179fcd --- /dev/null +++ b/src/include/storage/page/header_page.h @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// CMU-DB Project (15-445/645) +// ***DO NO SHARE PUBLICLY*** +// +// Identification: src/include/page/header_page.h +// +// Copyright (c) 2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include +#include "storage/page/page.h" + +namespace bustub { + +/** + * Database use the first page (page_id = 0) as header page to store metadata, in + * our case, we will contain information about table/index name (length less than + * 32 bytes) and their corresponding root_id + * + * Format (size in byte): + * ----------------------------------------------------------------- + * | RecordCount (4) | Entry_1 name (32) | Entry_1 root_id (4) | ... 
| + * ----------------------------------------------------------------- + */ +class HeaderPage : public Page { + public: + void Init() { SetRecordCount(0); } + /** + * Record related + */ + auto InsertRecord(const std::string &name, page_id_t root_id) -> bool; + auto DeleteRecord(const std::string &name) -> bool; + auto UpdateRecord(const std::string &name, page_id_t root_id) -> bool; + + // return root_id if success + auto GetRootId(const std::string &name, page_id_t *root_id) -> bool; + auto GetRecordCount() -> int; + + private: + /** + * helper functions + */ + auto FindRecord(const std::string &name) -> int; + + void SetRecordCount(int record_count); +}; +} // namespace bustub diff --git a/src/include/storage/page/page.h b/src/include/storage/page/page.h index 63009fe..fd6a11f 100644 --- a/src/include/storage/page/page.h +++ b/src/include/storage/page/page.h @@ -27,17 +27,14 @@ namespace bustub { */ class Page { // There is book-keeping information inside the page that should only be relevant to the buffer pool manager. - friend class BufferPoolManager; + friend class BufferPoolManagerInstance; public: /** Constructor. Zeros out the page data. */ - Page() { - data_ = new char[BUSTUB_PAGE_SIZE]; - ResetMemory(); - } + Page() { ResetMemory(); } /** Default destructor. */ - ~Page() { delete[] data_; } + ~Page() = default; /** @return the actual data contained within this page */ inline auto GetData() -> char * { return data_; } @@ -82,9 +79,7 @@ class Page { inline void ResetMemory() { memset(data_, OFFSET_PAGE_START, BUSTUB_PAGE_SIZE); } /** The actual data that is stored within a page. */ - // Usually this should be stored as `char data_[BUSTUB_PAGE_SIZE]{};`. But to enable ASAN to detect page overflow, - // we store it as a ptr. - char *data_; + char data_[BUSTUB_PAGE_SIZE]{}; /** The ID of this page. */ page_id_t page_id_ = INVALID_PAGE_ID; /** The pin count of this page. 
*/ diff --git a/src/include/storage/page/page_guard.h b/src/include/storage/page/page_guard.h deleted file mode 100644 index c7e7d32..0000000 --- a/src/include/storage/page/page_guard.h +++ /dev/null @@ -1,241 +0,0 @@ -#pragma once - -#include "storage/page/page.h" - -namespace bustub { - -class BufferPoolManager; -class ReadPageGuard; -class WritePageGuard; - -class BasicPageGuard { - public: - BasicPageGuard() = default; - - BasicPageGuard(BufferPoolManager *bpm, Page *page) : bpm_(bpm), page_(page) {} - - BasicPageGuard(const BasicPageGuard &) = delete; - auto operator=(const BasicPageGuard &) -> BasicPageGuard & = delete; - - /** TODO(P1): Add implementation - * - * @brief Move constructor for BasicPageGuard - * - * When you call BasicPageGuard(std::move(other_guard)), you - * expect that the new guard will behave exactly like the other - * one. In addition, the old page guard should not be usable. For - * example, it should not be possible to call .Drop() on both page - * guards and have the pin count decrease by 2. - */ - BasicPageGuard(BasicPageGuard &&that) noexcept; - - /** TODO(P1): Add implementation - * - * @brief Drop a page guard - * - * Dropping a page guard should clear all contents - * (so that the page guard is no longer useful), and - * it should tell the BPM that we are done using this page, - * per the specification in the writeup. - */ - void Drop(); - - /** TODO(P1): Add implementation - * - * @brief Move assignment for BasicPageGuard - * - * Similar to a move constructor, except that the move - * assignment assumes that BasicPageGuard already has a page - * being guarded. Think carefully about what should happen when - * a guard replaces its held page with a different one, given - * the purpose of a page guard. 
- */ - auto operator=(BasicPageGuard &&that) noexcept -> BasicPageGuard &; - - /** TODO(P1): Add implementation - * - * @brief Destructor for BasicPageGuard - * - * When a page guard goes out of scope, it should behave as if - * the page guard was dropped. - */ - ~BasicPageGuard(); - - /** TODO(P1): Add implementation - * - * @brief Upgrade a BasicPageGuard to a ReadPageGuard - * - * The protected page is not evicted from the buffer pool during the upgrade, - * and the basic page guard should be made invalid after calling this function. - * - * @return an upgraded ReadPageGuard - */ - auto UpgradeRead() -> ReadPageGuard; - - /** TODO(P1): Add implementation - * - * @brief Upgrade a BasicPageGuard to a WritePageGuard - * - * The protected page is not evicted from the buffer pool during the upgrade, - * and the basic page guard should be made invalid after calling this function. - * - * @return an upgraded WritePageGuard - */ - auto UpgradeWrite() -> WritePageGuard; - - auto PageId() -> page_id_t { return page_->GetPageId(); } - - auto GetData() -> const char * { return page_->GetData(); } - - template - auto As() -> const T * { - return reinterpret_cast(GetData()); - } - - auto GetDataMut() -> char * { - is_dirty_ = true; - return page_->GetData(); - } - - template - auto AsMut() -> T * { - return reinterpret_cast(GetDataMut()); - } - - private: - friend class ReadPageGuard; - friend class WritePageGuard; - - [[maybe_unused]] BufferPoolManager *bpm_{nullptr}; - Page *page_{nullptr}; - bool is_dirty_{false}; -}; - -class ReadPageGuard { - public: - ReadPageGuard() = default; - ReadPageGuard(BufferPoolManager *bpm, Page *page); - ReadPageGuard(const ReadPageGuard &) = delete; - auto operator=(const ReadPageGuard &) -> ReadPageGuard & = delete; - - /** TODO(P1): Add implementation - * - * @brief Move constructor for ReadPageGuard - * - * Very similar to BasicPageGuard. You want to create - * a ReadPageGuard using another ReadPageGuard. 
Think - * about if there's any way you can make this easier for yourself... - */ - ReadPageGuard(ReadPageGuard &&that) noexcept; - - /** TODO(P1): Add implementation - * - * @brief Move assignment for ReadPageGuard - * - * Very similar to BasicPageGuard. Given another ReadPageGuard, - * replace the contents of this one with that one. - */ - auto operator=(ReadPageGuard &&that) noexcept -> ReadPageGuard &; - - /** TODO(P1): Add implementation - * - * @brief Drop a ReadPageGuard - * - * ReadPageGuard's Drop should behave similarly to BasicPageGuard, - * except that ReadPageGuard has an additional resource - the latch! - * However, you should think VERY carefully about in which order you - * want to release these resources. - */ - void Drop(); - - /** TODO(P1): Add implementation - * - * @brief Destructor for ReadPageGuard - * - * Just like with BasicPageGuard, this should behave - * as if you were dropping the guard. - */ - ~ReadPageGuard(); - - auto PageId() -> page_id_t { return guard_.PageId(); } - - auto GetData() -> const char * { return guard_.GetData(); } - - template - auto As() -> const T * { - return guard_.As(); - } - - private: - // You may choose to get rid of this and add your own private variables. - BasicPageGuard guard_; -}; - -class WritePageGuard { - public: - WritePageGuard() = default; - WritePageGuard(BufferPoolManager *bpm, Page *page); - WritePageGuard(const WritePageGuard &) = delete; - auto operator=(const WritePageGuard &) -> WritePageGuard & = delete; - - /** TODO(P1): Add implementation - * - * @brief Move constructor for WritePageGuard - * - * Very similar to BasicPageGuard. You want to create - * a WritePageGuard using another WritePageGuard. Think - * about if there's any way you can make this easier for yourself... - */ - WritePageGuard(WritePageGuard &&that) noexcept; - - /** TODO(P1): Add implementation - * - * @brief Move assignment for WritePageGuard - * - * Very similar to BasicPageGuard. 
Given another WritePageGuard, - * replace the contents of this one with that one. - */ - auto operator=(WritePageGuard &&that) noexcept -> WritePageGuard &; - - /** TODO(P1): Add implementation - * - * @brief Drop a WritePageGuard - * - * WritePageGuard's Drop should behave similarly to BasicPageGuard, - * except that WritePageGuard has an additional resource - the latch! - * However, you should think VERY carefully about in which order you - * want to release these resources. - */ - void Drop(); - - /** TODO(P1): Add implementation - * - * @brief Destructor for WritePageGuard - * - * Just like with BasicPageGuard, this should behave - * as if you were dropping the guard. - */ - ~WritePageGuard(); - - auto PageId() -> page_id_t { return guard_.PageId(); } - - auto GetData() -> const char * { return guard_.GetData(); } - - template - auto As() -> const T * { - return guard_.As(); - } - - auto GetDataMut() -> char * { return guard_.GetDataMut(); } - - template - auto AsMut() -> T * { - return guard_.AsMut(); - } - - private: - // You may choose to get rid of this and add your own private variables. 
- BasicPageGuard guard_; -}; - -} // namespace bustub diff --git a/src/include/storage/page/table_page.h b/src/include/storage/page/table_page.h index aac012a..2dc4c47 100644 --- a/src/include/storage/page/table_page.h +++ b/src/include/storage/page/table_page.h @@ -13,21 +13,16 @@ #pragma once #include -#include -#include -#include -#include "common/config.h" #include "common/rid.h" #include "concurrency/lock_manager.h" #include "recovery/log_manager.h" #include "storage/page/page.h" -#include "storage/table/table_heap.h" #include "storage/table/tuple.h" -namespace bustub { +static constexpr uint64_t DELETE_MASK = (1U << (8 * sizeof(uint32_t) - 1)); -static constexpr uint64_t TABLE_PAGE_HEADER_SIZE = 8; +namespace bustub { /** * Slotted page format: @@ -39,76 +34,176 @@ static constexpr uint64_t TABLE_PAGE_HEADER_SIZE = 8; * * Header format (size in bytes): * ---------------------------------------------------------------------------- - * | NextPageId (4)| NumTuples(2) | NumDeletedTuples(2) | + * | PageId (4)| LSN (4)| PrevPageId (4)| NextPageId (4)| FreeSpacePointer(4) | * ---------------------------------------------------------------------------- * ---------------------------------------------------------------- - * | Tuple_1 offset+size (4) | Tuple_2 offset+size (4) | ... | + * | TupleCount (4) | Tuple_1 offset (4) | Tuple_1 size (4) | ... | * ---------------------------------------------------------------- * - * Tuple format: - * | meta | data | */ - -class TablePage { +class TablePage : public Page { public: /** * Initialize the TablePage header. 
+ * @param page_id the page ID of this table page + * @param page_size the size of this table page + * @param prev_page_id the previous table page ID + * @param log_manager the log manager in use + * @param txn the transaction that this page is created in */ - void Init(); + void Init(page_id_t page_id, uint32_t page_size, page_id_t prev_page_id, LogManager *log_manager, Transaction *txn); - /** @return number of tuples in this page */ - auto GetNumTuples() const -> uint32_t { return num_tuples_; } + /** @return the page ID of this table page */ + auto GetTablePageId() -> page_id_t { return *reinterpret_cast(GetData()); } + + /** @return the page ID of the previous table page */ + auto GetPrevPageId() -> page_id_t { return *reinterpret_cast(GetData() + OFFSET_PREV_PAGE_ID); } /** @return the page ID of the next table page */ - auto GetNextPageId() const -> page_id_t { return next_page_id_; } + auto GetNextPageId() -> page_id_t { return *reinterpret_cast(GetData() + OFFSET_NEXT_PAGE_ID); } - /** Set the page id of the next page in the table. */ - void SetNextPageId(page_id_t next_page_id) { next_page_id_ = next_page_id; } + /** Set the page id of the previous page in the table. */ + void SetPrevPageId(page_id_t prev_page_id) { + memcpy(GetData() + OFFSET_PREV_PAGE_ID, &prev_page_id, sizeof(page_id_t)); + } - /** Get the next offset to insert, return nullopt if this tuple cannot fit in this page */ - auto GetNextTupleOffset(const TupleMeta &meta, const Tuple &tuple) const -> std::optional; + /** Set the page id of the next page in the table. */ + void SetNextPageId(page_id_t next_page_id) { + memcpy(GetData() + OFFSET_NEXT_PAGE_ID, &next_page_id, sizeof(page_id_t)); + } /** * Insert a tuple into the table. * @param tuple tuple to insert + * @param[out] rid rid of the inserted tuple + * @param txn transaction performing the insert + * @param lock_manager the lock manager + * @param log_manager the log manager * @return true if the insert is successful (i.e. 
there is enough space) */ - auto InsertTuple(const TupleMeta &meta, const Tuple &tuple) -> std::optional; + auto InsertTuple(const Tuple &tuple, RID *rid, Transaction *txn, LockManager *lock_manager, LogManager *log_manager) + -> bool; + + /** + * Mark a tuple as deleted. This does not actually delete the tuple. + * @param rid rid of the tuple to mark as deleted + * @param txn transaction performing the delete + * @param lock_manager the lock manager + * @param log_manager the log manager + * @return true if marking the tuple as deleted is successful (i.e the tuple exists) + */ + auto MarkDelete(const RID &rid, Transaction *txn, LockManager *lock_manager, LogManager *log_manager) -> bool; /** * Update a tuple. + * @param new_tuple new value of the tuple + * @param[out] old_tuple old value of the tuple + * @param rid rid of the tuple + * @param txn transaction performing the update + * @param lock_manager the lock manager + * @param log_manager the log manager + * @return true if updating the tuple succeeded */ - void UpdateTupleMeta(const TupleMeta &meta, const RID &rid); + auto UpdateTuple(const Tuple &new_tuple, Tuple *old_tuple, const RID &rid, Transaction *txn, + LockManager *lock_manager, LogManager *log_manager) -> bool; + + /** To be called on commit or abort. Actually perform the delete or rollback an insert. */ + void ApplyDelete(const RID &rid, Transaction *txn, LogManager *log_manager); + + /** To be called on abort. Rollback a delete, i.e. this reverses a MarkDelete. */ + void RollbackDelete(const RID &rid, Transaction *txn, LogManager *log_manager); /** * Read a tuple from a table. + * @param rid rid of the tuple to read + * @param[out] tuple the tuple that was read + * @param txn transaction performing the read + * @param lock_manager the lock manager + * @return true if the read is successful (i.e. 
the tuple exists) */ - auto GetTuple(const RID &rid) const -> std::pair; + auto GetTuple(const RID &rid, Tuple *tuple, Transaction *txn, LockManager *lock_manager) -> bool; + + /** @return the rid of the first tuple in this page */ /** - * Read a tuple meta from a table. + * @param[out] first_rid the RID of the first tuple in this page + * @return true if the first tuple exists, false otherwise */ - auto GetTupleMeta(const RID &rid) const -> TupleMeta; + auto GetFirstTupleRid(RID *first_rid) -> bool; /** - * Update a tuple in place. + * @param cur_rid the RID of the current tuple + * @param[out] next_rid the RID of the tuple following the current tuple + * @return true if the next tuple exists, false otherwise */ - void UpdateTupleInPlaceUnsafe(const TupleMeta &meta, const Tuple &tuple, RID rid); + auto GetNextTupleRid(const RID &cur_rid, RID *next_rid) -> bool; + private: static_assert(sizeof(page_id_t) == 4); - private: - using TupleInfo = std::tuple; - char page_start_[0]; - page_id_t next_page_id_; - uint16_t num_tuples_; - uint16_t num_deleted_tuples_; - TupleInfo tuple_info_[0]; - - static constexpr size_t TUPLE_INFO_SIZE = 24; - static_assert(sizeof(TupleInfo) == TUPLE_INFO_SIZE); -}; + static constexpr size_t SIZE_TABLE_PAGE_HEADER = 24; + static constexpr size_t SIZE_TUPLE = 8; + static constexpr size_t OFFSET_PREV_PAGE_ID = 8; + static constexpr size_t OFFSET_NEXT_PAGE_ID = 12; + static constexpr size_t OFFSET_FREE_SPACE = 16; + static constexpr size_t OFFSET_TUPLE_COUNT = 20; + static constexpr size_t OFFSET_TUPLE_OFFSET = 24; // Naming things is hard. + static constexpr size_t OFFSET_TUPLE_SIZE = 28; -static_assert(sizeof(TablePage) == TABLE_PAGE_HEADER_SIZE); + /** @return pointer to the end of the current free space, see header comment */ + auto GetFreeSpacePointer() -> uint32_t { return *reinterpret_cast(GetData() + OFFSET_FREE_SPACE); } + /** Sets the pointer, this should be the end of the current free space. 
*/ + void SetFreeSpacePointer(uint32_t free_space_pointer) { + memcpy(GetData() + OFFSET_FREE_SPACE, &free_space_pointer, sizeof(uint32_t)); + } + + /** + * @note returned tuple count may be an overestimate because some slots may be empty + * @return at least the number of tuples in this page + */ + auto GetTupleCount() -> uint32_t { return *reinterpret_cast(GetData() + OFFSET_TUPLE_COUNT); } + + /** Set the number of tuples in this page. */ + void SetTupleCount(uint32_t tuple_count) { memcpy(GetData() + OFFSET_TUPLE_COUNT, &tuple_count, sizeof(uint32_t)); } + + auto GetFreeSpaceRemaining() -> uint32_t { + return GetFreeSpacePointer() - SIZE_TABLE_PAGE_HEADER - SIZE_TUPLE * GetTupleCount(); + } + + /** @return tuple offset at slot slot_num */ + auto GetTupleOffsetAtSlot(uint32_t slot_num) -> uint32_t { + return *reinterpret_cast(GetData() + OFFSET_TUPLE_OFFSET + SIZE_TUPLE * slot_num); + } + + /** Set tuple offset at slot slot_num. */ + void SetTupleOffsetAtSlot(uint32_t slot_num, uint32_t offset) { + memcpy(GetData() + OFFSET_TUPLE_OFFSET + SIZE_TUPLE * slot_num, &offset, sizeof(uint32_t)); + } + + /** @return tuple size at slot slot_num */ + auto GetTupleSize(uint32_t slot_num) -> uint32_t { + return *reinterpret_cast(GetData() + OFFSET_TUPLE_SIZE + SIZE_TUPLE * slot_num); + } + + /** Set tuple size at slot slot_num. 
*/ + void SetTupleSize(uint32_t slot_num, uint32_t size) { + memcpy(GetData() + OFFSET_TUPLE_SIZE + SIZE_TUPLE * slot_num, &size, sizeof(uint32_t)); + } + + /** @return true if the tuple is deleted or empty */ + static auto IsDeleted(uint32_t tuple_size) -> bool { + return static_cast(tuple_size & DELETE_MASK) || tuple_size == 0; + } + + /** @return tuple size with the deleted flag set */ + static auto SetDeletedFlag(uint32_t tuple_size) -> uint32_t { + return static_cast(tuple_size | DELETE_MASK); + } + + /** @return tuple size with the deleted flag unset */ + static auto UnsetDeletedFlag(uint32_t tuple_size) -> uint32_t { + return static_cast(tuple_size & (~DELETE_MASK)); + } +}; } // namespace bustub diff --git a/src/include/storage/table/table_heap.h b/src/include/storage/table/table_heap.h index 31e8845..bbd54f2 100644 --- a/src/include/storage/table/table_heap.h +++ b/src/include/storage/table/table_heap.h @@ -12,25 +12,14 @@ #pragma once -#include -#include // NOLINT -#include -#include - #include "buffer/buffer_pool_manager.h" -#include "common/config.h" -#include "concurrency/lock_manager.h" -#include "concurrency/transaction.h" #include "recovery/log_manager.h" -#include "storage/page/page_guard.h" #include "storage/page/table_page.h" #include "storage/table/table_iterator.h" #include "storage/table/tuple.h" namespace bustub { -class TablePage; - /** * TableHeap represents a physical table on disk. * This is just a doubly-linked list of pages. @@ -44,99 +33,86 @@ class TableHeap { /** * Create a table heap without a transaction. (open table) * @param buffer_pool_manager the buffer pool manager + * @param lock_manager the lock manager + * @param log_manager the log manager * @param first_page_id the id of the first page */ - explicit TableHeap(BufferPoolManager *bpm); + TableHeap(BufferPoolManager *buffer_pool_manager, LockManager *lock_manager, LogManager *log_manager, + page_id_t first_page_id); /** - * Insert a tuple into the table. 
If the tuple is too large (>= page_size), return std::nullopt. - * @param meta tuple meta - * @param tuple tuple to insert - * @return rid of the inserted tuple + * Create a table heap with a transaction. (create table) + * @param buffer_pool_manager the buffer pool manager + * @param lock_manager the lock manager + * @param log_manager the log manager + * @param txn the creating transaction */ - auto InsertTuple(const TupleMeta &meta, const Tuple &tuple, LockManager *lock_mgr = nullptr, - Transaction *txn = nullptr, table_oid_t oid = 0) -> std::optional; + TableHeap(BufferPoolManager *buffer_pool_manager, LockManager *lock_manager, LogManager *log_manager, + Transaction *txn); /** - * Update the meta of a tuple. - * @param meta new tuple meta - * @param rid the rid of the inserted tuple + * Insert a tuple into the table. If the tuple is too large (>= page_size), return false. + * @param tuple tuple to insert + * @param[out] rid the rid of the inserted tuple + * @param txn the transaction performing the insert + * @return true iff the insert is successful */ - void UpdateTupleMeta(const TupleMeta &meta, RID rid); + auto InsertTuple(const Tuple &tuple, RID *rid, Transaction *txn) -> bool; /** - * Read a tuple from the table. - * @param rid rid of the tuple to read - * @return the meta and tuple + * Mark the tuple as deleted. The actual delete will occur when ApplyDelete is called. + * @param rid resource id of the tuple of delete + * @param txn transaction performing the delete + * @return true iff the delete is successful (i.e the tuple exists) */ - auto GetTuple(RID rid) -> std::pair; + auto MarkDelete(const RID &rid, Transaction *txn) -> bool; // for delete /** - * Read a tuple meta from the table. Note: if you want to get tuple and meta together, use `GetTuple` instead - * to ensure atomicity. 
- * @param rid rid of the tuple to read - * @return the meta + * if the new tuple is too large to fit in the old page, return false (will delete and insert) + * @param tuple new tuple + * @param rid rid of the old tuple + * @param txn transaction performing the update + * @return true is update is successful. */ - auto GetTupleMeta(RID rid) -> TupleMeta; - - /** @return the iterator of this table. When this iterator is created, it will record the current last tuple in the - * table heap, and the iterator will stop at that point, in order to avoid halloween problem. You usually will need to - * use this function for project 3. Given that you have already implemented your project 4 update executor as a - * pipeline breaker, you may use `MakeEagerIterator` to test whether the update executor is implemented correctly. - * There should be no difference between this function and `MakeEagerIterator` in project 4 if everything is - * implemented correctly. */ - auto MakeIterator() -> TableIterator; - - /** @return the iterator of this table. The iterator will stop at the last tuple at the time of iterating. */ - auto MakeEagerIterator() -> TableIterator; - - /** @return the id of the first page of this table */ - inline auto GetFirstPageId() const -> page_id_t { return first_page_id_; } + auto UpdateTuple(const Tuple &tuple, const RID &rid, Transaction *txn) -> bool; /** - * Update a tuple in place. Should NOT be used in project 3. Implement your project 3 update executor as delete and - * insert. You will need to use this function in project 4. - * @param meta new tuple meta - * @param tuple new tuple - * @param rid the rid of the tuple to be updated - * @param check the check to run before actually update. + * Called on Commit/Abort to actually delete a tuple or rollback an insert. + * @param rid rid of the tuple to delete + * @param txn transaction performing the delete. 
*/ - auto UpdateTupleInPlace(const TupleMeta &meta, const Tuple &tuple, RID rid, - std::function &&check = nullptr) - -> bool; + void ApplyDelete(const RID &rid, Transaction *txn); - /** For binder tests */ - static auto CreateEmptyHeap(bool create_table_heap = false) -> std::unique_ptr { - // The input parameter should be false in order to generate a empty heap - assert(!create_table_heap); - return std::unique_ptr(new TableHeap(create_table_heap)); - } - - // The below functions are useful only when you want to implement abort in a way that removes an undo log from the - // version chain. DO NOT USE THEM if you are unsure what they are supposed to do. - // - // And if you decide to use the below functions, DO NOT use the normal ones like `GetTuple`. Having two read locks - // on the same thing in one thread might cause deadlocks. - - auto AcquireTablePageReadLock(RID rid) -> ReadPageGuard; + /** + * Called on abort to rollback a delete. + * @param rid rid of the deleted tuple. + * @param txn transaction performing the rollback + */ + void RollbackDelete(const RID &rid, Transaction *txn); - auto AcquireTablePageWriteLock(RID rid) -> WritePageGuard; + /** + * Read a tuple from the table. + * @param rid rid of the tuple to read + * @param tuple output variable for the tuple + * @param txn transaction performing the read + * @return true if the read was successful (i.e. 
the tuple exists) + */ + auto GetTuple(const RID &rid, Tuple *tuple, Transaction *txn, bool acquire_read_lock = true) -> bool; - void UpdateTupleInPlaceWithLockAcquired(const TupleMeta &meta, const Tuple &tuple, RID rid, TablePage *page); + /** @return the begin iterator of this table */ + auto Begin(Transaction *txn) -> TableIterator; - auto GetTupleWithLockAcquired(RID rid, const TablePage *page) -> std::pair; + /** @return the end iterator of this table */ + auto End() -> TableIterator; - auto GetTupleMetaWithLockAcquired(RID rid, const TablePage *page) -> TupleMeta; + /** @return the id of the first page of this table */ + inline auto GetFirstPageId() const -> page_id_t { return first_page_id_; } private: - /** Used for binder tests */ - explicit TableHeap(bool create_table_heap = false); - - BufferPoolManager *bpm_; - page_id_t first_page_id_{INVALID_PAGE_ID}; - - std::mutex latch_; - page_id_t last_page_id_{INVALID_PAGE_ID}; /* protected by latch_ */ + BufferPoolManager *buffer_pool_manager_; + LockManager *lock_manager_; + LogManager *log_manager_; + page_id_t first_page_id_{}; }; } // namespace bustub diff --git a/src/include/storage/table/table_iterator.h b/src/include/storage/table/table_iterator.h index 584efc4..ff97de0 100644 --- a/src/include/storage/table/table_iterator.h +++ b/src/include/storage/table/table_iterator.h @@ -13,10 +13,7 @@ #pragma once #include -#include -#include -#include "common/macros.h" #include "common/rid.h" #include "concurrency/transaction.h" #include "storage/table/tuple.h" @@ -32,29 +29,38 @@ class TableIterator { friend class Cursor; public: - DISALLOW_COPY(TableIterator); + TableIterator(TableHeap *table_heap, RID rid, Transaction *txn); - TableIterator(TableHeap *table_heap, RID rid, RID stop_at_rid); - TableIterator(TableIterator &&) = default; + TableIterator(const TableIterator &other) + : table_heap_(other.table_heap_), tuple_(new Tuple(*other.tuple_)), txn_(other.txn_) {} - ~TableIterator() = default; + 
~TableIterator() { delete tuple_; } - auto GetTuple() -> std::pair; + inline auto operator==(const TableIterator &itr) const -> bool { + return tuple_->rid_.Get() == itr.tuple_->rid_.Get(); + } - auto GetRID() -> RID; + inline auto operator!=(const TableIterator &itr) const -> bool { return !(*this == itr); } - auto IsEnd() -> bool; + auto operator*() -> const Tuple &; + + auto operator->() -> Tuple *; auto operator++() -> TableIterator &; + auto operator++(int) -> TableIterator; + + auto operator=(const TableIterator &other) -> TableIterator & { + table_heap_ = other.table_heap_; + *tuple_ = *other.tuple_; + txn_ = other.txn_; + return *this; + } + private: TableHeap *table_heap_; - RID rid_; - - // When creating table iterator, we will record the maximum RID that we should scan. - // Otherwise we will have dead loops when updating while scanning. (In project 4, update should be implemented as - // deletion + insertion.) - RID stop_at_rid_; + Tuple *tuple_; + Transaction *txn_; }; } // namespace bustub diff --git a/src/include/storage/table/tuple.h b/src/include/storage/table/tuple.h index c8a5b73..44a9532 100644 --- a/src/include/storage/table/tuple.h +++ b/src/include/storage/table/tuple.h @@ -16,32 +16,11 @@ #include #include "catalog/schema.h" -#include "common/config.h" #include "common/rid.h" #include "type/value.h" namespace bustub { -using timestamp_t = int64_t; -const timestamp_t INVALID_TS = -1; - -static constexpr size_t TUPLE_META_SIZE = 16; - -struct TupleMeta { - /** the ts / txn_id of this tuple. In project 3, simply set it to 0. */ - timestamp_t ts_; - /** marks whether this tuple is marked removed from table heap. 
*/ - bool is_deleted_; - - friend auto operator==(const TupleMeta &a, const TupleMeta &b) { - return a.ts_ == b.ts_ && a.is_deleted_ == b.is_deleted_; - } - - friend auto operator!=(const TupleMeta &a, const TupleMeta &b) { return !(a == b); } -}; - -static_assert(sizeof(TupleMeta) == TUPLE_META_SIZE); - /** * Tuple format: * --------------------------------------------------------------------- @@ -60,22 +39,22 @@ class Tuple { // constructor for table heap tuple explicit Tuple(RID rid) : rid_(rid) {} - static auto Empty() -> Tuple { return Tuple{RID{INVALID_PAGE_ID, 0}}; } - // constructor for creating a new tuple based on input value Tuple(std::vector values, const Schema *schema); - Tuple(const Tuple &other) = default; - - // move constructor - Tuple(Tuple &&other) noexcept = default; + // copy constructor, deep copy + Tuple(const Tuple &other); // assign operator, deep copy - auto operator=(const Tuple &other) -> Tuple & = default; - - // move assignment - auto operator=(Tuple &&other) noexcept -> Tuple & = default; - + auto operator=(const Tuple &other) -> Tuple &; + + ~Tuple() { + if (allocated_) { + delete[] data_; + } + allocated_ = false; + data_ = nullptr; + } // serialize tuple data void SerializeTo(char *storage) const; @@ -85,14 +64,11 @@ class Tuple { // return RID of current tuple inline auto GetRid() const -> RID { return rid_; } - // return RID of current tuple - inline auto SetRid(RID rid) { rid_ = rid; } - // Get the address of this tuple in the table's backing store - inline auto GetData() const -> const char * { return data_.data(); } + inline auto GetData() const -> char * { return data_; } - // Get length of the tuple, including varchar length - inline auto GetLength() const -> uint32_t { return data_.size(); } + // Get length of the tuple, including varchar legth + inline auto GetLength() const -> uint32_t { return size_; } // Get the value of a specified column (const) // checks the schema to see how to return the Value. 
@@ -106,17 +82,18 @@ class Tuple { Value value = GetValue(schema, column_idx); return value.IsNull(); } + inline auto IsAllocated() -> bool { return allocated_; } auto ToString(const Schema *schema) const -> std::string; - friend inline auto IsTupleContentEqual(const Tuple &a, const Tuple &b) { return a.data_ == b.data_; } - private: // Get the starting storage address of specific column auto GetDataPtr(const Schema *schema, uint32_t column_idx) const -> const char *; - RID rid_{}; // if pointing to the table heap, the rid is valid - std::vector data_; + bool allocated_{false}; // is allocated? + RID rid_{}; // if pointing to the table heap, the rid is valid + uint32_t size_{0}; + char *data_{nullptr}; }; } // namespace bustub diff --git a/src/include/type/type.h b/src/include/type/type.h index 26f6188..ef8e7bb 100644 --- a/src/include/type/type.h +++ b/src/include/type/type.h @@ -28,7 +28,6 @@ class Type { explicit Type(TypeId type_id) : type_id_(type_id) {} virtual ~Type() = default; - // Get the size of this data type in bytes static auto GetTypeSize(TypeId type_id) -> uint64_t; @@ -100,16 +99,19 @@ class Type { virtual auto CastAs(const Value &val, TypeId type_id) const -> Value; - // Access the raw varlen data stored from the tuple storage + // Access the raw variable length data virtual auto GetData(const Value &val) const -> const char *; - // Get the storage size of the value. - virtual auto GetStorageSize(const Value &val) const -> uint32_t; + // Get the length of the variable length data + virtual auto GetLength(const Value &val) const -> uint32_t; + + // Access the raw varlen data stored from the tuple storage + virtual auto GetData(char *storage) -> char *; protected: // The actual type ID TypeId type_id_; // Singleton instances. 
- static Type *k_types[10]; + static Type *k_types[14]; }; } // namespace bustub diff --git a/src/include/type/type_id.h b/src/include/type/type_id.h index 4239143..54e7794 100644 --- a/src/include/type/type_id.h +++ b/src/include/type/type_id.h @@ -14,5 +14,5 @@ namespace bustub { // Every possible SQL type ID -enum TypeId { INVALID = 0, BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, DECIMAL, VARCHAR, TIMESTAMP, VECTOR }; +enum TypeId { INVALID = 0, BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, DECIMAL, VARCHAR, TIMESTAMP }; } // namespace bustub diff --git a/src/include/type/value.h b/src/include/type/value.h index fab783b..b539e82 100644 --- a/src/include/type/value.h +++ b/src/include/type/value.h @@ -16,7 +16,6 @@ #include #include #include -#include #include "fmt/format.h" @@ -25,8 +24,6 @@ namespace bustub { -class Column; - inline auto GetCmpBool(bool boolean) -> CmpBool { return boolean ? CmpBool::CmpTrue : CmpBool::CmpFalse; } // A value is an abstract class that represents a view over SQL data stored in @@ -45,7 +42,6 @@ class Value { friend class TimestampType; friend class BooleanType; friend class VarlenType; - friend class VectorType; public: explicit Value(const TypeId type) : manage_data_(false), type_id_(type) { size_.len_ = BUSTUB_VALUE_NULL; } @@ -65,7 +61,6 @@ class Value { // VARCHAR Value(TypeId type, const char *data, uint32_t len, bool manage_data); Value(TypeId type, const std::string &data); - Value(TypeId type, const std::vector &data); Value() : Value(TypeId::INVALID) {} Value(const Value &other); @@ -85,11 +80,8 @@ class Value { // Get the type of this value inline auto GetTypeId() const -> TypeId { return type_id_; } - // Get the type of this value - auto GetColumn() const -> Column; - // Get the length of the variable length data - inline auto GetStorageSize() const -> uint32_t { return Type::GetInstance(type_id_)->GetStorageSize(*this); } + inline auto GetLength() const -> uint32_t { return 
Type::GetInstance(type_id_)->GetLength(*this); } // Access the raw variable length data inline auto GetData() const -> const char * { return Type::GetInstance(type_id_)->GetData(*this); } @@ -98,18 +90,9 @@ class Value { return *reinterpret_cast(&value_); } - auto GetVector() const -> std::vector; - inline auto CastAs(const TypeId type_id) const -> Value { return Type::GetInstance(type_id_)->CastAs(*this, type_id); } - // You will likely need this in project 4... - inline auto CompareExactlyEquals(const Value &o) const -> bool { - if (this->IsNull() && o.IsNull()) { - return true; - } - return (Type::GetInstance(type_id_)->CompareEquals(*this, o)) == CmpBool::CmpTrue; - } // Comparison Methods inline auto CompareEquals(const Value &o) const -> CmpBool { return Type::GetInstance(type_id_)->CompareEquals(*this, o); diff --git a/src/include/type/value_factory.h b/src/include/type/value_factory.h index 22a0d11..6d53d56 100644 --- a/src/include/type/value_factory.h +++ b/src/include/type/value_factory.h @@ -15,7 +15,6 @@ #include #include #include -#include #include "common/macros.h" #include "common/util/string_util.h" @@ -75,11 +74,6 @@ class ValueFactory { return {TypeId::VARCHAR, value}; } - static inline auto GetVectorValue(const std::vector &value, - __attribute__((__unused__)) AbstractPool *pool = nullptr) -> Value { - return {TypeId::VECTOR, value}; - } - static inline auto GetNullValueByType(TypeId type_id) -> Value { Value ret_value; switch (type_id) { diff --git a/src/include/type/varlen_type.h b/src/include/type/varlen_type.h index bee5dff..3c791b1 100644 --- a/src/include/type/varlen_type.h +++ b/src/include/type/varlen_type.h @@ -29,7 +29,7 @@ class VarlenType : public Type { auto GetData(const Value &val) const -> const char * override; // Get the length of the variable length data - auto GetStorageSize(const Value &val) const -> uint32_t override; + auto GetLength(const Value &val) const -> uint32_t override; // Comparison functions auto 
CompareEquals(const Value &left, const Value &right) const -> CmpBool override; diff --git a/src/include/type/vector_type.h b/src/include/type/vector_type.h deleted file mode 100644 index b6c0d25..0000000 --- a/src/include/type/vector_type.h +++ /dev/null @@ -1,67 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// vector_type.h -// -// Identification: src/include/type/vector_type.h -// -// Copyright (c) 2015-2019, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once -#include -#include -#include -#include "type/value.h" - -namespace bustub { -/* A varlen value is an abstract class representing all objects that have - * variable length. - * For simplicity, for valen_type we always set flag "inline" as true, which - * means we store actual data along with its size rather than a pointer - */ -class VectorType : public Type { - public: - VectorType(); - ~VectorType() override; - - // Access the raw variable length data - auto GetData(const Value &val) const -> const char * override; - - auto GetVector(const Value &val) const -> std::vector; - - // Get the length of the variable length data - auto GetStorageSize(const Value &val) const -> uint32_t override; - - // Comparison functions - auto CompareEquals(const Value &left, const Value &right) const -> CmpBool override; - auto CompareNotEquals(const Value &left, const Value &right) const -> CmpBool override; - auto CompareLessThan(const Value &left, const Value &right) const -> CmpBool override; - auto CompareLessThanEquals(const Value &left, const Value &right) const -> CmpBool override; - auto CompareGreaterThan(const Value &left, const Value &right) const -> CmpBool override; - auto CompareGreaterThanEquals(const Value &left, const Value &right) const -> CmpBool override; - - // Other mathematical functions - auto Min(const Value &left, const Value &right) const -> 
Value override; - auto Max(const Value &left, const Value &right) const -> Value override; - - auto CastAs(const Value &value, TypeId type_id) const -> Value override; - - // Decimal types are always inlined - auto IsInlined(const Value & /*val*/) const -> bool override { return false; } - - // Debug - auto ToString(const Value &val) const -> std::string override; - - // Serialize this value into the given storage space - void SerializeTo(const Value &val, char *storage) const override; - - // Deserialize a value of the given type from the given storage space. - auto DeserializeFrom(const char *storage) const -> Value override; - - // Create a copy of this value - auto Copy(const Value &val) const -> Value override; -}; -} // namespace bustub diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 4c86c7d..0c92967 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -1,19 +1,17 @@ add_library( - bustub_optimizer - OBJECT - eliminate_true_filter.cpp - merge_projection.cpp - merge_filter_nlj.cpp - merge_filter_scan.cpp - nlj_as_hash_join.cpp - nlj_as_index_join.cpp - optimizer.cpp - optimizer_custom_rules.cpp - optimizer_internal.cpp - order_by_index_scan.cpp - sort_limit_as_topn.cpp - seqscan_as_indexscan.cpp) + bustub_optimizer + OBJECT + eliminate_true_filter.cpp + merge_projection.cpp + merge_filter_nlj.cpp + merge_filter_scan.cpp + nlj_as_hash_join.cpp + nlj_as_index_join.cpp + optimizer.cpp + optimizer_custom_rules.cpp + order_by_index_scan.cpp + sort_limit_as_topn.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ - PARENT_SCOPE) + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/optimizer/eliminate_true_filter.cpp b/src/optimizer/eliminate_true_filter.cpp index b509ceb..7a3b787 100644 --- a/src/optimizer/eliminate_true_filter.cpp +++ b/src/optimizer/eliminate_true_filter.cpp @@ -8,6 +8,8 @@ namespace bustub { +#ifdef BUSTUB_OPTIMIZER_HACK_REMOVE_AFTER_2022_FALL + auto 
Optimizer::OptimizeEliminateTrueFilter(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { std::vector children; for (const auto &child : plan->GetChildren()) { @@ -18,7 +20,7 @@ auto Optimizer::OptimizeEliminateTrueFilter(const AbstractPlanNodeRef &plan) -> if (optimized_plan->GetType() == PlanType::Filter) { const auto &filter_plan = dynamic_cast(*optimized_plan); - if (IsPredicateTrue(filter_plan.GetPredicate())) { + if (IsPredicateTrue(*filter_plan.GetPredicate())) { BUSTUB_ASSERT(optimized_plan->children_.size() == 1, "must have exactly one children"); return optimized_plan->children_[0]; } @@ -27,4 +29,6 @@ auto Optimizer::OptimizeEliminateTrueFilter(const AbstractPlanNodeRef &plan) -> return optimized_plan; } +#endif + } // namespace bustub diff --git a/src/optimizer/merge_filter_nlj.cpp b/src/optimizer/merge_filter_nlj.cpp index ae0329c..4ca0ecc 100644 --- a/src/optimizer/merge_filter_nlj.cpp +++ b/src/optimizer/merge_filter_nlj.cpp @@ -36,8 +36,8 @@ auto Optimizer::RewriteExpressionForJoin(const AbstractExpressionRef &expr, size return expr->CloneWithChildren(children); } -auto Optimizer::IsPredicateTrue(const AbstractExpressionRef &expr) -> bool { - if (const auto *const_expr = dynamic_cast(expr.get()); const_expr != nullptr) { +auto Optimizer::IsPredicateTrue(const AbstractExpression &expr) -> bool { + if (const auto *const_expr = dynamic_cast(&expr); const_expr != nullptr) { return const_expr->val_.CastAs(TypeId::BOOLEAN).GetAs(); } return false; diff --git a/src/optimizer/merge_filter_scan.cpp b/src/optimizer/merge_filter_scan.cpp index 739aaf4..b67ee19 100644 --- a/src/optimizer/merge_filter_scan.cpp +++ b/src/optimizer/merge_filter_scan.cpp @@ -10,6 +10,8 @@ namespace bustub { +#ifdef BUSTUB_OPTIMIZER_HACK_REMOVE_AFTER_2022_FALL + auto Optimizer::OptimizeMergeFilterScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { std::vector children; for (const auto &child : plan->GetChildren()) { @@ -34,4 +36,6 @@ auto 
Optimizer::OptimizeMergeFilterScan(const AbstractPlanNodeRef &plan) -> Abst return optimized_plan; } +#endif + } // namespace bustub diff --git a/src/optimizer/nlj_as_hash_join.cpp b/src/optimizer/nlj_as_hash_join.cpp index 77d69d3..4df77bc 100644 --- a/src/optimizer/nlj_as_hash_join.cpp +++ b/src/optimizer/nlj_as_hash_join.cpp @@ -18,10 +18,48 @@ namespace bustub { auto Optimizer::OptimizeNLJAsHashJoin(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { - // TODO(student): implement NestedLoopJoin -> HashJoin optimizer rule - // Note for 2023 Fall: You should support join keys of any number of conjunction of equi-condistions: - // E.g. = AND = AND ... - return plan; + std::vector children; + for (const auto &child : plan->GetChildren()) { + children.emplace_back(OptimizeNLJAsHashJoin(child)); + } + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + + if (optimized_plan->GetType() == PlanType::NestedLoopJoin) { + const auto &nlj_plan = dynamic_cast(*optimized_plan); + // Has exactly two children + BUSTUB_ENSURE(nlj_plan.children_.size() == 2, "NLJ should have exactly 2 children."); + + // Check if expr is equal condition where one is for the left table, and one is for the right table. + if (const auto *expr = dynamic_cast(&nlj_plan.Predicate()); expr != nullptr) { + if (expr->comp_type_ == ComparisonType::Equal) { + if (const auto *left_expr = dynamic_cast(expr->children_[0].get()); + left_expr != nullptr) { + if (const auto *right_expr = dynamic_cast(expr->children_[1].get()); + right_expr != nullptr) { + // Ensure both exprs have tuple_id == 0 + auto left_expr_tuple_0 = + std::make_shared(0, left_expr->GetColIdx(), left_expr->GetReturnType()); + auto right_expr_tuple_0 = + std::make_shared(0, right_expr->GetColIdx(), right_expr->GetReturnType()); + // Now it's in form of = . Let's check if one of them is from the left table, and + // the other is from the right table. 
+ if (left_expr->GetTupleIdx() == 0 && right_expr->GetTupleIdx() == 1) { + return std::make_shared(nlj_plan.output_schema_, nlj_plan.GetLeftPlan(), + nlj_plan.GetRightPlan(), std::move(left_expr_tuple_0), + std::move(right_expr_tuple_0), nlj_plan.GetJoinType()); + } + if (left_expr->GetTupleIdx() == 1 && right_expr->GetTupleIdx() == 0) { + return std::make_shared(nlj_plan.output_schema_, nlj_plan.GetLeftPlan(), + nlj_plan.GetRightPlan(), std::move(right_expr_tuple_0), + std::move(left_expr_tuple_0), nlj_plan.GetJoinType()); + } + } + } + } + } + } + + return optimized_plan; } } // namespace bustub diff --git a/src/optimizer/nlj_as_index_join.cpp b/src/optimizer/nlj_as_index_join.cpp index b0413ab..7e114ce 100644 --- a/src/optimizer/nlj_as_index_join.cpp +++ b/src/optimizer/nlj_as_index_join.cpp @@ -44,7 +44,7 @@ auto Optimizer::OptimizeNLJAsIndexJoin(const AbstractPlanNodeRef &plan) -> Abstr // Has exactly two children BUSTUB_ENSURE(nlj_plan.children_.size() == 2, "NLJ should have exactly 2 children."); // Check if expr is equal condition where one is for the left table, and one is for the right table. - if (const auto *expr = dynamic_cast(nlj_plan.Predicate().get()); expr != nullptr) { + if (const auto *expr = dynamic_cast(&nlj_plan.Predicate()); expr != nullptr) { if (expr->comp_type_ == ComparisonType::Equal) { if (const auto *left_expr = dynamic_cast(expr->children_[0].get()); left_expr != nullptr) { diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 7a5a865..89f8f68 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -11,10 +11,9 @@ auto Optimizer::Optimize(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef auto p = plan; p = OptimizeMergeProjection(p); p = OptimizeMergeFilterNLJ(p); + p = OptimizeNLJAsIndexJoin(p); p = OptimizeOrderByAsIndexScan(p); p = OptimizeSortLimitAsTopN(p); - p = OptimizeMergeFilterScan(p); - p = OptimizeSeqScanAsIndexScan(p); return p; } // By default, use user-defined rules. 
diff --git a/src/optimizer/optimizer_custom_rules.cpp b/src/optimizer/optimizer_custom_rules.cpp index c8e493d..59c493a 100644 --- a/src/optimizer/optimizer_custom_rules.cpp +++ b/src/optimizer/optimizer_custom_rules.cpp @@ -1,21 +1,374 @@ +#include "execution/expressions/arithmetic_expression.h" +#include "execution/expressions/column_value_expression.h" +#include "execution/expressions/comparison_expression.h" +#include "execution/expressions/constant_value_expression.h" +#include "execution/expressions/logic_expression.h" #include "execution/plans/abstract_plan.h" +#include "execution/plans/aggregation_plan.h" +#include "execution/plans/filter_plan.h" +#include "execution/plans/index_scan_plan.h" +#include "execution/plans/mock_scan_plan.h" +#include "execution/plans/nested_loop_join_plan.h" +#include "execution/plans/projection_plan.h" +#include "execution/plans/seq_scan_plan.h" +#include "execution/plans/values_plan.h" #include "optimizer/optimizer.h" -// Note for 2023 Fall: You can add all optimizer rule implementations and apply the rules as you want in this file. -// Note that for some test cases, we force using starter rules, so that the configuration here won't take effects. -// Starter rule can be forcibly enabled by `set force_optimizer_starter_rule=yes`. +// Note for 2022 Fall: You can add all optimizer rule implementations and apply the rules as you want in this file. Note +// that for some test cases, we force using starter rules, so that the configuration here won't take effects. Starter +// rule can be forcibly enabled by `set force_optimizer_starter_rule=yes`. 
namespace bustub { +auto Optimizer::OptimizeReorderJoinUseIndex(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { + std::vector children; + for (const auto &child : plan->GetChildren()) { + children.emplace_back(OptimizeReorderJoinUseIndex(child)); + } + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + if (optimized_plan->GetType() == PlanType::NestedLoopJoin) { + const auto &nlj_plan = dynamic_cast(*optimized_plan); + BUSTUB_ASSERT(nlj_plan.children_.size() == 2, "NLJ should have exactly 2 children."); + + // ensure the left child is nlp + // the right child is seqscan or mockscan + if (nlj_plan.GetLeftPlan()->GetType() != PlanType::NestedLoopJoin || + (nlj_plan.GetRightPlan()->GetType() != PlanType::SeqScan && + nlj_plan.GetRightPlan()->GetType() != PlanType::MockScan)) { + return optimized_plan; + } + + const auto &left_nlj_plan = dynamic_cast(*nlj_plan.GetLeftPlan()); + + if (left_nlj_plan.GetLeftPlan()->GetType() == PlanType::NestedLoopJoin || + left_nlj_plan.GetRightPlan()->GetType() == PlanType::NestedLoopJoin) { + return optimized_plan; + } + + if (const auto *expr = dynamic_cast(&left_nlj_plan.Predicate()); expr != nullptr) { + if (expr->comp_type_ == ComparisonType::Equal) { + if (const auto *left_expr = dynamic_cast(expr->children_[0].get()); + left_expr != nullptr) { + if (const auto *right_expr = dynamic_cast(expr->children_[1].get()); + right_expr != nullptr) { + if (left_nlj_plan.GetLeftPlan()->GetType() == PlanType::SeqScan) { + const auto &left_seq_scan = dynamic_cast(*left_nlj_plan.GetLeftPlan()); + if (auto index = MatchIndex(left_seq_scan.table_name_, left_expr->GetColIdx()); index != std::nullopt) { + auto *outer_expr = dynamic_cast(&nlj_plan.Predicate()); + auto left_outer_expr = dynamic_cast(outer_expr->children_[0].get()); + auto right_outer_expr = dynamic_cast(outer_expr->children_[1].get()); + BUSTUB_ASSERT(expr->comp_type_ == ComparisonType::Equal, "comparison type must be equal"); + 
BUSTUB_ASSERT(outer_expr->comp_type_ == ComparisonType::Equal, "comparison type must be equal"); + + auto inner_pred = std::make_shared( + std::make_shared( + 0, left_outer_expr->GetColIdx() - left_nlj_plan.GetLeftPlan()->output_schema_->GetColumnCount(), + left_outer_expr->GetReturnType()), + std::make_shared(1, right_outer_expr->GetColIdx(), + right_outer_expr->GetReturnType()), + ComparisonType::Equal); + auto outer_pred = std::make_shared( + std::make_shared(0, right_expr->GetColIdx(), right_expr->GetReturnType()), + std::make_shared(1, left_expr->GetColIdx(), left_expr->GetReturnType()), + ComparisonType::Equal); + + auto right_column_1 = left_nlj_plan.GetRightPlan()->output_schema_->GetColumns(); + auto right_column_2 = nlj_plan.GetRightPlan()->output_schema_->GetColumns(); + std::vector columns; + columns.reserve(right_column_1.size() + right_column_2.size()); + + for (const auto &col : right_column_1) { + columns.push_back(col); + } + for (const auto &col : right_column_2) { + columns.push_back(col); + } + + std::vector outer_columns(columns); + for (const auto &col : left_nlj_plan.GetLeftPlan()->output_schema_->GetColumns()) { + outer_columns.push_back(col); + } + + return std::make_shared( + std::make_shared(outer_columns), + std::make_shared(std::make_shared(columns), + left_nlj_plan.GetRightPlan(), nlj_plan.GetRightPlan(), + inner_pred, JoinType::INNER), + left_nlj_plan.GetLeftPlan(), outer_pred, JoinType::INNER); + } + } + } + } + } + } + } + + return optimized_plan; +} + +auto Optimizer::OptimizePredicatePushDown(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { + std::vector children; + for (const auto &child : plan->GetChildren()) { + children.emplace_back(OptimizePredicatePushDown(child)); + } + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + + if (optimized_plan->GetType() == PlanType::NestedLoopJoin) { + const auto &nlj_plan = dynamic_cast(*optimized_plan); + + if (nlj_plan.GetLeftPlan()->GetType() != 
PlanType::NestedLoopJoin) { + return optimized_plan; + } + const auto &left_nlj_plan = dynamic_cast(*nlj_plan.GetLeftPlan()); + + if (nlj_plan.GetRightPlan()->GetType() != PlanType::MockScan || + left_nlj_plan.GetLeftPlan()->GetType() != PlanType::MockScan || + left_nlj_plan.GetRightPlan()->GetType() != PlanType::MockScan) { + return optimized_plan; + } + + std::vector join_preds; + std::vector filter_preds; + if (const auto *expr = dynamic_cast(&nlj_plan.Predicate()); expr != nullptr) { + while (const auto *inner_expr = dynamic_cast(expr->children_[0].get())) { + if (const auto *pred = dynamic_cast(expr->children_[1]->children_[1].get()); + pred != nullptr) { + join_preds.push_back(expr->children_[1]); + } else { + filter_preds.push_back(expr->children_[1]); + } + expr = dynamic_cast(expr->children_[0].get()); + } + if (const auto *pred = dynamic_cast(expr->children_[1]->children_[1].get()); + pred != nullptr) { + join_preds.push_back(expr->children_[1]); + } else { + filter_preds.push_back(expr->children_[1]); + } + if (const auto *pred = dynamic_cast(expr->children_[0]->children_[1].get()); + pred != nullptr) { + join_preds.push_back(expr->children_[0]); + } else { + filter_preds.push_back(expr->children_[0]); + } + + std::vector first_filter; + std::vector third_filter; + + for (const auto &pred : filter_preds) { + const auto *outer = dynamic_cast(pred.get()); + const auto *inner = dynamic_cast(pred->children_[0].get()); + if (inner->GetTupleIdx() == 0) { + first_filter.push_back(pred); + } else { + third_filter.push_back(std::make_shared( + std::make_shared(0, inner->GetColIdx(), inner->GetReturnType()), + pred->children_[1], outer->comp_type_)); + } + } + BUSTUB_ASSERT(first_filter.size() == 2, "only in leader board test!"); + BUSTUB_ASSERT(third_filter.size() == 2, "only in leader board test!"); + + auto first_pred = std::make_shared(first_filter[0], first_filter[1], LogicType::And); + auto third_pred = std::make_shared(third_filter[0], third_filter[1], 
LogicType::And); + + auto first_filter_scan = std::make_shared(left_nlj_plan.children_[0]->output_schema_, first_pred, + left_nlj_plan.children_[0]); + auto third_filter_scan = std::make_shared(nlj_plan.GetRightPlan()->output_schema_, third_pred, + nlj_plan.GetRightPlan()); + auto left_node = std::make_shared(left_nlj_plan.output_schema_, first_filter_scan, + left_nlj_plan.GetRightPlan(), left_nlj_plan.predicate_, + left_nlj_plan.GetJoinType()); + return std::make_shared(nlj_plan.output_schema_, left_node, third_filter_scan, + join_preds[0], nlj_plan.GetJoinType()); + } + } + + return optimized_plan; +} + +auto Optimizer::IsPredicateFalse(const AbstractExpression &expr) -> bool { + if (const auto *compare_expr = dynamic_cast(&expr); compare_expr != nullptr) { + if (const auto *left_expr = dynamic_cast(compare_expr->children_[0].get()); + left_expr != nullptr) { + if (const auto *right_expr = dynamic_cast(compare_expr->children_[1].get()); + right_expr != nullptr) { + if (compare_expr->comp_type_ == ComparisonType::Equal) { + if (left_expr->val_.CastAs(TypeId::INTEGER).GetAs() != + right_expr->val_.CastAs(TypeId::INTEGER).GetAs()) { + return true; + } + } + } + } + } + return false; +} + +auto Optimizer::OptimizeFalseFilter(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { + std::vector children; + for (const auto &child : plan->GetChildren()) { + children.emplace_back(OptimizeFalseFilter(child)); + } + + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + + if (optimized_plan->GetType() == PlanType::Filter) { + const auto &filter_plan = dynamic_cast(*optimized_plan); + + if (IsPredicateFalse(*filter_plan.GetPredicate())) { + return std::make_shared(filter_plan.children_[0]->output_schema_, + std::vector>{}); + } + } + return optimized_plan; +} + +auto Optimizer::OptimizeRemoveJoin(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { + std::vector children; + for (const auto &child : plan->GetChildren()) { + 
children.emplace_back(OptimizeRemoveJoin(child)); + } + + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + + if (optimized_plan->GetType() == PlanType::NestedLoopJoin) { + const auto &nlj_plan = dynamic_cast(*optimized_plan); + if (nlj_plan.GetRightPlan()->GetType() == PlanType::Values) { + const auto &right_plan = dynamic_cast(*nlj_plan.GetRightPlan()); + + if (right_plan.GetValues().empty()) { + return nlj_plan.children_[0]; + } + } + } + return optimized_plan; +} + +auto Optimizer::OptimizeRemoveColumn(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { + std::vector children; + for (const auto &child : plan->GetChildren()) { + children.emplace_back(OptimizeRemoveJoin(child)); + } + + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + if (optimized_plan->GetType() == PlanType::Projection) { + const auto outer_proj = dynamic_cast(*optimized_plan); + + if (outer_proj.GetChildPlan()->GetType() == PlanType::Projection) { + const auto inner_proj = dynamic_cast(*outer_proj.GetChildPlan()); + + if (inner_proj.GetChildPlan()->GetType() == PlanType::Aggregation) { + const auto agg_plan = dynamic_cast(*inner_proj.GetChildPlan()); + std::vector cols; + for (size_t i = 0; i < outer_proj.GetExpressions().size(); ++i) { + if (const auto *pred = dynamic_cast(inner_proj.GetExpressions()[i].get()); + pred != nullptr) { + cols.push_back(inner_proj.GetExpressions()[i]); + } else { + // hacking + cols.push_back(inner_proj.GetExpressions()[i]->children_[0]->children_[0]); + cols.push_back(inner_proj.GetExpressions()[i]->children_[0]->children_[1]); + cols.push_back(inner_proj.GetExpressions()[i]->children_[1]); + } + } + + std::vector inner_schema; + std::vector inner_proj_expr; + for (const auto &i : outer_proj.GetExpressions()) { + const auto *col = dynamic_cast(i.get()); + inner_proj_expr.push_back(inner_proj.GetExpressions()[col->GetColIdx()]); + inner_schema.push_back(inner_proj.OutputSchema().GetColumns()[col->GetColIdx()]); + } + 
+ inner_proj_expr.pop_back(); + inner_proj_expr.push_back(std::make_shared( + std::make_shared(std::make_shared(0, 1, TypeId::INTEGER), + std::make_shared(0, 1, TypeId::INTEGER), + ArithmeticType::Plus), + std::make_shared(0, 2, TypeId::INTEGER), ArithmeticType::Plus)); + + std::vector aggregates; + std::vector agg_types; + std::vector agg_schema; + + for (size_t i = 0; i < agg_plan.GetGroupBys().size(); ++i) { + agg_schema.push_back(agg_plan.OutputSchema().GetColumns()[i]); + } + + aggregates.push_back(agg_plan.GetAggregates()[0]); + agg_types.push_back(agg_plan.GetAggregateTypes()[0]); + agg_schema.push_back(agg_plan.OutputSchema().GetColumns()[agg_plan.GetGroupBys().size()]); + + aggregates.push_back(agg_plan.GetAggregates()[3]); + agg_types.push_back(agg_plan.GetAggregateTypes()[3]); + agg_schema.push_back(agg_plan.OutputSchema().GetColumns()[agg_plan.GetGroupBys().size() + 3]); + + return std::make_shared( + std::make_shared(inner_schema), inner_proj_expr, + std::make_shared(std::make_shared(agg_schema), agg_plan.GetChildAt(0), + agg_plan.GetGroupBys(), aggregates, agg_types)); + } + } + } + return optimized_plan; +} + +auto Optimizer::OptimizeMergeFilterIndexScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { + std::vector children; + for (const auto &child : plan->GetChildren()) { + children.emplace_back(OptimizeMergeFilterIndexScan(child)); + } + + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + + if (optimized_plan->GetType() == PlanType::Filter) { + const auto &filter_plan = dynamic_cast(*optimized_plan); + BUSTUB_ASSERT(optimized_plan->children_.size() == 1, "must have exactly one children"); + const auto &child_plan = *optimized_plan->children_[0]; + if (child_plan.GetType() == PlanType::SeqScan) { + const auto &seq_scan_plan = dynamic_cast(child_plan); + const auto *table_info = catalog_.GetTable(seq_scan_plan.GetTableOid()); + const auto indices = catalog_.GetTableIndexes(table_info->name_); + if (const auto *expr = 
dynamic_cast(filter_plan.GetPredicate().get()); + expr != nullptr) { + if (expr->comp_type_ == ComparisonType::Equal) { + if (const auto *left_expr = dynamic_cast(expr->children_[0].get()); + left_expr != nullptr) { + if (const auto *right_expr = dynamic_cast(expr->children_[1].get()); + right_expr != nullptr) { + for (const auto *index : indices) { + const auto &columns = index->key_schema_.GetColumns(); + if (columns.size() == 1 && + columns[0].GetName() == table_info->schema_.GetColumn(left_expr->GetColIdx()).GetName()) { + return std::make_shared(optimized_plan->output_schema_, index->index_oid_, + filter_plan.GetPredicate()); + } + } + } + } + } + } + } + } + return optimized_plan; +} + auto Optimizer::OptimizeCustom(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { auto p = plan; p = OptimizeMergeProjection(p); + p = OptimizeMergeFilterIndexScan(p); + p = OptimizeMergeFilterScan(p); p = OptimizeMergeFilterNLJ(p); - p = OptimizeNLJAsHashJoin(p); + p = OptimizeReorderJoinUseIndex(p); + p = OptimizePredicatePushDown(p); + p = OptimizeFalseFilter(p); + p = OptimizeRemoveJoin(p); + p = OptimizeRemoveColumn(p); + p = OptimizeNLJAsIndexJoin(p); + p = OptimizeNLJAsHashJoin(p); // Enable this rule after you have implemented hash join. 
p = OptimizeOrderByAsIndexScan(p); p = OptimizeSortLimitAsTopN(p); - p = OptimizeMergeFilterScan(p); - p = OptimizeSeqScanAsIndexScan(p); return p; } diff --git a/src/optimizer/optimizer_internal.cpp b/src/optimizer/optimizer_internal.cpp deleted file mode 100644 index e901230..0000000 --- a/src/optimizer/optimizer_internal.cpp +++ /dev/null @@ -1,5 +0,0 @@ -namespace bustub { - -void OptimizerHelperFunction() {} - -} // namespace bustub diff --git a/src/optimizer/order_by_index_scan.cpp b/src/optimizer/order_by_index_scan.cpp index 24d7395..f3a8414 100644 --- a/src/optimizer/order_by_index_scan.cpp +++ b/src/optimizer/order_by_index_scan.cpp @@ -32,22 +32,25 @@ auto Optimizer::OptimizeOrderByAsIndexScan(const AbstractPlanNodeRef &plan) -> A const auto &sort_plan = dynamic_cast(*optimized_plan); const auto &order_bys = sort_plan.GetOrderBy(); - std::vector order_by_column_ids; - for (const auto &[order_type, expr] : order_bys) { - // Order type is asc or default - if (!(order_type == OrderByType::ASC || order_type == OrderByType::DEFAULT)) { - return optimized_plan; - } + // Has exactly one order by column + if (order_bys.size() != 1) { + return optimized_plan; + } - // Order expression is a column value expression - const auto *column_value_expr = dynamic_cast(expr.get()); - if (column_value_expr == nullptr) { - return optimized_plan; - } + // Order type is asc or default + const auto &[order_type, expr] = order_bys[0]; + if (!(order_type == OrderByType::ASC || order_type == OrderByType::DEFAULT)) { + return optimized_plan; + } - order_by_column_ids.push_back(column_value_expr->GetColIdx()); + // Order expression is a column value expression + const auto *column_value_expr = dynamic_cast(expr.get()); + if (column_value_expr == nullptr) { + return optimized_plan; } + auto order_by_column_id = column_value_expr->GetColIdx(); + // Has exactly one child BUSTUB_ENSURE(optimized_plan->children_.size() == 1, "Sort with multiple children?? 
Impossible!"); const auto &child_plan = optimized_plan->children_[0]; @@ -58,10 +61,11 @@ auto Optimizer::OptimizeOrderByAsIndexScan(const AbstractPlanNodeRef &plan) -> A const auto indices = catalog_.GetTableIndexes(table_info->name_); for (const auto *index : indices) { - const auto &columns = index->index_->GetKeyAttrs(); - if (order_by_column_ids == columns) { - return std::make_shared(optimized_plan->output_schema_, table_info->oid_, - index->index_oid_); + const auto &columns = index->key_schema_.GetColumns(); + if (columns.size() == 1 && + columns[0].GetName() == table_info->schema_.GetColumn(order_by_column_id).GetName()) { + // Index matched, return index scan instead + return std::make_shared(optimized_plan->output_schema_, index->index_oid_); } } } diff --git a/src/optimizer/seqscan_as_indexscan.cpp b/src/optimizer/seqscan_as_indexscan.cpp deleted file mode 100644 index b906986..0000000 --- a/src/optimizer/seqscan_as_indexscan.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include "optimizer/optimizer.h" - -namespace bustub { - -auto Optimizer::OptimizeSeqScanAsIndexScan(const bustub::AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { - // TODO(student): implement seq scan with predicate -> index scan optimizer rule - // The Filter Predicate Pushdown has been enabled for you in optimizer.cpp when forcing starter rule - return plan; -} - -} // namespace bustub diff --git a/src/optimizer/sort_limit_as_topn.cpp b/src/optimizer/sort_limit_as_topn.cpp index 6cd028f..3fe4650 100644 --- a/src/optimizer/sort_limit_as_topn.cpp +++ b/src/optimizer/sort_limit_as_topn.cpp @@ -1,10 +1,33 @@ +#include "execution/plans/limit_plan.h" +#include "execution/plans/sort_plan.h" +#include "execution/plans/topn_plan.h" #include "optimizer/optimizer.h" namespace bustub { auto Optimizer::OptimizeSortLimitAsTopN(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef { // TODO(student): implement sort + limit -> top N optimizer rule - return plan; + std::vector children; + for (const auto 
&child : plan->GetChildren()) { + children.emplace_back(OptimizeSortLimitAsTopN(child)); + } + auto optimized_plan = plan->CloneWithChildren(std::move(children)); + + if (optimized_plan->GetType() == PlanType::Limit) { + const auto &limit_plan = dynamic_cast(*optimized_plan); + const auto &limit = limit_plan.GetLimit(); + + BUSTUB_ENSURE(limit_plan.children_.size() == 1, "Limit Plan should have exactly 1 child."); + if (optimized_plan->GetChildAt(0)->GetType() == PlanType::Sort) { + const auto &sort_plan = dynamic_cast(*optimized_plan->GetChildAt(0)); + const auto &order_bys = sort_plan.GetOrderBy(); + + BUSTUB_ENSURE(sort_plan.children_.size() == 1, "Sort Plan should have exactly 1 child."); + + return std::make_shared(limit_plan.output_schema_, sort_plan.GetChildAt(0), order_bys, limit); + } + } + return optimized_plan; } } // namespace bustub diff --git a/src/planner/CMakeLists.txt b/src/planner/CMakeLists.txt index 3bf0e3d..a579907 100644 --- a/src/planner/CMakeLists.txt +++ b/src/planner/CMakeLists.txt @@ -3,12 +3,10 @@ add_library( OBJECT expression_factory.cpp plan_aggregation.cpp - plan_func_call.cpp plan_expression.cpp plan_insert.cpp plan_table_ref.cpp plan_select.cpp - plan_window_function.cpp planner.cpp) set(ALL_OBJECT_FILES diff --git a/src/planner/expression_factory.cpp b/src/planner/expression_factory.cpp index cf6657f..fdc1790 100644 --- a/src/planner/expression_factory.cpp +++ b/src/planner/expression_factory.cpp @@ -1,14 +1,11 @@ #include "binder/bound_expression.h" -#include "binder/expressions/bound_func_call.h" #include "binder/statement/select_statement.h" #include "execution/expressions/abstract_expression.h" #include "execution/expressions/arithmetic_expression.h" -#include "execution/expressions/array_expression.h" #include "execution/expressions/column_value_expression.h" #include "execution/expressions/comparison_expression.h" #include "execution/expressions/constant_value_expression.h" #include "execution/expressions/logic_expression.h" 
-#include "execution/plans/window_plan.h" #include "planner/planner.h" namespace bustub { @@ -38,35 +35,6 @@ auto Planner::GetAggCallFromFactory(const std::string &func_name, std::vector args) - -> std::tuple> { - if (args.empty()) { - if (func_name == "count_star") { - return {WindowFunctionType::CountStarAggregate, {}}; - } - if (func_name == "rank") { - return {WindowFunctionType::Rank, {}}; - } - } - if (args.size() == 1) { - auto expr = std::move(args[0]); - if (func_name == "min") { - return {WindowFunctionType::MinAggregate, {std::move(expr)}}; - } - if (func_name == "max") { - return {WindowFunctionType::MaxAggregate, {std::move(expr)}}; - } - if (func_name == "sum") { - return {WindowFunctionType::SumAggregate, {std::move(expr)}}; - } - if (func_name == "count") { - return {WindowFunctionType::CountAggregate, {std::move(expr)}}; - } - } - throw Exception(fmt::format("unsupported window_call {} with {} args", func_name, args.size())); -} - auto Planner::GetBinaryExpressionFromFactory(const std::string &op_name, AbstractExpressionRef left, AbstractExpressionRef right) -> AbstractExpressionRef { if (op_name == "=" || op_name == "==") { @@ -104,17 +72,4 @@ auto Planner::GetBinaryExpressionFromFactory(const std::string &op_name, Abstrac throw Exception(fmt::format("binary op {} not supported in planner yet", op_name)); } -auto Planner::PlanFuncCall(const BoundFuncCall &expr, const std::vector &children) - -> AbstractExpressionRef { - std::vector args; - for (const auto &arg : expr.args_) { - auto [_1, arg_expr] = PlanExpression(*arg, children); - args.push_back(std::move(arg_expr)); - } - if (expr.func_name_ == "construct_array") { - return std::make_shared(args); - } - return GetFuncCallFromFactory(expr.func_name_, std::move(args)); -} - } // namespace bustub diff --git a/src/planner/plan_aggregation.cpp b/src/planner/plan_aggregation.cpp index 9686217..603c20b 100644 --- a/src/planner/plan_aggregation.cpp +++ b/src/planner/plan_aggregation.cpp @@ -77,7 +77,7 
@@ auto Planner::PlanSelectAgg(const SelectStatement &statement, AbstractPlanNodeRe * - That's all! */ - // Create a new context which allows aggregation call. + // Create a new context which allows aggrecation call. auto guard = NewContext(); ctx_.allow_aggregation_ = true; @@ -97,8 +97,6 @@ auto Planner::PlanSelectAgg(const SelectStatement &statement, AbstractPlanNodeRe } // Rewrite all agg call inside expression to a pseudo one. - // It replaces the agg call in select_list_ with a pseudo one with index - // adds the real agg call to context. for (auto &item : statement.select_list_) { AddAggCallToContext(*item); } @@ -128,7 +126,7 @@ auto Planner::PlanSelectAgg(const SelectStatement &statement, AbstractPlanNodeRe agg_types.push_back(agg_type); output_col_names.emplace_back(fmt::format("agg#{}", term_idx)); ctx_.expr_in_agg_.emplace_back( - std::make_unique(0, agg_begin_idx + term_idx, Column("", TypeId::INTEGER))); + std::make_unique(0, agg_begin_idx + term_idx, TypeId::INTEGER)); term_idx += 1; } diff --git a/src/planner/plan_expression.cpp b/src/planner/plan_expression.cpp index a408ce4..4873511 100644 --- a/src/planner/plan_expression.cpp +++ b/src/planner/plan_expression.cpp @@ -1,7 +1,5 @@ #include -#include #include -#include #include "binder/bound_expression.h" #include "binder/bound_statement.h" #include "binder/expressions/bound_agg_call.h" @@ -9,20 +7,16 @@ #include "binder/expressions/bound_binary_op.h" #include "binder/expressions/bound_column_ref.h" #include "binder/expressions/bound_constant.h" -#include "binder/expressions/bound_func_call.h" #include "binder/expressions/bound_unary_op.h" -#include "binder/expressions/bound_window.h" #include "binder/statement/select_statement.h" #include "common/exception.h" #include "common/macros.h" #include "common/util/string_util.h" -#include "execution/expressions/abstract_expression.h" #include "execution/expressions/column_value_expression.h" #include "execution/expressions/constant_value_expression.h" 
#include "execution/plans/abstract_plan.h" #include "fmt/format.h" #include "planner/planner.h" -#include "type/value_factory.h" namespace bustub { @@ -58,7 +52,7 @@ auto Planner::PlanColumnRef(const BoundColumnRef &expr, const std::vector(0, col_idx, col_type)); } if (children.size() == 2) { @@ -88,11 +82,11 @@ auto Planner::PlanColumnRef(const BoundColumnRef &expr, const std::vector(0, *col_idx_left, col_type)); } if (col_idx_right) { - auto col_type = right_schema.GetColumn(*col_idx_right); + auto col_type = right_schema.GetColumn(*col_idx_right).GetType(); return std::make_tuple(col_name, std::make_shared(1, *col_idx_right, col_type)); } throw bustub::Exception(fmt::format("column name {} not found", col_name)); @@ -125,13 +119,6 @@ void Planner::AddAggCallToContext(BoundExpression &expr) { AddAggCallToContext(*binary_op_expr.rarg_); return; } - case ExpressionType::FUNC_CALL: { - auto &func_call_expr = dynamic_cast(expr); - for (const auto &child : func_call_expr.args_) { - AddAggCallToContext(*child); - } - return; - } case ExpressionType::CONSTANT: { return; } @@ -163,10 +150,6 @@ auto Planner::PlanExpression(const BoundExpression &expr, const std::vector(expr); return std::make_tuple(UNNAMED_COLUMN, PlanBinaryOp(binary_op_expr, children)); } - case ExpressionType::FUNC_CALL: { - const auto &func_call_expr = dynamic_cast(expr); - return std::make_tuple(UNNAMED_COLUMN, PlanFuncCall(func_call_expr, children)); - } case ExpressionType::CONSTANT: { const auto &constant_expr = dynamic_cast(expr); return std::make_tuple(UNNAMED_COLUMN, PlanConstant(constant_expr, children)); @@ -176,9 +159,6 @@ auto Planner::PlanExpression(const BoundExpression &expr, const std::vector -#include -#include "binder/bound_expression.h" -#include "binder/bound_statement.h" -#include "binder/expressions/bound_agg_call.h" -#include "binder/expressions/bound_alias.h" -#include "binder/expressions/bound_binary_op.h" -#include "binder/expressions/bound_column_ref.h" -#include 
"binder/expressions/bound_constant.h" -#include "binder/expressions/bound_func_call.h" -#include "binder/expressions/bound_unary_op.h" -#include "binder/statement/select_statement.h" -#include "common/exception.h" -#include "common/macros.h" -#include "common/util/string_util.h" -#include "execution/expressions/abstract_expression.h" -#include "execution/expressions/column_value_expression.h" -#include "execution/expressions/constant_value_expression.h" -#include "execution/expressions/string_expression.h" -#include "execution/plans/abstract_plan.h" -#include "fmt/format.h" -#include "planner/planner.h" - -namespace bustub { - -// NOLINTNEXTLINE -auto Planner::GetFuncCallFromFactory(const std::string &func_name, std::vector args) - -> AbstractExpressionRef { - // 1. check if the parsed function name is "lower" or "upper". - // 2. verify the number of args (should be 1), refer to the test cases for when you should throw an `Exception`. - // 3. return a `StringExpression` std::shared_ptr. 
- throw Exception(fmt::format("func call {} not supported in planner yet", func_name)); -} - -} // namespace bustub diff --git a/src/planner/plan_insert.cpp b/src/planner/plan_insert.cpp index fee4338..83364d4 100644 --- a/src/planner/plan_insert.cpp +++ b/src/planner/plan_insert.cpp @@ -67,7 +67,8 @@ auto Planner::PlanUpdate(const UpdateStatement &statement) -> AbstractPlanNodeRe for (size_t idx = 0; idx < target_exprs.size(); idx++) { if (target_exprs[idx] == nullptr) { - target_exprs[idx] = std::make_shared(0, idx, filter->output_schema_->GetColumn(idx)); + target_exprs[idx] = + std::make_shared(0, idx, filter->output_schema_->GetColumn(idx).GetType()); } } diff --git a/src/planner/plan_select.cpp b/src/planner/plan_select.cpp index c8829ae..f62e64b 100644 --- a/src/planner/plan_select.cpp +++ b/src/planner/plan_select.cpp @@ -58,28 +58,14 @@ auto Planner::PlanSelect(const SelectStatement &statement) -> AbstractPlanNodeRe } bool has_agg = false; - bool has_window_agg = false; - // Binder already checked that normal aggregations and window aggregations cannot coexist. 
for (const auto &item : statement.select_list_) { if (item->HasAggregation()) { has_agg = true; break; } - if (item->HasWindowFunction()) { - has_window_agg = true; - break; - } } - if (has_window_agg) { - if (!statement.having_->IsInvalid()) { - throw Exception("HAVING on window function is not supported yet."); - } - if (!statement.group_by_.empty()) { - throw Exception("Group by is not allowed to use with window function."); - } - plan = PlanSelectWindow(statement, std::move(plan)); - } else if (!statement.having_->IsInvalid() || !statement.group_by_.empty() || has_agg) { + if (!statement.having_->IsInvalid() || !statement.group_by_.empty() || has_agg) { // Plan aggregation plan = PlanSelectAgg(statement, std::move(plan)); } else { @@ -107,7 +93,7 @@ auto Planner::PlanSelect(const SelectStatement &statement) -> AbstractPlanNodeRe std::vector distinct_exprs; size_t col_idx = 0; for (const auto &col : child->OutputSchema().GetColumns()) { - distinct_exprs.emplace_back(std::make_shared(0, col_idx++, col)); + distinct_exprs.emplace_back(std::make_shared(0, col_idx++, col.GetType())); } plan = std::make_shared(std::make_shared(child->OutputSchema()), child, diff --git a/src/planner/plan_table_ref.cpp b/src/planner/plan_table_ref.cpp index 22f32a5..e4342de 100644 --- a/src/planner/plan_table_ref.cpp +++ b/src/planner/plan_table_ref.cpp @@ -71,7 +71,7 @@ auto Planner::PlanSubquery(const BoundSubqueryRef &table_ref, const std::string // This projection will be removed by eliminate projection rule. It's solely used for renaming columns. 
for (const auto &col : select_node->OutputSchema().GetColumns()) { - auto expr = std::make_shared(0, idx, col); + auto expr = std::make_shared(0, idx, col.GetType()); output_column_names.emplace_back(fmt::format("{}.{}", alias, fmt::join(table_ref.select_list_name_[idx], "."))); exprs.push_back(std::move(expr)); idx++; @@ -155,7 +155,11 @@ auto Planner::PlanExpressionListRef(const BoundExpressionListRef &table_ref) -> size_t idx = 0; for (const auto &col : first_row) { auto col_name = fmt::format("{}.{}", table_ref.identifier_, idx); - cols.emplace_back(col->GetReturnType().WithColumnName(col_name)); + if (col->GetReturnType() != TypeId::VARCHAR) { + cols.emplace_back(Column(col_name, col->GetReturnType())); + } else { + cols.emplace_back(Column(col_name, col->GetReturnType(), VARCHAR_DEFAULT_LENGTH)); + } idx += 1; } auto schema = std::make_shared(cols); diff --git a/src/planner/plan_window_function.cpp b/src/planner/plan_window_function.cpp deleted file mode 100644 index b949e48..0000000 --- a/src/planner/plan_window_function.cpp +++ /dev/null @@ -1,155 +0,0 @@ -#include -#include - -#include "binder/bound_expression.h" -#include "binder/bound_statement.h" -#include "binder/bound_table_ref.h" -#include "binder/expressions/bound_alias.h" -#include "binder/expressions/bound_window.h" -#include "binder/statement/select_statement.h" -#include "binder/tokens.h" -#include "common/exception.h" -#include "common/macros.h" -#include "common/util/string_util.h" -#include "execution/expressions/abstract_expression.h" -#include "execution/expressions/column_value_expression.h" -#include "execution/expressions/constant_value_expression.h" -#include "execution/plans/abstract_plan.h" -#include "execution/plans/aggregation_plan.h" -#include "execution/plans/filter_plan.h" -#include "execution/plans/projection_plan.h" -#include "execution/plans/window_plan.h" -#include "fmt/format.h" -#include "planner/planner.h" -#include "type/type_id.h" -#include "type/value_factory.h" - 
-namespace bustub { - -// TODO(chi): clang-tidy on macOS will suggest changing it to const reference. Looks like a bug. - -void CheckOrderByCompatible( - const std::vector>> &order_by_exprs) { - if (order_by_exprs.empty()) { - // either or window functions not having order by clause - return; - } - // or all order by clause are the same - std::vector> first_order_by = order_by_exprs[0]; - for (auto &order_by : order_by_exprs) { - if (order_by.size() != first_order_by.size()) { - throw Exception("order by clause of window functions are not compatible"); - } - for (uint32_t i = 0; i < order_by.size(); i++) { - if (order_by[i].first != first_order_by[i].first) { - throw Exception("order by clause of window functions are not compatible"); - } - if (order_by[i].second->ToString() != first_order_by[i].second->ToString()) { - throw Exception("order by clause of window functions are not compatible"); - } - } - } -} - -/* NOLINTNEXTLINE */ -auto Planner::PlanSelectWindow(const SelectStatement &statement, AbstractPlanNodeRef child) -> AbstractPlanNodeRef { - /* For window function we don't do two passes rewrites like planning normal aggregations. - * Because standard sql does not allow using window function results in where clause, and - * our implementation does not support having on window function. We assume window functions - * only appear in select list, and we can plan them in one pass. 
- */ - std::vector columns; - std::vector column_names; - std::vector window_func_indexes; - std::vector window_func_types; - std::vector> partition_by_exprs; - std::vector>> order_by_exprs; - std::vector arg_exprs; - - for (uint32_t i = 0; i < statement.select_list_.size(); i++) { - const auto &item = statement.select_list_[i]; - if (!item->HasWindowFunction()) { - // normal select - auto [name, expr] = PlanExpression(*item, {child}); - if (name == UNNAMED_COLUMN) { - name = fmt::format("__unnamed#{}", universal_id_++); - } - columns.emplace_back(std::move(expr)); - column_names.emplace_back(std::move(name)); - continue; - } - - // parse window function - window_func_indexes.push_back(i); - // we assign a -1 here as a placeholder - columns.emplace_back(std::make_shared(0, -1, Column{"", TypeId::INTEGER})); - - const BoundExpression *window_item = nullptr; - if (item->type_ == ExpressionType::ALIAS) { - const auto &alias_expr = dynamic_cast(*item); - column_names.emplace_back(alias_expr.alias_); - window_item = &(*alias_expr.child_); - } else { - BUSTUB_ASSERT(item->type_ == ExpressionType::WINDOW, "Invalid expression type has window function"); - column_names.emplace_back(fmt::format("__unnamed#{}", universal_id_++)); - window_item = &(*item); - } - const auto &window_call = dynamic_cast(*window_item); - if (window_call.start_ != WindowBoundary::UNBOUNDED_PRECEDING || - (window_call.end_ != WindowBoundary::CURRENT_ROW_ROWS && - window_call.end_ != WindowBoundary::CURRENT_ROW_RANGE)) { - throw Exception("Bustub currently only support window function with default window frame settings"); - } - std::vector partition_by; - for (auto &item : window_call.partition_by_) { - auto [_, expr] = PlanExpression(*item, {child}); - partition_by.emplace_back(std::move(expr)); - } - partition_by_exprs.emplace_back(std::move(partition_by)); - - if (window_call.func_name_ == "rank" && window_call.order_bys_.empty()) { - throw Exception("order by clause is mandatory for rank 
function"); - } - - std::vector> order_by; - for (const auto &item : window_call.order_bys_) { - auto [_, expr] = PlanExpression(*item->expr_, {child}); - auto abstract_expr = std::move(expr); - order_by.emplace_back(item->type_, abstract_expr); - } - order_by_exprs.emplace_back(std::move(order_by)); - - std::vector raw_args; - AbstractExpressionRef clean_arg; - for (const auto &arg : window_call.args_) { - auto [_, ret] = PlanExpression(*arg, {child}); - raw_args.emplace_back(std::move(ret)); - } - auto [window_func_type, clean_args] = GetWindowAggCallFromFactory(window_call.func_name_, std::move(raw_args)); - window_func_types.emplace_back(window_func_type); - if (clean_args.size() > 1) { - throw bustub::NotImplementedException("only agg call of zero/one arg is supported"); - } - if (clean_args.empty()) { - // Rewrite count(*)/row_number into count(1) - clean_arg = std::make_shared(ValueFactory::GetIntegerValue(1)); - } else { - clean_arg = std::move(clean_args[0]); - } - arg_exprs.emplace_back(std::move(clean_arg)); - } - - CheckOrderByCompatible(order_by_exprs); - - // we don't need window_agg_indexes here because we already use placeholders to infer the window agg column type is - // Integer - auto window_output_schema = WindowFunctionPlanNode::InferWindowSchema(columns); - - auto plan = std::make_shared( - std::make_shared(ProjectionPlanNode::RenameSchema(window_output_schema, column_names)), child, - window_func_indexes, columns, partition_by_exprs, order_by_exprs, arg_exprs, window_func_types); - - return plan; -} - -} // namespace bustub diff --git a/src/primer/.clang-tidy b/src/primer/.clang-tidy new file mode 100644 index 0000000..034363c --- /dev/null +++ b/src/primer/.clang-tidy @@ -0,0 +1,10 @@ +--- +Checks: ' + -modernize-use-trailing-return-type, + ' +InheritParentConfig: true + +#### Disabled checks and why: ##### +# +# We didn't introduce this rule for project 0, so we're going to disable it solely for project 0. 
+# For future semesters, developers should remove this `.clang-tidy` file. diff --git a/src/primer/CMakeLists.txt b/src/primer/CMakeLists.txt index a75d464..65ab67d 100644 --- a/src/primer/CMakeLists.txt +++ b/src/primer/CMakeLists.txt @@ -1,11 +1,8 @@ add_library( bustub_primer OBJECT - orset.cpp - orset_driver.cpp - trie.cpp - trie_store.cpp) + p0_trie.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ - PARENT_SCOPE) + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/primer/orset.cpp b/src/primer/orset.cpp deleted file mode 100644 index a926bef..0000000 --- a/src/primer/orset.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "primer/orset.h" -#include -#include -#include -#include "common/exception.h" -#include "fmt/format.h" - -namespace bustub { - -template -auto ORSet::Contains(const T &elem) const -> bool { - // TODO(student): Implement this - throw NotImplementedException("ORSet::Contains is not implemented"); -} - -template -void ORSet::Add(const T &elem, uid_t uid) { - // TODO(student): Implement this - throw NotImplementedException("ORSet::Add is not implemented"); -} - -template -void ORSet::Remove(const T &elem) { - // TODO(student): Implement this - throw NotImplementedException("ORSet::Remove is not implemented"); -} - -template -void ORSet::Merge(const ORSet &other) { - // TODO(student): Implement this - throw NotImplementedException("ORSet::Merge is not implemented"); -} - -template -auto ORSet::Elements() const -> std::vector { - // TODO(student): Implement this - throw NotImplementedException("ORSet::Elements is not implemented"); -} - -template -auto ORSet::ToString() const -> std::string { - auto elements = Elements(); - std::sort(elements.begin(), elements.end()); - return fmt::format("{{{}}}", fmt::join(elements, ", ")); -} - -template class ORSet; -template class ORSet; - -} // namespace bustub diff --git a/src/primer/orset_driver.cpp b/src/primer/orset_driver.cpp deleted file mode 100644 index 7efe853..0000000 --- 
a/src/primer/orset_driver.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include "primer/orset_driver.h" -#include - -namespace bustub { - -template -void ORSetNode::Load() { - for (size_t i = 0; i < peer_size_; ++i) { - if (i == node_id_) { - continue; - } - uint32_t curr_version = driver_->version_counter_[i]; - if (last_read_version_[i] < curr_version) { - Merge(driver_->saved_copies_[i]); - last_read_version_[i] = curr_version; - } - } -} - -template -void ORSetNode::Save() { - driver_->saved_copies_[node_id_] = orset_; - driver_->version_counter_[node_id_]++; -} - -template -ORSetDriver::ORSetDriver(size_t num_orset_node) : version_counter_(num_orset_node) { - orset_nodes_.reserve(num_orset_node); - for (size_t i = 0; i < num_orset_node; ++i) { - orset_nodes_.emplace_back(std::make_unique>(this, i, num_orset_node)); - version_counter_[i] = 0; - } - saved_copies_.resize(num_orset_node); -} - -template -void ORSetDriver::Sync() { - for (const auto &node : orset_nodes_) { - node->Save(); - } - for (const auto &node : orset_nodes_) { - node->Load(); - } -} - -template class ORSetNode; -template class ORSetNode; -template class ORSetDriver; -template class ORSetDriver; - -} // namespace bustub diff --git a/src/primer/p0_trie.cpp b/src/primer/p0_trie.cpp new file mode 100644 index 0000000..6485be2 --- /dev/null +++ b/src/primer/p0_trie.cpp @@ -0,0 +1,9 @@ +#include "primer/p0_trie.h" + +// This is a placeholder file for clang-tidy check. +// +// With this file, we can fire run_clang_tidy.py to check `p0_trie.h`, +// as it will filter out all header files and won't check header-only code. +// +// This file is not part of the submission. All of the modifications should +// be done in `src/include/primer/p0_trie.h`. 
diff --git a/src/primer/trie.cpp b/src/primer/trie.cpp deleted file mode 100644 index 5117dc9..0000000 --- a/src/primer/trie.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "primer/trie.h" -#include -#include "common/exception.h" - -namespace bustub { - -template -auto Trie::Get(std::string_view key) const -> const T * { - throw NotImplementedException("Trie::Get is not implemented."); - - // You should walk through the trie to find the node corresponding to the key. If the node doesn't exist, return - // nullptr. After you find the node, you should use `dynamic_cast` to cast it to `const TrieNodeWithValue *`. If - // dynamic_cast returns `nullptr`, it means the type of the value is mismatched, and you should return nullptr. - // Otherwise, return the value. -} - -template -auto Trie::Put(std::string_view key, T value) const -> Trie { - // Note that `T` might be a non-copyable type. Always use `std::move` when creating `shared_ptr` on that value. - throw NotImplementedException("Trie::Put is not implemented."); - - // You should walk through the trie and create new nodes if necessary. If the node corresponding to the key already - // exists, you should create a new `TrieNodeWithValue`. -} - -auto Trie::Remove(std::string_view key) const -> Trie { - throw NotImplementedException("Trie::Remove is not implemented."); - - // You should walk through the trie and remove nodes if necessary. If the node doesn't contain a value any more, - // you should convert it to `TrieNode`. If a node doesn't have children any more, you should remove it. -} - -// Below are explicit instantiation of template functions. -// -// Generally people would write the implementation of template classes and functions in the header file. However, we -// separate the implementation into a .cpp file to make things clearer. In order to make the compiler know the -// implementation of the template functions, we need to explicitly instantiate them here, so that they can be picked up -// by the linker. 
- -template auto Trie::Put(std::string_view key, uint32_t value) const -> Trie; -template auto Trie::Get(std::string_view key) const -> const uint32_t *; - -template auto Trie::Put(std::string_view key, uint64_t value) const -> Trie; -template auto Trie::Get(std::string_view key) const -> const uint64_t *; - -template auto Trie::Put(std::string_view key, std::string value) const -> Trie; -template auto Trie::Get(std::string_view key) const -> const std::string *; - -// If your solution cannot compile for non-copy tests, you can remove the below lines to get partial score. - -using Integer = std::unique_ptr; - -template auto Trie::Put(std::string_view key, Integer value) const -> Trie; -template auto Trie::Get(std::string_view key) const -> const Integer *; - -template auto Trie::Put(std::string_view key, MoveBlocked value) const -> Trie; -template auto Trie::Get(std::string_view key) const -> const MoveBlocked *; - -} // namespace bustub diff --git a/src/primer/trie_store.cpp b/src/primer/trie_store.cpp deleted file mode 100644 index 220d41c..0000000 --- a/src/primer/trie_store.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "primer/trie_store.h" -#include "common/exception.h" - -namespace bustub { - -template -auto TrieStore::Get(std::string_view key) -> std::optional> { - // Pseudo-code: - // (1) Take the root lock, get the root, and release the root lock. Don't lookup the value in the - // trie while holding the root lock. - // (2) Lookup the value in the trie. - // (3) If the value is found, return a ValueGuard object that holds a reference to the value and the - // root. Otherwise, return std::nullopt. - throw NotImplementedException("TrieStore::Get is not implemented."); -} - -template -void TrieStore::Put(std::string_view key, T value) { - // You will need to ensure there is only one writer at a time. Think of how you can achieve this. - // The logic should be somehow similar to `TrieStore::Get`. 
- throw NotImplementedException("TrieStore::Put is not implemented."); -} - -void TrieStore::Remove(std::string_view key) { - // You will need to ensure there is only one writer at a time. Think of how you can achieve this. - // The logic should be somehow similar to `TrieStore::Get`. - throw NotImplementedException("TrieStore::Remove is not implemented."); -} - -// Below are explicit instantiation of template functions. - -template auto TrieStore::Get(std::string_view key) -> std::optional>; -template void TrieStore::Put(std::string_view key, uint32_t value); - -template auto TrieStore::Get(std::string_view key) -> std::optional>; -template void TrieStore::Put(std::string_view key, std::string value); - -// If your solution cannot compile for non-copy tests, you can remove the below lines to get partial score. - -using Integer = std::unique_ptr; - -template auto TrieStore::Get(std::string_view key) -> std::optional>; -template void TrieStore::Put(std::string_view key, Integer value); - -template auto TrieStore::Get(std::string_view key) -> std::optional>; -template void TrieStore::Put(std::string_view key, MoveBlocked value); - -} // namespace bustub diff --git a/src/recovery/CMakeLists.txt b/src/recovery/CMakeLists.txt index 2454c91..92dd070 100644 --- a/src/recovery/CMakeLists.txt +++ b/src/recovery/CMakeLists.txt @@ -1,8 +1,10 @@ add_library( bustub_recovery OBJECT - log_manager.cpp) + checkpoint_manager.cpp + log_manager.cpp + log_recovery.cpp) set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ - PARENT_SCOPE) + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/recovery/log_recovery.cpp b/src/recovery/log_recovery.cpp new file mode 100644 index 0000000..99dbd96 --- /dev/null +++ b/src/recovery/log_recovery.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// BusTub +// +// log_recovery.cpp +// +// Identification: src/recovery/log_recovery.cpp +// +// Copyright (c) 2015-2019, Carnegie Mellon University 
Database Group +// +//===----------------------------------------------------------------------===// + +#include "recovery/log_recovery.h" + +#include "storage/page/table_page.h" + +namespace bustub { +/* + * deserialize a log record from log buffer + * @return: true if deserialization succeeds; false if the log record is + * incomplete and cannot be deserialized + */ +auto LogRecovery::DeserializeLogRecord(const char *data, LogRecord *log_record) -> bool { return false; } + +/* + *redo phase on TABLE PAGE level(table/table_page.h) + *read log file from the beginning to end (you must prefetch log records into + *log buffer to reduce unnecessary I/O operations), remember to compare page's + *LSN with log_record's sequence number, and also build active_txn_ table & + *lsn_mapping_ table + */ +void LogRecovery::Redo() {} + +/* + *undo phase on TABLE PAGE level(table/table_page.h) + *iterate through active txn map and undo each operation + */ +void LogRecovery::Undo() {} + +} // namespace bustub diff --git a/src/storage/disk/CMakeLists.txt b/src/storage/disk/CMakeLists.txt index 15f5c33..84eb319 100644 --- a/src/storage/disk/CMakeLists.txt +++ b/src/storage/disk/CMakeLists.txt @@ -2,8 +2,7 @@ add_library( bustub_storage_disk OBJECT disk_manager.cpp - disk_manager_memory.cpp - disk_scheduler.cpp) + disk_manager_memory.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/storage/disk/disk_scheduler.cpp b/src/storage/disk/disk_scheduler.cpp deleted file mode 100644 index 9a50397..0000000 --- a/src/storage/disk/disk_scheduler.cpp +++ /dev/null @@ -1,41 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// disk_scheduler.cpp -// -// Identification: src/storage/disk/disk_scheduler.cpp -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "storage/disk/disk_scheduler.h" -#include "common/exception.h"
-#include "storage/disk/disk_manager.h" - -namespace bustub { - -DiskScheduler::DiskScheduler(DiskManager *disk_manager) : disk_manager_(disk_manager) { - // TODO(P1): remove this line after you have implemented the disk scheduler API - throw NotImplementedException( - "DiskScheduler is not implemented yet. If you have finished implementing the disk scheduler, please remove the " - "throw exception line in `disk_scheduler.cpp`."); - - // Spawn the background thread - background_thread_.emplace([&] { StartWorkerThread(); }); -} - -DiskScheduler::~DiskScheduler() { - // Put a `std::nullopt` in the queue to signal to exit the loop - request_queue_.Put(std::nullopt); - if (background_thread_.has_value()) { - background_thread_->join(); - } -} - -void DiskScheduler::Schedule(DiskRequest r) {} - -void DiskScheduler::StartWorkerThread() {} - -} // namespace bustub diff --git a/src/storage/index/b_plus_tree.cpp b/src/storage/index/b_plus_tree.cpp index d4f09b7..b4a50b8 100644 --- a/src/storage/index/b_plus_tree.cpp +++ b/src/storage/index/b_plus_tree.cpp @@ -1,33 +1,27 @@ -#include #include #include "common/exception.h" #include "common/logger.h" #include "common/rid.h" #include "storage/index/b_plus_tree.h" +#include "storage/page/header_page.h" namespace bustub { - INDEX_TEMPLATE_ARGUMENTS -BPLUSTREE_TYPE::BPlusTree(std::string name, page_id_t header_page_id, BufferPoolManager *buffer_pool_manager, - const KeyComparator &comparator, int leaf_max_size, int internal_max_size) +BPLUSTREE_TYPE::BPlusTree(std::string name, BufferPoolManager *buffer_pool_manager, const KeyComparator &comparator, + int leaf_max_size, int internal_max_size) : index_name_(std::move(name)), - bpm_(buffer_pool_manager), - comparator_(std::move(comparator)), + root_page_id_(INVALID_PAGE_ID), + buffer_pool_manager_(buffer_pool_manager), + comparator_(comparator), leaf_max_size_(leaf_max_size), - internal_max_size_(internal_max_size), - header_page_id_(header_page_id) { - WritePageGuard guard = 
bpm_->FetchPageWrite(header_page_id_); - auto root_page = guard.AsMut(); - root_page->root_page_id_ = INVALID_PAGE_ID; -} + internal_max_size_(internal_max_size) {} /* * Helper function to decide whether current b+tree is empty */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::IsEmpty() const -> bool { return true; } - +auto BPLUSTREE_TYPE::IsEmpty() const -> bool { return root_page_id_ == INVALID_PAGE_ID; } /***************************************************************************** * SEARCH *****************************************************************************/ @@ -37,11 +31,23 @@ auto BPLUSTREE_TYPE::IsEmpty() const -> bool { return true; } * @return : true means key exists */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::GetValue(const KeyType &key, std::vector *result, Transaction *txn) -> bool { - // Declaration of context instance. - Context ctx; - (void)ctx; - return false; +auto BPLUSTREE_TYPE::GetValue(const KeyType &key, std::vector *result, Transaction *transaction) -> bool { + root_page_id_latch_.RLock(); + auto leaf_page = FindLeaf(key, Operation::SEARCH, transaction); + auto *node = reinterpret_cast(leaf_page->GetData()); + + ValueType v; + auto existed = node->Lookup(key, &v, comparator_); + + leaf_page->RUnlatch(); + buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), false); + + if (!existed) { + return false; + } + + result->push_back(v); + return true; } /***************************************************************************** @@ -55,11 +61,149 @@ auto BPLUSTREE_TYPE::GetValue(const KeyType &key, std::vector *result * keys return false, otherwise return true. */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::Insert(const KeyType &key, const ValueType &value, Transaction *txn) -> bool { - // Declaration of context instance. 
- Context ctx; - (void)ctx; - return false; +auto BPLUSTREE_TYPE::Insert(const KeyType &key, const ValueType &value, Transaction *transaction) -> bool { + root_page_id_latch_.WLock(); + transaction->AddIntoPageSet(nullptr); // nullptr means root_page_id_latch_ + if (IsEmpty()) { + StartNewTree(key, value); + ReleaseLatchFromQueue(transaction); + return true; + } + return InsertIntoLeaf(key, value, transaction); +} + +INDEX_TEMPLATE_ARGUMENTS +void BPLUSTREE_TYPE::StartNewTree(const KeyType &key, const ValueType &value) { + auto page = buffer_pool_manager_->NewPage(&root_page_id_); + + if (page == nullptr) { + throw Exception(ExceptionType::OUT_OF_MEMORY, "Cannot allocate new page"); + } + + auto *leaf = reinterpret_cast(page->GetData()); + leaf->Init(root_page_id_, INVALID_PAGE_ID, leaf_max_size_); + + leaf->Insert(key, value, comparator_); + + buffer_pool_manager_->UnpinPage(page->GetPageId(), true); + + // UpdateRootPageId(1); +} + +INDEX_TEMPLATE_ARGUMENTS +auto BPLUSTREE_TYPE::InsertIntoLeaf(const KeyType &key, const ValueType &value, Transaction *transaction) -> bool { + auto leaf_page = FindLeaf(key, Operation::INSERT, transaction); + auto *node = reinterpret_cast(leaf_page->GetData()); + + auto size = node->GetSize(); + auto new_size = node->Insert(key, value, comparator_); + + // duplicate key + if (new_size == size) { + ReleaseLatchFromQueue(transaction); + leaf_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), false); + return false; + } + + // leaf is not full + if (new_size < leaf_max_size_) { + ReleaseLatchFromQueue(transaction); + leaf_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), true); + return true; + } + + // leaf is full, need to split + auto sibling_leaf_node = Split(node); + sibling_leaf_node->SetNextPageId(node->GetNextPageId()); + node->SetNextPageId(sibling_leaf_node->GetPageId()); + + auto risen_key = sibling_leaf_node->KeyAt(0); + InsertIntoParent(node, risen_key, sibling_leaf_node, 
transaction); + + leaf_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), true); + buffer_pool_manager_->UnpinPage(sibling_leaf_node->GetPageId(), true); + return true; +} + +INDEX_TEMPLATE_ARGUMENTS +template +auto BPLUSTREE_TYPE::Split(N *node) -> N * { + page_id_t page_id; + auto page = buffer_pool_manager_->NewPage(&page_id); + + if (page == nullptr) { + throw Exception(ExceptionType::OUT_OF_MEMORY, "Cannot allocate new page"); + } + + N *new_node = reinterpret_cast(page->GetData()); + new_node->SetPageType(node->GetPageType()); + + if (node->IsLeafPage()) { + auto *leaf = reinterpret_cast(node); + auto *new_leaf = reinterpret_cast(new_node); + + new_leaf->Init(page->GetPageId(), node->GetParentPageId(), leaf_max_size_); + leaf->MoveHalfTo(new_leaf); + } else { + auto *internal = reinterpret_cast(node); + auto *new_internal = reinterpret_cast(new_node); + + new_internal->Init(page->GetPageId(), node->GetParentPageId(), internal_max_size_); + internal->MoveHalfTo(new_internal, buffer_pool_manager_); + } + + return new_node; +} + +INDEX_TEMPLATE_ARGUMENTS +void BPLUSTREE_TYPE::InsertIntoParent(BPlusTreePage *old_node, const KeyType &key, BPlusTreePage *new_node, + Transaction *transaction) { + if (old_node->IsRootPage()) { + auto page = buffer_pool_manager_->NewPage(&root_page_id_); + + if (page == nullptr) { + throw Exception(ExceptionType::OUT_OF_MEMORY, "Cannot allocate new page"); + } + + auto *new_root = reinterpret_cast(page->GetData()); + new_root->Init(root_page_id_, INVALID_PAGE_ID, internal_max_size_); + + new_root->PopulateNewRoot(old_node->GetPageId(), key, new_node->GetPageId()); + + old_node->SetParentPageId(new_root->GetPageId()); + new_node->SetParentPageId(new_root->GetPageId()); + + buffer_pool_manager_->UnpinPage(page->GetPageId(), true); + + UpdateRootPageId(0); + + ReleaseLatchFromQueue(transaction); + return; + } + auto parent_page = buffer_pool_manager_->FetchPage(old_node->GetParentPageId()); + auto *parent_node = 
reinterpret_cast(parent_page->GetData()); + + if (parent_node->GetSize() < internal_max_size_) { + parent_node->InsertNodeAfter(old_node->GetPageId(), key, new_node->GetPageId()); + ReleaseLatchFromQueue(transaction); + buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true); + return; + } + auto *mem = new char[INTERNAL_PAGE_HEADER_SIZE + sizeof(MappingType) * (parent_node->GetSize() + 1)]; + auto *copy_parent_node = reinterpret_cast(mem); + std::memcpy(mem, parent_page->GetData(), INTERNAL_PAGE_HEADER_SIZE + sizeof(MappingType) * (parent_node->GetSize())); + copy_parent_node->InsertNodeAfter(old_node->GetPageId(), key, new_node->GetPageId()); + auto parent_new_sibling_node = Split(copy_parent_node); + KeyType new_key = parent_new_sibling_node->KeyAt(0); + std::memcpy(parent_page->GetData(), mem, + INTERNAL_PAGE_HEADER_SIZE + sizeof(MappingType) * copy_parent_node->GetMinSize()); + InsertIntoParent(parent_node, new_key, parent_new_sibling_node, transaction); + buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true); + buffer_pool_manager_->UnpinPage(parent_new_sibling_node->GetPageId(), true); + delete[] mem; } /***************************************************************************** @@ -67,28 +211,213 @@ auto BPLUSTREE_TYPE::Insert(const KeyType &key, const ValueType &value, Transact *****************************************************************************/ /* * Delete key & value pair associated with input key - * If current tree is empty, return immediately. + * If current tree is empty, return immdiately. * If not, User needs to first find the right leaf page as deletion target, then * delete entry from leaf page. Remember to deal with redistribute or merge if * necessary. */ INDEX_TEMPLATE_ARGUMENTS -void BPLUSTREE_TYPE::Remove(const KeyType &key, Transaction *txn) { - // Declaration of context instance. 
- Context ctx; - (void)ctx; +void BPLUSTREE_TYPE::Remove(const KeyType &key, Transaction *transaction) { + root_page_id_latch_.WLock(); + transaction->AddIntoPageSet(nullptr); // nullptr means root_page_id_latch_ + + if (IsEmpty()) { + ReleaseLatchFromQueue(transaction); + return; + } + + auto leaf_page = FindLeaf(key, Operation::DELETE, transaction); + auto *node = reinterpret_cast(leaf_page->GetData()); + + if (node->GetSize() == node->RemoveAndDeleteRecord(key, comparator_)) { + ReleaseLatchFromQueue(transaction); + leaf_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), false); + return; + } + + auto node_should_delete = CoalesceOrRedistribute(node, transaction); + leaf_page->WUnlatch(); + + if (node_should_delete) { + transaction->AddIntoDeletedPageSet(node->GetPageId()); + } + + buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), true); + + std::for_each(transaction->GetDeletedPageSet()->begin(), transaction->GetDeletedPageSet()->end(), + [&bpm = buffer_pool_manager_](const page_id_t page_id) { bpm->DeletePage(page_id); }); + transaction->GetDeletedPageSet()->clear(); +} + +INDEX_TEMPLATE_ARGUMENTS +template +auto BPLUSTREE_TYPE::CoalesceOrRedistribute(N *node, Transaction *transaction) -> bool { + if (node->IsRootPage()) { + auto root_should_delete = AdjustRoot(node); + ReleaseLatchFromQueue(transaction); + return root_should_delete; + } + + if (node->GetSize() >= node->GetMinSize()) { + ReleaseLatchFromQueue(transaction); + return false; + } + + auto parent_page = buffer_pool_manager_->FetchPage(node->GetParentPageId()); + auto *parent_node = reinterpret_cast(parent_page->GetData()); + auto idx = parent_node->ValueIndex(node->GetPageId()); + + if (idx > 0) { + auto sibling_page = buffer_pool_manager_->FetchPage(parent_node->ValueAt(idx - 1)); + sibling_page->WLatch(); + N *sibling_node = reinterpret_cast(sibling_page->GetData()); + + if (sibling_node->GetSize() > sibling_node->GetMinSize()) { + Redistribute(sibling_node, node, 
parent_node, idx, true); + + ReleaseLatchFromQueue(transaction); + + buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true); + sibling_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(sibling_page->GetPageId(), true); + return false; + } + + // coalesce + auto parent_node_should_delete = Coalesce(sibling_node, node, parent_node, idx, transaction); + + if (parent_node_should_delete) { + transaction->AddIntoDeletedPageSet(parent_node->GetPageId()); + } + buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true); + sibling_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(sibling_page->GetPageId(), true); + return true; + } + + if (idx != parent_node->GetSize() - 1) { + auto sibling_page = buffer_pool_manager_->FetchPage(parent_node->ValueAt(idx + 1)); + sibling_page->WLatch(); + N *sibling_node = reinterpret_cast(sibling_page->GetData()); + + if (sibling_node->GetSize() > sibling_node->GetMinSize()) { + Redistribute(sibling_node, node, parent_node, idx, false); + + ReleaseLatchFromQueue(transaction); + + buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true); + sibling_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(sibling_page->GetPageId(), true); + return false; + } + // coalesce + auto sibling_idx = parent_node->ValueIndex(sibling_node->GetPageId()); + auto parent_node_should_delete = Coalesce(node, sibling_node, parent_node, sibling_idx, transaction); // NOLINT + transaction->AddIntoDeletedPageSet(sibling_node->GetPageId()); + if (parent_node_should_delete) { + transaction->AddIntoDeletedPageSet(parent_node->GetPageId()); + } + buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true); + sibling_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(sibling_page->GetPageId(), true); + return false; + } + + return false; +} + +INDEX_TEMPLATE_ARGUMENTS +template +auto BPLUSTREE_TYPE::Coalesce(N *neighbor_node, N *node, + BPlusTreeInternalPage *parent, int index, + Transaction *transaction) -> bool { + auto middle_key = 
parent->KeyAt(index); + + if (node->IsLeafPage()) { + auto *leaf_node = reinterpret_cast(node); + auto *prev_leaf_node = reinterpret_cast(neighbor_node); + leaf_node->MoveAllTo(prev_leaf_node); + } else { + auto *internal_node = reinterpret_cast(node); + auto *prev_internal_node = reinterpret_cast(neighbor_node); + internal_node->MoveAllTo(prev_internal_node, middle_key, buffer_pool_manager_); + } + + parent->Remove(index); + + return CoalesceOrRedistribute(parent, transaction); +} + +INDEX_TEMPLATE_ARGUMENTS +template +void BPLUSTREE_TYPE::Redistribute(N *neighbor_node, N *node, + BPlusTreeInternalPage *parent, int index, + bool from_prev) { + if (node->IsLeafPage()) { + auto *leaf_node = reinterpret_cast(node); + auto *neighbor_leaf_node = reinterpret_cast(neighbor_node); + + if (!from_prev) { + neighbor_leaf_node->MoveFirstToEndOf(leaf_node); + parent->SetKeyAt(index + 1, neighbor_leaf_node->KeyAt(0)); + } else { + neighbor_leaf_node->MoveLastToFrontOf(leaf_node); + parent->SetKeyAt(index, leaf_node->KeyAt(0)); + } + } else { + auto *internal_node = reinterpret_cast(node); + auto *neighbor_internal_node = reinterpret_cast(neighbor_node); + + if (!from_prev) { + neighbor_internal_node->MoveFirstToEndOf(internal_node, parent->KeyAt(index + 1), buffer_pool_manager_); + parent->SetKeyAt(index + 1, neighbor_internal_node->KeyAt(0)); + } else { + neighbor_internal_node->MoveLastToFrontOf(internal_node, parent->KeyAt(index), buffer_pool_manager_); + parent->SetKeyAt(index, internal_node->KeyAt(0)); + } + } } +INDEX_TEMPLATE_ARGUMENTS +auto BPLUSTREE_TYPE::AdjustRoot(BPlusTreePage *old_root_node) -> bool { + if (!old_root_node->IsLeafPage() && old_root_node->GetSize() == 1) { + auto *root_node = reinterpret_cast(old_root_node); + auto only_child_page = buffer_pool_manager_->FetchPage(root_node->ValueAt(0)); + auto *only_child_node = reinterpret_cast(only_child_page->GetData()); + only_child_node->SetParentPageId(INVALID_PAGE_ID); + + root_page_id_ = 
only_child_node->GetPageId(); + + UpdateRootPageId(0); + + buffer_pool_manager_->UnpinPage(only_child_page->GetPageId(), true); + return true; + } + + if (old_root_node->IsLeafPage() && old_root_node->GetSize() == 0) { + root_page_id_ = INVALID_PAGE_ID; + return true; + } + return false; +} /***************************************************************************** * INDEX ITERATOR *****************************************************************************/ /* - * Input parameter is void, find the leftmost leaf page first, then construct + * Input parameter is void, find the leaftmost leaf page first, then construct * index iterator * @return : index iterator */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::Begin() -> INDEXITERATOR_TYPE { return INDEXITERATOR_TYPE(); } +auto BPLUSTREE_TYPE::Begin() -> INDEXITERATOR_TYPE { + if (root_page_id_ == INVALID_PAGE_ID) { + return INDEXITERATOR_TYPE(nullptr, nullptr); + } + root_page_id_latch_.RLock(); + auto leftmost_page = FindLeaf(KeyType(), Operation::SEARCH, nullptr, true); + return INDEXITERATOR_TYPE(buffer_pool_manager_, leftmost_page, 0); +} /* * Input parameter is low key, find the leaf page that contains the input key @@ -96,7 +425,16 @@ auto BPLUSTREE_TYPE::Begin() -> INDEXITERATOR_TYPE { return INDEXITERATOR_TYPE() * @return : index iterator */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::Begin(const KeyType &key) -> INDEXITERATOR_TYPE { return INDEXITERATOR_TYPE(); } +auto BPLUSTREE_TYPE::Begin(const KeyType &key) -> INDEXITERATOR_TYPE { + if (root_page_id_ == INVALID_PAGE_ID) { + return INDEXITERATOR_TYPE(nullptr, nullptr); + } + root_page_id_latch_.RLock(); + auto leaf_page = FindLeaf(key, Operation::SEARCH); + auto *leaf_node = reinterpret_cast(leaf_page->GetData()); + auto idx = leaf_node->KeyIndex(key, comparator_); + return INDEXITERATOR_TYPE(buffer_pool_manager_, leaf_page, idx); +} /* * Input parameter is void, construct an index iterator representing the end @@ -104,117 +442,166 @@ auto 
BPLUSTREE_TYPE::Begin(const KeyType &key) -> INDEXITERATOR_TYPE { return IN * @return : index iterator */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::End() -> INDEXITERATOR_TYPE { return INDEXITERATOR_TYPE(); } +auto BPLUSTREE_TYPE::End() -> INDEXITERATOR_TYPE { + if (root_page_id_ == INVALID_PAGE_ID) { + return INDEXITERATOR_TYPE(nullptr, nullptr); + } + root_page_id_latch_.RLock(); + auto rightmost_page = FindLeaf(KeyType(), Operation::SEARCH, nullptr, false, true); + auto *leaf_node = reinterpret_cast(rightmost_page->GetData()); + return INDEXITERATOR_TYPE(buffer_pool_manager_, rightmost_page, leaf_node->GetSize()); +} + +INDEX_TEMPLATE_ARGUMENTS +auto BPLUSTREE_TYPE::FindLeaf(const KeyType &key, Operation operation, Transaction *transaction, bool leftMost, + bool rightMost) -> Page * { + assert(operation == Operation::SEARCH ? !(leftMost && rightMost) : transaction != nullptr); + + assert(root_page_id_ != INVALID_PAGE_ID); + auto page = buffer_pool_manager_->FetchPage(root_page_id_); + auto *node = reinterpret_cast(page->GetData()); + if (operation == Operation::SEARCH) { + root_page_id_latch_.RUnlock(); + page->RLatch(); + } else { + page->WLatch(); + if (operation == Operation::DELETE && node->GetSize() > 2) { + ReleaseLatchFromQueue(transaction); + } + if (operation == Operation::INSERT && node->IsLeafPage() && node->GetSize() < node->GetMaxSize() - 1) { + ReleaseLatchFromQueue(transaction); + } + if (operation == Operation::INSERT && !node->IsLeafPage() && node->GetSize() < node->GetMaxSize()) { + ReleaseLatchFromQueue(transaction); + } + } + + while (!node->IsLeafPage()) { + auto *i_node = reinterpret_cast(node); + + page_id_t child_node_page_id; + if (leftMost) { + child_node_page_id = i_node->ValueAt(0); + } else if (rightMost) { + child_node_page_id = i_node->ValueAt(i_node->GetSize() - 1); + } else { + child_node_page_id = i_node->Lookup(key, comparator_); + } + assert(child_node_page_id > 0); + + auto child_page = 
buffer_pool_manager_->FetchPage(child_node_page_id); + auto child_node = reinterpret_cast(child_page->GetData()); + + if (operation == Operation::SEARCH) { + child_page->RLatch(); + page->RUnlatch(); + buffer_pool_manager_->UnpinPage(page->GetPageId(), false); + } else if (operation == Operation::INSERT) { + child_page->WLatch(); + transaction->AddIntoPageSet(page); + + // child node is safe, release all locks on ancestors + if (child_node->IsLeafPage() && child_node->GetSize() < child_node->GetMaxSize() - 1) { + ReleaseLatchFromQueue(transaction); + } + if (!child_node->IsLeafPage() && child_node->GetSize() < child_node->GetMaxSize()) { + ReleaseLatchFromQueue(transaction); + } + } else if (operation == Operation::DELETE) { + child_page->WLatch(); + transaction->AddIntoPageSet(page); + + // child node is safe, release all locks on ancestors + if (child_node->GetSize() > child_node->GetMinSize()) { + ReleaseLatchFromQueue(transaction); + } + } + + page = child_page; + node = child_node; + } + + return page; +} + +INDEX_TEMPLATE_ARGUMENTS +void BPLUSTREE_TYPE::ReleaseLatchFromQueue(Transaction *transaction) { + while (!transaction->GetPageSet()->empty()) { + Page *page = transaction->GetPageSet()->front(); + transaction->GetPageSet()->pop_front(); + if (page == nullptr) { + this->root_page_id_latch_.WUnlock(); + } else { + page->WUnlatch(); + buffer_pool_manager_->UnpinPage(page->GetPageId(), false); + } + } +} /** * @return Page id of the root of this tree */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::GetRootPageId() -> page_id_t { return 0; } +auto BPLUSTREE_TYPE::GetRootPageId() -> page_id_t { + root_page_id_latch_.RLock(); + root_page_id_latch_.RUnlock(); + return root_page_id_; +} /***************************************************************************** * UTILITIES AND DEBUG *****************************************************************************/ - /* - * This method is used for test only - * Read data from file and insert one by one + * 
Update/Insert root page id in header page(where page_id = 0, header_page is + * defined under include/page/header_page.h) + * Call this method everytime root page id is changed. + * @parameter: insert_record defualt value is false. When set to true, + * insert a record into header page instead of + * updating it. */ INDEX_TEMPLATE_ARGUMENTS -void BPLUSTREE_TYPE::InsertFromFile(const std::string &file_name, Transaction *txn) { - int64_t key; - std::ifstream input(file_name); - while (input >> key) { - KeyType index_key; - index_key.SetFromInteger(key); - RID rid(key); - Insert(index_key, rid, txn); +void BPLUSTREE_TYPE::UpdateRootPageId(int insert_record) { + auto *header_page = static_cast(buffer_pool_manager_->FetchPage(HEADER_PAGE_ID)); + if (insert_record != 0) { + // create a new record in header_page + header_page->InsertRecord(index_name_, root_page_id_); + } else { + // update root_page_id in header_page + header_page->UpdateRecord(index_name_, root_page_id_); } + buffer_pool_manager_->UnpinPage(HEADER_PAGE_ID, true); } + /* * This method is used for test only - * Read data from file and remove one by one + * Read data from file and insert one by one */ INDEX_TEMPLATE_ARGUMENTS -void BPLUSTREE_TYPE::RemoveFromFile(const std::string &file_name, Transaction *txn) { +void BPLUSTREE_TYPE::InsertFromFile(const std::string &file_name, Transaction *transaction) { int64_t key; std::ifstream input(file_name); - while (input >> key) { + while (input) { + input >> key; + KeyType index_key; index_key.SetFromInteger(key); - Remove(index_key, txn); + RID rid(key); + Insert(index_key, rid, transaction); } } - /* * This method is used for test only - * Read data from file and insert/remove one by one + * Read data from file and remove one by one */ INDEX_TEMPLATE_ARGUMENTS -void BPLUSTREE_TYPE::BatchOpsFromFile(const std::string &file_name, Transaction *txn) { +void BPLUSTREE_TYPE::RemoveFromFile(const std::string &file_name, Transaction *transaction) { int64_t key; - char 
instruction; std::ifstream input(file_name); while (input) { - input >> instruction >> key; - RID rid(key); + input >> key; KeyType index_key; index_key.SetFromInteger(key); - switch (instruction) { - case 'i': - Insert(index_key, rid, txn); - break; - case 'd': - Remove(index_key, txn); - break; - default: - break; - } - } -} - -INDEX_TEMPLATE_ARGUMENTS -void BPLUSTREE_TYPE::Print(BufferPoolManager *bpm) { - auto root_page_id = GetRootPageId(); - auto guard = bpm->FetchPageBasic(root_page_id); - PrintTree(guard.PageId(), guard.template As()); -} - -INDEX_TEMPLATE_ARGUMENTS -void BPLUSTREE_TYPE::PrintTree(page_id_t page_id, const BPlusTreePage *page) { - if (page->IsLeafPage()) { - auto *leaf = reinterpret_cast(page); - std::cout << "Leaf Page: " << page_id << "\tNext: " << leaf->GetNextPageId() << std::endl; - - // Print the contents of the leaf page. - std::cout << "Contents: "; - for (int i = 0; i < leaf->GetSize(); i++) { - std::cout << leaf->KeyAt(i); - if ((i + 1) < leaf->GetSize()) { - std::cout << ", "; - } - } - std::cout << std::endl; - std::cout << std::endl; - - } else { - auto *internal = reinterpret_cast(page); - std::cout << "Internal Page: " << page_id << std::endl; - - // Print the contents of the internal page. 
- std::cout << "Contents: "; - for (int i = 0; i < internal->GetSize(); i++) { - std::cout << internal->KeyAt(i) << ": " << internal->ValueAt(i); - if ((i + 1) < internal->GetSize()) { - std::cout << ", "; - } - } - std::cout << std::endl; - std::cout << std::endl; - for (int i = 0; i < internal->GetSize(); i++) { - auto guard = bpm_->FetchPageBasic(internal->ValueAt(i)); - PrintTree(guard.PageId(), guard.template As()); - } + Remove(index_key, transaction); } } @@ -224,16 +611,14 @@ void BPLUSTREE_TYPE::PrintTree(page_id_t page_id, const BPlusTreePage *page) { INDEX_TEMPLATE_ARGUMENTS void BPLUSTREE_TYPE::Draw(BufferPoolManager *bpm, const std::string &outf) { if (IsEmpty()) { - LOG_WARN("Drawing an empty tree"); + LOG_WARN("Draw an empty tree"); return; } - std::ofstream out(outf); out << "digraph G {" << std::endl; - auto root_page_id = GetRootPageId(); - auto guard = bpm->FetchPageBasic(root_page_id); - ToGraph(guard.PageId(), guard.template As(), out); + ToGraph(reinterpret_cast(bpm->FetchPage(root_page_id_)->GetData()), bpm, out); out << "}" << std::endl; + out.flush(); out.close(); } @@ -241,19 +626,37 @@ void BPLUSTREE_TYPE::Draw(BufferPoolManager *bpm, const std::string &outf) { * This method is used for debug only, You don't need to modify */ INDEX_TEMPLATE_ARGUMENTS -void BPLUSTREE_TYPE::ToGraph(page_id_t page_id, const BPlusTreePage *page, std::ofstream &out) { +void BPLUSTREE_TYPE::Print(BufferPoolManager *bpm) { + if (IsEmpty()) { + LOG_WARN("Print an empty tree"); + return; + } + ToString(reinterpret_cast(bpm->FetchPage(root_page_id_)->GetData()), bpm); +} + +/** + * This method is used for debug only, You don't need to modify + * @tparam KeyType + * @tparam ValueType + * @tparam KeyComparator + * @param page + * @param bpm + * @param out + */ +INDEX_TEMPLATE_ARGUMENTS +void BPLUSTREE_TYPE::ToGraph(BPlusTreePage *page, BufferPoolManager *bpm, std::ofstream &out) const { std::string leaf_prefix("LEAF_"); std::string internal_prefix("INT_"); if 
(page->IsLeafPage()) { - auto *leaf = reinterpret_cast(page); + auto *leaf = reinterpret_cast(page); // Print node name - out << leaf_prefix << page_id; + out << leaf_prefix << leaf->GetPageId(); // Print node properties out << "[shape=plain color=green "; // Print data of the node out << "label=<
\n"; // Print data - out << "\n"; + out << "\n"; out << "\n"; @@ -266,19 +669,25 @@ void BPLUSTREE_TYPE::ToGraph(page_id_t page_id, const BPlusTreePage *page, std:: out << "
GetSize() << "\">P=" << page_id << "
GetSize() << "\">P=" << leaf->GetPageId() << "
GetSize() << "\">" << "max_size=" << leaf->GetMaxSize() << ",min_size=" << leaf->GetMinSize() << ",size=" << leaf->GetSize() << "
>];\n"; // Print Leaf node link if there is a next page if (leaf->GetNextPageId() != INVALID_PAGE_ID) { - out << leaf_prefix << page_id << " -> " << leaf_prefix << leaf->GetNextPageId() << ";\n"; - out << "{rank=same " << leaf_prefix << page_id << " " << leaf_prefix << leaf->GetNextPageId() << "};\n"; + out << leaf_prefix << leaf->GetPageId() << " -> " << leaf_prefix << leaf->GetNextPageId() << ";\n"; + out << "{rank=same " << leaf_prefix << leaf->GetPageId() << " " << leaf_prefix << leaf->GetNextPageId() << "};\n"; + } + + // Print parent links if there is a parent + if (leaf->GetParentPageId() != INVALID_PAGE_ID) { + out << internal_prefix << leaf->GetParentPageId() << ":p" << leaf->GetPageId() << " -> " << leaf_prefix + << leaf->GetPageId() << ";\n"; } } else { - auto *inner = reinterpret_cast(page); + auto *inner = reinterpret_cast(page); // Print node name - out << internal_prefix << page_id; + out << internal_prefix << inner->GetPageId(); // Print node properties out << "[shape=plain color=pink "; // why not? // Print data of the node out << "label=<\n"; // Print data - out << "\n"; + out << "\n"; out << "\n"; @@ -295,78 +704,66 @@ void BPLUSTREE_TYPE::ToGraph(page_id_t page_id, const BPlusTreePage *page, std:: out << ""; // Print table end out << "
GetSize() << "\">P=" << page_id << "
GetSize() << "\">P=" << inner->GetPageId() << "
GetSize() << "\">" << "max_size=" << inner->GetMaxSize() << ",min_size=" << inner->GetMinSize() << ",size=" << inner->GetSize() << "
>];\n"; + // Print Parent link + if (inner->GetParentPageId() != INVALID_PAGE_ID) { + out << internal_prefix << inner->GetParentPageId() << ":p" << inner->GetPageId() << " -> " << internal_prefix + << inner->GetPageId() << ";\n"; + } // Print leaves for (int i = 0; i < inner->GetSize(); i++) { - auto child_guard = bpm_->FetchPageBasic(inner->ValueAt(i)); - auto child_page = child_guard.template As(); - ToGraph(child_guard.PageId(), child_page, out); + auto child_page = reinterpret_cast(bpm->FetchPage(inner->ValueAt(i))->GetData()); + ToGraph(child_page, bpm, out); if (i > 0) { - auto sibling_guard = bpm_->FetchPageBasic(inner->ValueAt(i - 1)); - auto sibling_page = sibling_guard.template As(); + auto sibling_page = reinterpret_cast(bpm->FetchPage(inner->ValueAt(i - 1))->GetData()); if (!sibling_page->IsLeafPage() && !child_page->IsLeafPage()) { - out << "{rank=same " << internal_prefix << sibling_guard.PageId() << " " << internal_prefix - << child_guard.PageId() << "};\n"; + out << "{rank=same " << internal_prefix << sibling_page->GetPageId() << " " << internal_prefix + << child_page->GetPageId() << "};\n"; } - } - out << internal_prefix << page_id << ":p" << child_guard.PageId() << " -> "; - if (child_page->IsLeafPage()) { - out << leaf_prefix << child_guard.PageId() << ";\n"; - } else { - out << internal_prefix << child_guard.PageId() << ";\n"; + bpm->UnpinPage(sibling_page->GetPageId(), false); } } } + bpm->UnpinPage(page->GetPageId(), false); } +/** + * This function is for debug only, you don't need to modify + * @tparam KeyType + * @tparam ValueType + * @tparam KeyComparator + * @param page + * @param bpm + */ INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::DrawBPlusTree() -> std::string { - if (IsEmpty()) { - return "()"; - } - - PrintableBPlusTree p_root = ToPrintableBPlusTree(GetRootPageId()); - std::ostringstream out_buf; - p_root.Print(out_buf); - - return out_buf.str(); -} - -INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_TYPE::ToPrintableBPlusTree(page_id_t 
root_id) -> PrintableBPlusTree { - auto root_page_guard = bpm_->FetchPageBasic(root_id); - auto root_page = root_page_guard.template As(); - PrintableBPlusTree proot; - - if (root_page->IsLeafPage()) { - auto leaf_page = root_page_guard.template As(); - proot.keys_ = leaf_page->ToString(); - proot.size_ = proot.keys_.size() + 4; // 4 more spaces for indent - - return proot; - } - - // draw internal page - auto internal_page = root_page_guard.template As(); - proot.keys_ = internal_page->ToString(); - proot.size_ = 0; - for (int i = 0; i < internal_page->GetSize(); i++) { - page_id_t child_id = internal_page->ValueAt(i); - PrintableBPlusTree child_node = ToPrintableBPlusTree(child_id); - proot.size_ += child_node.size_; - proot.children_.push_back(child_node); +void BPLUSTREE_TYPE::ToString(BPlusTreePage *page, BufferPoolManager *bpm) const { + if (page->IsLeafPage()) { + auto *leaf = reinterpret_cast(page); + std::cout << "Leaf Page: " << leaf->GetPageId() << " parent: " << leaf->GetParentPageId() + << " next: " << leaf->GetNextPageId() << std::endl; + for (int i = 0; i < leaf->GetSize(); i++) { + std::cout << leaf->KeyAt(i) << ","; + } + std::cout << std::endl; + std::cout << std::endl; + } else { + auto *internal = reinterpret_cast(page); + std::cout << "Internal Page: " << internal->GetPageId() << " parent: " << internal->GetParentPageId() << std::endl; + for (int i = 0; i < internal->GetSize(); i++) { + std::cout << internal->KeyAt(i) << ": " << internal->ValueAt(i) << ","; + } + std::cout << std::endl; + std::cout << std::endl; + for (int i = 0; i < internal->GetSize(); i++) { + ToString(reinterpret_cast(bpm->FetchPage(internal->ValueAt(i))->GetData()), bpm); + } } - - return proot; + bpm->UnpinPage(page->GetPageId(), false); } template class BPlusTree, RID, GenericComparator<4>>; - template class BPlusTree, RID, GenericComparator<8>>; - template class BPlusTree, RID, GenericComparator<16>>; - template class BPlusTree, RID, GenericComparator<32>>; - template 
class BPlusTree, RID, GenericComparator<64>>; } // namespace bustub diff --git a/src/storage/index/b_plus_tree_index.cpp b/src/storage/index/b_plus_tree_index.cpp index b4cefb9..3a3a87e 100644 --- a/src/storage/index/b_plus_tree_index.cpp +++ b/src/storage/index/b_plus_tree_index.cpp @@ -17,20 +17,17 @@ namespace bustub { */ INDEX_TEMPLATE_ARGUMENTS BPLUSTREE_INDEX_TYPE::BPlusTreeIndex(std::unique_ptr &&metadata, BufferPoolManager *buffer_pool_manager) - : Index(std::move(metadata)), comparator_(GetMetadata()->GetKeySchema()) { - page_id_t header_page_id; - buffer_pool_manager->NewPage(&header_page_id); - container_ = std::make_shared>(GetMetadata()->GetName(), header_page_id, - buffer_pool_manager, comparator_); -} + : Index(std::move(metadata)), + comparator_(GetMetadata()->GetKeySchema()), + container_(GetMetadata()->GetName(), buffer_pool_manager, comparator_) {} INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_INDEX_TYPE::InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool { +void BPLUSTREE_INDEX_TYPE::InsertEntry(const Tuple &key, RID rid, Transaction *transaction) { // construct insert index key KeyType index_key; index_key.SetFromKey(key); - return container_->Insert(index_key, rid, transaction); + container_.Insert(index_key, rid, transaction); } INDEX_TEMPLATE_ARGUMENTS @@ -39,7 +36,7 @@ void BPLUSTREE_INDEX_TYPE::DeleteEntry(const Tuple &key, RID rid, Transaction *t KeyType index_key; index_key.SetFromKey(key); - container_->Remove(index_key, transaction); + container_.Remove(index_key, transaction); } INDEX_TEMPLATE_ARGUMENTS @@ -48,17 +45,17 @@ void BPLUSTREE_INDEX_TYPE::ScanKey(const Tuple &key, std::vector *result, T KeyType index_key; index_key.SetFromKey(key); - container_->GetValue(index_key, result, transaction); + container_.GetValue(index_key, result, transaction); } INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_INDEX_TYPE::GetBeginIterator() -> INDEXITERATOR_TYPE { return container_->Begin(); } +auto 
BPLUSTREE_INDEX_TYPE::GetBeginIterator() -> INDEXITERATOR_TYPE { return container_.Begin(); } INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_INDEX_TYPE::GetBeginIterator(const KeyType &key) -> INDEXITERATOR_TYPE { return container_->Begin(key); } +auto BPLUSTREE_INDEX_TYPE::GetBeginIterator(const KeyType &key) -> INDEXITERATOR_TYPE { return container_.Begin(key); } INDEX_TEMPLATE_ARGUMENTS -auto BPLUSTREE_INDEX_TYPE::GetEndIterator() -> INDEXITERATOR_TYPE { return container_->End(); } +auto BPLUSTREE_INDEX_TYPE::GetEndIterator() -> INDEXITERATOR_TYPE { return container_.End(); } template class BPlusTreeIndex, RID, GenericComparator<4>>; template class BPlusTreeIndex, RID, GenericComparator<8>>; diff --git a/src/storage/index/extendible_hash_table_index.cpp b/src/storage/index/extendible_hash_table_index.cpp index 82d6da7..e61a9b1 100644 --- a/src/storage/index/extendible_hash_table_index.cpp +++ b/src/storage/index/extendible_hash_table_index.cpp @@ -15,12 +15,12 @@ HASH_TABLE_INDEX_TYPE::ExtendibleHashTableIndex(std::unique_ptr & container_(GetMetadata()->GetName(), buffer_pool_manager, comparator_, hash_fn) {} template -auto HASH_TABLE_INDEX_TYPE::InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool { +void HASH_TABLE_INDEX_TYPE::InsertEntry(const Tuple &key, RID rid, Transaction *transaction) { // construct insert index key KeyType index_key; index_key.SetFromKey(key); - return container_.Insert(index_key, rid, transaction); + container_.Insert(transaction, index_key, rid); } template @@ -29,7 +29,7 @@ void HASH_TABLE_INDEX_TYPE::DeleteEntry(const Tuple &key, RID rid, Transaction * KeyType index_key; index_key.SetFromKey(key); - container_.Remove(index_key, transaction); + container_.Remove(transaction, index_key, rid); } template @@ -38,7 +38,7 @@ void HASH_TABLE_INDEX_TYPE::ScanKey(const Tuple &key, std::vector *result, KeyType index_key; index_key.SetFromKey(key); - container_.GetValue(index_key, result, transaction); + 
container_.GetValue(transaction, index_key, result); } template class ExtendibleHashTableIndex, RID, GenericComparator<4>>; template class ExtendibleHashTableIndex, RID, GenericComparator<8>>; diff --git a/src/storage/index/index_iterator.cpp b/src/storage/index/index_iterator.cpp index 3c2ee3a..7f3364f 100644 --- a/src/storage/index/index_iterator.cpp +++ b/src/storage/index/index_iterator.cpp @@ -11,20 +11,59 @@ namespace bustub { * NOTE: you can change the destructor/constructor method here * set your own input parameters */ + +INDEX_TEMPLATE_ARGUMENTS +INDEXITERATOR_TYPE::IndexIterator(BufferPoolManager *bpm, Page *page, int index) + : buffer_pool_manager_(bpm), page_(page), index_(index) { + if (page != nullptr) { + leaf_ = reinterpret_cast(page->GetData()); + } else { + leaf_ = nullptr; + } +} + +INDEX_TEMPLATE_ARGUMENTS +INDEXITERATOR_TYPE::~IndexIterator() { + if (page_ != nullptr) { + page_->RUnlatch(); + buffer_pool_manager_->UnpinPage(page_->GetPageId(), false); + } +} + INDEX_TEMPLATE_ARGUMENTS -INDEXITERATOR_TYPE::IndexIterator() = default; +auto INDEXITERATOR_TYPE::IsEnd() -> bool { + return leaf_->GetNextPageId() == INVALID_PAGE_ID && index_ == leaf_->GetSize(); +} INDEX_TEMPLATE_ARGUMENTS -INDEXITERATOR_TYPE::~IndexIterator() = default; // NOLINT +auto INDEXITERATOR_TYPE::operator*() -> const MappingType & { return leaf_->GetItem(index_); } INDEX_TEMPLATE_ARGUMENTS -auto INDEXITERATOR_TYPE::IsEnd() -> bool { throw std::runtime_error("unimplemented"); } +auto INDEXITERATOR_TYPE::operator++() -> INDEXITERATOR_TYPE & { + if (index_ == leaf_->GetSize() - 1 && leaf_->GetNextPageId() != INVALID_PAGE_ID) { + auto next_page = buffer_pool_manager_->FetchPage(leaf_->GetNextPageId()); + + next_page->RLatch(); + page_->RUnlatch(); + buffer_pool_manager_->UnpinPage(page_->GetPageId(), false); + + page_ = next_page; + leaf_ = reinterpret_cast(page_->GetData()); + index_ = 0; + } else { + index_++; + } + + return *this; +} INDEX_TEMPLATE_ARGUMENTS -auto 
INDEXITERATOR_TYPE::operator*() -> const MappingType & { throw std::runtime_error("unimplemented"); } +auto INDEXITERATOR_TYPE::operator==(const IndexIterator &itr) const -> bool { + return leaf_ == nullptr || (leaf_->GetPageId() == itr.leaf_->GetPageId() && index_ == itr.index_); +} INDEX_TEMPLATE_ARGUMENTS -auto INDEXITERATOR_TYPE::operator++() -> INDEXITERATOR_TYPE & { throw std::runtime_error("unimplemented"); } +auto INDEXITERATOR_TYPE::operator!=(const IndexIterator &itr) const -> bool { return !this->operator==(itr); } template class IndexIterator, RID, GenericComparator<4>>; diff --git a/src/storage/index/linear_probe_hash_table_index.cpp b/src/storage/index/linear_probe_hash_table_index.cpp index 156f936..1ae6bc1 100644 --- a/src/storage/index/linear_probe_hash_table_index.cpp +++ b/src/storage/index/linear_probe_hash_table_index.cpp @@ -15,12 +15,12 @@ HASH_TABLE_INDEX_TYPE::LinearProbeHashTableIndex(std::unique_ptr container_(GetMetadata()->GetName(), buffer_pool_manager, comparator_, num_buckets, hash_fn) {} template -auto HASH_TABLE_INDEX_TYPE::InsertEntry(const Tuple &key, RID rid, Transaction *transaction) -> bool { +void HASH_TABLE_INDEX_TYPE::InsertEntry(const Tuple &key, RID rid, Transaction *transaction) { // construct insert index key KeyType index_key; index_key.SetFromKey(key); - return container_.Insert(transaction, index_key, rid); + container_.Insert(transaction, index_key, rid); } template diff --git a/src/storage/page/CMakeLists.txt b/src/storage/page/CMakeLists.txt index e46f3ad..42da957 100644 --- a/src/storage/page/CMakeLists.txt +++ b/src/storage/page/CMakeLists.txt @@ -7,11 +7,7 @@ add_library( hash_table_block_page.cpp hash_table_bucket_page.cpp hash_table_directory_page.cpp - extendible_htable_bucket_page.cpp - extendible_htable_directory_page.cpp - extendible_htable_header_page.cpp - extendible_htable_page_utils.cpp - page_guard.cpp + header_page.cpp table_page.cpp) set(ALL_OBJECT_FILES diff --git 
a/src/storage/page/b_plus_tree_internal_page.cpp b/src/storage/page/b_plus_tree_internal_page.cpp index e0ab2b3..abf6c8d 100644 --- a/src/storage/page/b_plus_tree_internal_page.cpp +++ b/src/storage/page/b_plus_tree_internal_page.cpp @@ -21,10 +21,18 @@ namespace bustub { *****************************************************************************/ /* * Init method after creating a new internal page - * Including set page type, set current size, and set max page size + * Including set page type, set current size, set page id, set parent id and set + * max page size */ INDEX_TEMPLATE_ARGUMENTS -void B_PLUS_TREE_INTERNAL_PAGE_TYPE::Init(int max_size) {} +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::Init(page_id_t page_id, page_id_t parent_id, int max_size) { + SetPageType(IndexPageType::INTERNAL_PAGE); + SetSize(0); + SetPageId(page_id); + SetParentPageId(parent_id); + SetMaxSize(max_size); +} + /* * Helper method to get/set the key associated with input "index"(a.k.a * array offset) @@ -32,19 +40,151 @@ void B_PLUS_TREE_INTERNAL_PAGE_TYPE::Init(int max_size) {} INDEX_TEMPLATE_ARGUMENTS auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::KeyAt(int index) const -> KeyType { // replace with your own code - KeyType key{}; - return key; + return array_[index].first; } INDEX_TEMPLATE_ARGUMENTS -void B_PLUS_TREE_INTERNAL_PAGE_TYPE::SetKeyAt(int index, const KeyType &key) {} +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::SetKeyAt(int index, const KeyType &key) { array_[index].first = key; } /* * Helper method to get the value associated with input "index"(a.k.a array * offset) */ INDEX_TEMPLATE_ARGUMENTS -auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::ValueAt(int index) const -> ValueType { return 0; } +auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::ValueAt(int index) const -> ValueType { return array_[index].second; } + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::SetValueAt(int index, const ValueType &value) { array_[index].second = value; } + +INDEX_TEMPLATE_ARGUMENTS +auto 
B_PLUS_TREE_INTERNAL_PAGE_TYPE::ValueIndex(const ValueType &value) const -> int { + auto it = std::find_if(array_, array_ + GetSize(), [&value](const auto &pair) { return pair.second == value; }); + return std::distance(array_, it); +} + +INDEX_TEMPLATE_ARGUMENTS +auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::Lookup(const KeyType &key, const KeyComparator &comparator) const -> ValueType { + auto target = std::lower_bound(array_ + 1, array_ + GetSize(), key, + [&comparator](const auto &pair, auto k) { return comparator(pair.first, k) < 0; }); + if (target == array_ + GetSize()) { + return ValueAt(GetSize() - 1); + } + if (comparator(target->first, key) == 0) { + return target->second; + } + return std::prev(target)->second; +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::PopulateNewRoot(const ValueType &old_value, const KeyType &new_key, + const ValueType &new_value) { + SetKeyAt(1, new_key); + SetValueAt(0, old_value); + SetValueAt(1, new_value); + SetSize(2); +} + +INDEX_TEMPLATE_ARGUMENTS +auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::InsertNodeAfter(const ValueType &old_value, const KeyType &new_key, + const ValueType &new_value) -> int { + auto new_value_idx = ValueIndex(old_value) + 1; + std::move_backward(array_ + new_value_idx, array_ + GetSize(), array_ + GetSize() + 1); + + array_[new_value_idx].first = new_key; + array_[new_value_idx].second = new_value; + + IncreaseSize(1); + + return GetSize(); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveHalfTo(BPlusTreeInternalPage *recipient, + BufferPoolManager *buffer_pool_manager) { + int start_split_indx = GetMinSize(); + int original_size = GetSize(); + SetSize(start_split_indx); + recipient->CopyNFrom(array_ + start_split_indx, original_size - start_split_indx, buffer_pool_manager); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::CopyNFrom(MappingType *items, int size, BufferPoolManager *buffer_pool_manager) { + std::copy(items, items + size, array_ + 
GetSize()); + + for (int i = 0; i < size; i++) { + auto page = buffer_pool_manager->FetchPage(ValueAt(i + GetSize())); + auto *node = reinterpret_cast(page->GetData()); + node->SetParentPageId(GetPageId()); + buffer_pool_manager->UnpinPage(page->GetPageId(), true); + } + + IncreaseSize(size); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::Remove(int index) { + std::move(array_ + index + 1, array_ + GetSize(), array_ + index); + IncreaseSize(-1); +} + +INDEX_TEMPLATE_ARGUMENTS +auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::RemoveAndReturnOnlyChild() -> ValueType { + ValueType only_value = ValueAt(0); + SetSize(0); + return only_value; +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveAllTo(BPlusTreeInternalPage *recipient, const KeyType &middle_key, + BufferPoolManager *buffer_pool_manager) { + SetKeyAt(0, middle_key); + recipient->CopyNFrom(array_, GetSize(), buffer_pool_manager); + SetSize(0); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveFirstToEndOf(BPlusTreeInternalPage *recipient, const KeyType &middle_key, + BufferPoolManager *buffer_pool_manager) { + SetKeyAt(0, middle_key); + auto first_item = array_[0]; + recipient->CopyLastFrom(first_item, buffer_pool_manager); + + std::move(array_ + 1, array_ + GetSize(), array_); + IncreaseSize(-1); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::CopyLastFrom(const MappingType &pair, BufferPoolManager *buffer_pool_manager) { + *(array_ + GetSize()) = pair; + IncreaseSize(1); + + auto page = buffer_pool_manager->FetchPage(pair.second); + auto *node = reinterpret_cast(page->GetData()); + node->SetParentPageId(GetPageId()); + buffer_pool_manager->UnpinPage(page->GetPageId(), true); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveLastToFrontOf(BPlusTreeInternalPage *recipient, const KeyType &middle_key, + BufferPoolManager *buffer_pool_manager) { + auto last_item = array_[GetSize() - 1]; + recipient->SetKeyAt(0, 
middle_key); + recipient->CopyFirstFrom(last_item, buffer_pool_manager); + + IncreaseSize(-1); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_INTERNAL_PAGE_TYPE::CopyFirstFrom(const MappingType &pair, BufferPoolManager *buffer_pool_manager) { + std::move_backward(array_, array_ + GetSize(), array_ + GetSize() + 1); + *array_ = pair; + IncreaseSize(1); + + auto page = buffer_pool_manager->FetchPage(pair.second); + auto *node = reinterpret_cast(page->GetData()); + node->SetParentPageId(GetPageId()); + buffer_pool_manager->UnpinPage(page->GetPageId(), true); +} // valuetype for internalNode should be page id_t template class BPlusTreeInternalPage, page_id_t, GenericComparator<4>>; diff --git a/src/storage/page/b_plus_tree_leaf_page.cpp b/src/storage/page/b_plus_tree_leaf_page.cpp index 3b325d7..55da2c7 100644 --- a/src/storage/page/b_plus_tree_leaf_page.cpp +++ b/src/storage/page/b_plus_tree_leaf_page.cpp @@ -23,19 +23,27 @@ namespace bustub { /** * Init method after creating a new leaf page - * Including set page type, set current size to zero, set next page id and set max size + * Including set page type, set current size to zero, set page id/parent id, set + * next page id and set max size */ INDEX_TEMPLATE_ARGUMENTS -void B_PLUS_TREE_LEAF_PAGE_TYPE::Init(int max_size) {} +void B_PLUS_TREE_LEAF_PAGE_TYPE::Init(page_id_t page_id, page_id_t parent_id, int max_size) { + SetPageType(IndexPageType::LEAF_PAGE); + SetSize(0); + SetPageId(page_id); + SetParentPageId(parent_id); + SetNextPageId(INVALID_PAGE_ID); + SetMaxSize(max_size); +} /** * Helper methods to set/get next page id */ INDEX_TEMPLATE_ARGUMENTS -auto B_PLUS_TREE_LEAF_PAGE_TYPE::GetNextPageId() const -> page_id_t { return INVALID_PAGE_ID; } +auto B_PLUS_TREE_LEAF_PAGE_TYPE::GetNextPageId() const -> page_id_t { return next_page_id_; } INDEX_TEMPLATE_ARGUMENTS -void B_PLUS_TREE_LEAF_PAGE_TYPE::SetNextPageId(page_id_t next_page_id) {} +void B_PLUS_TREE_LEAF_PAGE_TYPE::SetNextPageId(page_id_t next_page_id) { 
next_page_id_ = next_page_id; } /* * Helper method to find and return the key associated with input "index"(a.k.a @@ -44,8 +52,108 @@ void B_PLUS_TREE_LEAF_PAGE_TYPE::SetNextPageId(page_id_t next_page_id) {} INDEX_TEMPLATE_ARGUMENTS auto B_PLUS_TREE_LEAF_PAGE_TYPE::KeyAt(int index) const -> KeyType { // replace with your own code - KeyType key{}; - return key; + return array_[index].first; +} + +INDEX_TEMPLATE_ARGUMENTS +auto B_PLUS_TREE_LEAF_PAGE_TYPE::GetItem(int index) -> const MappingType & { return array_[index]; } + +INDEX_TEMPLATE_ARGUMENTS +auto B_PLUS_TREE_LEAF_PAGE_TYPE::KeyIndex(const KeyType &key, const KeyComparator &keyComparator) const -> int { + auto target = std::lower_bound(array_, array_ + GetSize(), key, [&keyComparator](const auto &pair, auto k) { + return keyComparator(pair.first, k) < 0; + }); + return std::distance(array_, target); +} + +INDEX_TEMPLATE_ARGUMENTS +auto B_PLUS_TREE_LEAF_PAGE_TYPE::Insert(const KeyType &key, const ValueType &value, const KeyComparator &keyComparator) + -> int { + auto distance_in_array = KeyIndex(key, keyComparator); + if (distance_in_array == GetSize()) { + *(array_ + distance_in_array) = {key, value}; + IncreaseSize(1); + return GetSize(); + } + + if (keyComparator(array_[distance_in_array].first, key) == 0) { + return GetSize(); + } + + std::move_backward(array_ + distance_in_array, array_ + GetSize(), array_ + GetSize() + 1); + *(array_ + distance_in_array) = {key, value}; + + IncreaseSize(1); + return GetSize(); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveHalfTo(BPlusTreeLeafPage *recipient) { + int start_split_indx = GetMinSize(); + SetSize(start_split_indx); + recipient->CopyNFrom(array_ + start_split_indx, GetMaxSize() - start_split_indx); +} +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_LEAF_PAGE_TYPE::CopyNFrom(MappingType *items, int size) { + std::copy(items, items + size, array_ + GetSize()); + IncreaseSize(size); +} + +INDEX_TEMPLATE_ARGUMENTS +auto 
B_PLUS_TREE_LEAF_PAGE_TYPE::Lookup(const KeyType &key, ValueType *value, const KeyComparator &keyComparator) const + -> bool { + int target_in_array = KeyIndex(key, keyComparator); + if (target_in_array == GetSize() || keyComparator(array_[target_in_array].first, key) != 0) { + return false; + } + *value = array_[target_in_array].second; + return true; +} + +INDEX_TEMPLATE_ARGUMENTS +auto B_PLUS_TREE_LEAF_PAGE_TYPE::RemoveAndDeleteRecord(const KeyType &key, const KeyComparator &keyComparator) -> int { + int target_in_array = KeyIndex(key, keyComparator); + if (target_in_array == GetSize() || keyComparator(array_[target_in_array].first, key) != 0) { + return GetSize(); + } + std::move(array_ + target_in_array + 1, array_ + GetSize(), array_ + target_in_array); + IncreaseSize(-1); + return GetSize(); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveAllTo(BPlusTreeLeafPage *recipient) { + recipient->CopyNFrom(array_, GetSize()); + recipient->SetNextPageId(GetNextPageId()); + SetSize(0); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveFirstToEndOf(BPlusTreeLeafPage *recipient) { + auto first_item = GetItem(0); + std::move(array_ + 1, array_ + GetSize(), array_); + IncreaseSize(-1); + recipient->CopyLastFrom(first_item); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_LEAF_PAGE_TYPE::CopyLastFrom(const MappingType &item) { + *(array_ + GetSize()) = item; + IncreaseSize(1); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveLastToFrontOf(BPlusTreeLeafPage *recipient) { + auto last_item = GetItem(GetSize() - 1); + IncreaseSize(-1); + recipient->CopyFirstFrom(last_item); +} + +INDEX_TEMPLATE_ARGUMENTS +void B_PLUS_TREE_LEAF_PAGE_TYPE::CopyFirstFrom(const MappingType &item) { + std::move_backward(array_, array_ + GetSize(), array_ + GetSize() + 1); + *array_ = item; + IncreaseSize(1); } template class BPlusTreeLeafPage, RID, GenericComparator<4>>; diff --git a/src/storage/page/b_plus_tree_page.cpp 
b/src/storage/page/b_plus_tree_page.cpp index 0339069..a720c05 100644 --- a/src/storage/page/b_plus_tree_page.cpp +++ b/src/storage/page/b_plus_tree_page.cpp @@ -17,27 +17,51 @@ namespace bustub { * Helper methods to get/set page type * Page type enum class is defined in b_plus_tree_page.h */ -auto BPlusTreePage::IsLeafPage() const -> bool { return false; } -void BPlusTreePage::SetPageType(IndexPageType page_type) {} +auto BPlusTreePage::IsLeafPage() const -> bool { return page_type_ == IndexPageType::LEAF_PAGE; } +auto BPlusTreePage::IsRootPage() const -> bool { return parent_page_id_ == INVALID_PAGE_ID; } +void BPlusTreePage::SetPageType(IndexPageType page_type) { page_type_ = page_type; } +auto BPlusTreePage::GetPageType() -> IndexPageType { return page_type_; } /* * Helper methods to get/set size (number of key/value pairs stored in that * page) */ -auto BPlusTreePage::GetSize() const -> int { return 0; } -void BPlusTreePage::SetSize(int size) {} -void BPlusTreePage::IncreaseSize(int amount) {} +auto BPlusTreePage::GetSize() const -> int { return size_; } +void BPlusTreePage::SetSize(int size) { size_ = size; } +void BPlusTreePage::IncreaseSize(int amount) { size_ += amount; } /* * Helper methods to get/set max size (capacity) of the page */ -auto BPlusTreePage::GetMaxSize() const -> int { return 0; } -void BPlusTreePage::SetMaxSize(int size) {} +auto BPlusTreePage::GetMaxSize() const -> int { return max_size_; } +void BPlusTreePage::SetMaxSize(int size) { max_size_ = size; } /* * Helper method to get min page size * Generally, min page size == max page size / 2 */ -auto BPlusTreePage::GetMinSize() const -> int { return 0; } +auto BPlusTreePage::GetMinSize() const -> int { + if (IsLeafPage()) { + return max_size_ / 2; + } + return (max_size_ + 1) / 2; +} + +/* + * Helper methods to get/set parent page id + */ +auto BPlusTreePage::GetParentPageId() const -> page_id_t { return parent_page_id_; } +void BPlusTreePage::SetParentPageId(page_id_t parent_page_id) { 
parent_page_id_ = parent_page_id; } + +/* + * Helper methods to get/set self page id + */ +auto BPlusTreePage::GetPageId() const -> page_id_t { return page_id_; } +void BPlusTreePage::SetPageId(page_id_t page_id) { page_id_ = page_id; } + +/* + * Helper methods to set lsn + */ +void BPlusTreePage::SetLSN(lsn_t lsn) { lsn_ = lsn; } } // namespace bustub diff --git a/src/storage/page/extendible_htable_bucket_page.cpp b/src/storage/page/extendible_htable_bucket_page.cpp deleted file mode 100644 index 670122c..0000000 --- a/src/storage/page/extendible_htable_bucket_page.cpp +++ /dev/null @@ -1,83 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// extendible_htable_bucket_page.cpp -// -// Identification: src/storage/page/extendible_htable_bucket_page.cpp -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "common/exception.h" -#include "storage/page/extendible_htable_bucket_page.h" - -namespace bustub { - -template -void ExtendibleHTableBucketPage::Init(uint32_t max_size) { - throw NotImplementedException("ExtendibleHTableBucketPage not implemented"); -} - -template -auto ExtendibleHTableBucketPage::Lookup(const K &key, V &value, const KC &cmp) const -> bool { - return false; -} - -template -auto ExtendibleHTableBucketPage::Insert(const K &key, const V &value, const KC &cmp) -> bool { - return false; -} - -template -auto ExtendibleHTableBucketPage::Remove(const K &key, const KC &cmp) -> bool { - return false; -} - -template -void ExtendibleHTableBucketPage::RemoveAt(uint32_t bucket_idx) { - throw NotImplementedException("ExtendibleHTableBucketPage not implemented"); -} - -template -auto ExtendibleHTableBucketPage::KeyAt(uint32_t bucket_idx) const -> K { - return {}; -} - -template -auto ExtendibleHTableBucketPage::ValueAt(uint32_t bucket_idx) const -> V { - return {}; 
-} - -template -auto ExtendibleHTableBucketPage::EntryAt(uint32_t bucket_idx) const -> const std::pair & { - return array_[0]; -} - -template -auto ExtendibleHTableBucketPage::Size() const -> uint32_t { - return 0; -} - -template -auto ExtendibleHTableBucketPage::IsFull() const -> bool { - return false; -} - -template -auto ExtendibleHTableBucketPage::IsEmpty() const -> bool { - return false; -} - -template class ExtendibleHTableBucketPage; -template class ExtendibleHTableBucketPage, RID, GenericComparator<4>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<8>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<16>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<32>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<64>>; - -} // namespace bustub diff --git a/src/storage/page/extendible_htable_directory_page.cpp b/src/storage/page/extendible_htable_directory_page.cpp deleted file mode 100644 index 184c6f4..0000000 --- a/src/storage/page/extendible_htable_directory_page.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// extendible_htable_directory_page.cpp -// -// Identification: src/storage/page/extendible_htable_directory_page.cpp -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "storage/page/extendible_htable_directory_page.h" - -#include -#include - -#include "common/config.h" -#include "common/logger.h" - -namespace bustub { - -void ExtendibleHTableDirectoryPage::Init(uint32_t max_depth) { - throw NotImplementedException("ExtendibleHTableDirectoryPage is not implemented"); -} - -auto ExtendibleHTableDirectoryPage::HashToBucketIndex(uint32_t hash) const -> uint32_t { return 0; } - -auto ExtendibleHTableDirectoryPage::GetBucketPageId(uint32_t bucket_idx) const -> page_id_t { 
return INVALID_PAGE_ID; } - -void ExtendibleHTableDirectoryPage::SetBucketPageId(uint32_t bucket_idx, page_id_t bucket_page_id) { - throw NotImplementedException("ExtendibleHTableDirectoryPage is not implemented"); -} - -auto ExtendibleHTableDirectoryPage::GetSplitImageIndex(uint32_t bucket_idx) const -> uint32_t { return 0; } - -auto ExtendibleHTableDirectoryPage::GetGlobalDepth() const -> uint32_t { return 0; } - -void ExtendibleHTableDirectoryPage::IncrGlobalDepth() { - throw NotImplementedException("ExtendibleHTableDirectoryPage is not implemented"); -} - -void ExtendibleHTableDirectoryPage::DecrGlobalDepth() { - throw NotImplementedException("ExtendibleHTableDirectoryPage is not implemented"); -} - -auto ExtendibleHTableDirectoryPage::CanShrink() -> bool { return false; } - -auto ExtendibleHTableDirectoryPage::Size() const -> uint32_t { return 0; } - -auto ExtendibleHTableDirectoryPage::GetLocalDepth(uint32_t bucket_idx) const -> uint32_t { return 0; } - -void ExtendibleHTableDirectoryPage::SetLocalDepth(uint32_t bucket_idx, uint8_t local_depth) { - throw NotImplementedException("ExtendibleHTableDirectoryPage is not implemented"); -} - -void ExtendibleHTableDirectoryPage::IncrLocalDepth(uint32_t bucket_idx) { - throw NotImplementedException("ExtendibleHTableDirectoryPage is not implemented"); -} - -void ExtendibleHTableDirectoryPage::DecrLocalDepth(uint32_t bucket_idx) { - throw NotImplementedException("ExtendibleHTableDirectoryPage is not implemented"); -} - -} // namespace bustub diff --git a/src/storage/page/extendible_htable_header_page.cpp b/src/storage/page/extendible_htable_header_page.cpp deleted file mode 100644 index 901922e..0000000 --- a/src/storage/page/extendible_htable_header_page.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// extendible_htable_header_page.cpp -// -// Identification: src/storage/page/extendible_htable_header_page.cpp -// -// Copyright 
(c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "storage/page/extendible_htable_header_page.h" - -#include "common/exception.h" - -namespace bustub { - -void ExtendibleHTableHeaderPage::Init(uint32_t max_depth) { - throw NotImplementedException("ExtendibleHTableHeaderPage is not implemented"); -} - -auto ExtendibleHTableHeaderPage::HashToDirectoryIndex(uint32_t hash) const -> uint32_t { return 0; } - -auto ExtendibleHTableHeaderPage::GetDirectoryPageId(uint32_t directory_idx) const -> uint32_t { return 0; } - -void ExtendibleHTableHeaderPage::SetDirectoryPageId(uint32_t directory_idx, page_id_t directory_page_id) { - throw NotImplementedException("ExtendibleHTableHeaderPage is not implemented"); -} - -auto ExtendibleHTableHeaderPage::MaxSize() const -> uint32_t { return 0; } - -} // namespace bustub diff --git a/src/storage/page/extendible_htable_page_utils.cpp b/src/storage/page/extendible_htable_page_utils.cpp deleted file mode 100644 index 102db11..0000000 --- a/src/storage/page/extendible_htable_page_utils.cpp +++ /dev/null @@ -1,110 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// extendible_htable_page_utils.cpp -// -// Identification: src/storage/page/extendible_htable_page_utils.cpp -// -// Copyright (c) 2015-2023, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "common/logger.h" -#include "common/macros.h" -#include "storage/page/extendible_htable_bucket_page.h" -#include "storage/page/extendible_htable_directory_page.h" -#include "storage/page/extendible_htable_header_page.h" - -namespace bustub { - -void ExtendibleHTableHeaderPage::PrintHeader() const { - LOG_DEBUG("======== HEADER (max_depth_: %u) ========", max_depth_); - LOG_DEBUG("| directory_idx | page_id |"); - for (uint32_t idx = 0; idx < 
static_cast(1 << max_depth_); idx++) { - LOG_DEBUG("| %u | %u |", idx, directory_page_ids_[idx]); - } - LOG_DEBUG("======== END HEADER ========"); -} - -void ExtendibleHTableDirectoryPage::PrintDirectory() const { - LOG_DEBUG("======== DIRECTORY (global_depth_: %u) ========", global_depth_); - LOG_DEBUG("| bucket_idx | page_id | local_depth |"); - for (uint32_t idx = 0; idx < static_cast(0x1 << global_depth_); idx++) { - LOG_DEBUG("| %u | %u | %u |", idx, bucket_page_ids_[idx], local_depths_[idx]); - } - LOG_DEBUG("================ END DIRECTORY ================"); -} - -template -void ExtendibleHTableBucketPage::PrintBucket() const { - std::cout << "======== BUCKET (size_: " << size_ << " | max_size_: " << max_size_ << ") ========\n"; - std::cout << ("| i | k | v |\n"); - for (uint32_t idx = 0; idx < size_; idx++) { - std::cout << "| " << idx << " | " << KeyAt(idx) << " | " << ValueAt(idx) << " |\n"; - } - std::cout << "================ END BUCKET ================\n"; - std::cout << "\n"; -} - -/** - * VerifyIntegrity - * - * Verify the following invariants: - * (1) All LD <= GD. - * (2) Each bucket has precisely 2^(GD - LD) pointers pointing to it. - * (3) The LD is the same at each index with the same bucket_page_id - */ -void ExtendibleHTableDirectoryPage::VerifyIntegrity() const { - // build maps of {bucket_page_id : pointer_count} and {bucket_page_id : local_depth} - std::unordered_map page_id_to_count = std::unordered_map(); - std::unordered_map page_id_to_ld = std::unordered_map(); - - // Verify: (3) The LD is the same at each index with the same bucket_page_id - for (uint32_t curr_idx = 0; curr_idx < Size(); curr_idx++) { - page_id_t curr_page_id = bucket_page_ids_[curr_idx]; - uint32_t curr_ld = local_depths_[curr_idx]; - - // Verify: (1) All LD <= GD. 
- BUSTUB_ASSERT(curr_ld <= global_depth_, "there exists a local depth greater than the global depth"); - - ++page_id_to_count[curr_page_id]; - - if (page_id_to_ld.count(curr_page_id) > 0 && curr_ld != page_id_to_ld[curr_page_id]) { - uint32_t old_ld = page_id_to_ld[curr_page_id]; - LOG_WARN("Verify Integrity: curr_local_depth: %u, old_local_depth %u, for page_id: %u", curr_ld, old_ld, - curr_page_id); - PrintDirectory(); - BUSTUB_ASSERT(curr_ld == page_id_to_ld[curr_page_id], - "local depth is not the same at each index with same bucket page id"); - } else { - page_id_to_ld[curr_page_id] = curr_ld; - } - } - - // Verify: (2) Each bucket has precisely 2^(GD - LD) pointers pointing to it. - auto it = page_id_to_count.begin(); - while (it != page_id_to_count.end()) { - page_id_t curr_page_id = it->first; - uint32_t curr_count = it->second; - uint32_t curr_ld = page_id_to_ld[curr_page_id]; - uint32_t required_count = 0x1 << (global_depth_ - curr_ld); - - if (curr_count != required_count) { - LOG_WARN("Verify Integrity: curr_count: %u, required_count %u, for page_id: %u", curr_count, required_count, - curr_page_id); - PrintDirectory(); - BUSTUB_ASSERT(curr_count == required_count, "a bucket does not have precisely 2^(GD - LD) pointers to it"); - } - it++; - } -} - -template class ExtendibleHTableBucketPage; -template class ExtendibleHTableBucketPage, RID, GenericComparator<4>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<8>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<16>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<32>>; -template class ExtendibleHTableBucketPage, RID, GenericComparator<64>>; - -} // namespace bustub diff --git a/src/storage/page/header_page.cpp b/src/storage/page/header_page.cpp new file mode 100644 index 0000000..66c7005 --- /dev/null +++ b/src/storage/page/header_page.cpp @@ -0,0 +1,104 @@ +//===----------------------------------------------------------------------===// +// +// 
CMU-DB Project (15-445/645) +// ***DO NO SHARE PUBLICLY*** +// +// Identification: src/page/header_page.cpp +// +// Copyright (c) 2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "storage/page/header_page.h" + +namespace bustub { + +/** + * Record related + */ +auto HeaderPage::InsertRecord(const std::string &name, const page_id_t root_id) -> bool { + assert(name.length() < 32); + assert(root_id > INVALID_PAGE_ID); + + int record_num = GetRecordCount(); + int offset = 4 + record_num * 36; + // check for duplicate name + if (FindRecord(name) != -1) { + return false; + } + // copy record content + memcpy(GetData() + offset, name.c_str(), (name.length() + 1)); + memcpy((GetData() + offset + 32), &root_id, 4); + + SetRecordCount(record_num + 1); + return true; +} + +auto HeaderPage::DeleteRecord(const std::string &name) -> bool { + int record_num = GetRecordCount(); + assert(record_num > 0); + + int index = FindRecord(name); + // record does not exsit + if (index == -1) { + return false; + } + int offset = index * 36 + 4; + memmove(GetData() + offset, GetData() + offset + 36, (record_num - index - 1) * 36); + + SetRecordCount(record_num - 1); + return true; +} + +auto HeaderPage::UpdateRecord(const std::string &name, const page_id_t root_id) -> bool { + assert(name.length() < 32); + + int index = FindRecord(name); + // record does not exsit + if (index == -1) { + return false; + } + int offset = index * 36 + 4; + // update record content, only root_id + memcpy((GetData() + offset + 32), &root_id, 4); + + return true; +} + +auto HeaderPage::GetRootId(const std::string &name, page_id_t *root_id) -> bool { + assert(name.length() < 32); + + int index = FindRecord(name); + // record does not exsit + if (index == -1) { + return false; + } + int offset = (index + 1) * 36; + *root_id = *reinterpret_cast(GetData() + offset); + + return true; +} + +/** + * 
helper functions + */ +// record count +auto HeaderPage::GetRecordCount() -> int { return *reinterpret_cast(GetData()); } + +void HeaderPage::SetRecordCount(int record_count) { memcpy(GetData(), &record_count, 4); } + +auto HeaderPage::FindRecord(const std::string &name) -> int { + int record_num = GetRecordCount(); + + for (int i = 0; i < record_num; i++) { + char *raw_name = reinterpret_cast(GetData() + (4 + i * 36)); + if (strcmp(raw_name, name.c_str()) == 0) { + return i; + } + } + return -1; +} +} // namespace bustub diff --git a/src/storage/page/page_guard.cpp b/src/storage/page/page_guard.cpp deleted file mode 100644 index 3ee79fd..0000000 --- a/src/storage/page/page_guard.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include "storage/page/page_guard.h" -#include "buffer/buffer_pool_manager.h" - -namespace bustub { - -BasicPageGuard::BasicPageGuard(BasicPageGuard &&that) noexcept {} - -void BasicPageGuard::Drop() {} - -auto BasicPageGuard::operator=(BasicPageGuard &&that) noexcept -> BasicPageGuard & { return *this; } - -BasicPageGuard::~BasicPageGuard(){}; // NOLINT - -auto BasicPageGuard::UpgradeRead() -> ReadPageGuard { return {bpm_, page_}; } - -auto BasicPageGuard::UpgradeWrite() -> WritePageGuard { return {bpm_, page_}; } - -ReadPageGuard::ReadPageGuard(BufferPoolManager *bpm, Page *page) {} - -ReadPageGuard::ReadPageGuard(ReadPageGuard &&that) noexcept = default; - -auto ReadPageGuard::operator=(ReadPageGuard &&that) noexcept -> ReadPageGuard & { return *this; } - -void ReadPageGuard::Drop() {} - -ReadPageGuard::~ReadPageGuard() {} // NOLINT - -WritePageGuard::WritePageGuard(BufferPoolManager *bpm, Page *page) {} - -WritePageGuard::WritePageGuard(WritePageGuard &&that) noexcept = default; - -auto WritePageGuard::operator=(WritePageGuard &&that) noexcept -> WritePageGuard & { return *this; } - -void WritePageGuard::Drop() {} - -WritePageGuard::~WritePageGuard() {} // NOLINT - -} // namespace bustub diff --git a/src/storage/page/table_page.cpp 
b/src/storage/page/table_page.cpp index e1d0c91..0ec2d3a 100644 --- a/src/storage/page/table_page.cpp +++ b/src/storage/page/table_page.cpp @@ -13,97 +13,342 @@ #include "storage/page/table_page.h" #include -#include -#include -#include -#include "common/config.h" -#include "common/exception.h" -#include "storage/table/tuple.h" namespace bustub { -void TablePage::Init() { - next_page_id_ = INVALID_PAGE_ID; - num_tuples_ = 0; - num_deleted_tuples_ = 0; +void TablePage::Init(page_id_t page_id, uint32_t page_size, page_id_t prev_page_id, LogManager *log_manager, + Transaction *txn) { + // Set the page ID. + memcpy(GetData(), &page_id, sizeof(page_id)); + // Log that we are creating a new page. + if (enable_logging) { + LogRecord log_record = + LogRecord(txn->GetTransactionId(), txn->GetPrevLSN(), LogRecordType::NEWPAGE, prev_page_id, page_id); + lsn_t lsn = log_manager->AppendLogRecord(&log_record); + SetLSN(lsn); + txn->SetPrevLSN(lsn); + } + // Set the previous and next page IDs. + SetPrevPageId(prev_page_id); + SetNextPageId(INVALID_PAGE_ID); + SetFreeSpacePointer(page_size); + SetTupleCount(0); } -auto TablePage::GetNextTupleOffset(const TupleMeta &meta, const Tuple &tuple) const -> std::optional { - size_t slot_end_offset; - if (num_tuples_ > 0) { - auto &[offset, size, meta] = tuple_info_[num_tuples_ - 1]; - slot_end_offset = offset; - } else { - slot_end_offset = BUSTUB_PAGE_SIZE; +auto TablePage::InsertTuple(const Tuple &tuple, RID *rid, Transaction *txn, LockManager *lock_manager, + LogManager *log_manager) -> bool { + BUSTUB_ASSERT(tuple.size_ > 0, "Cannot have empty tuples."); + // If there is not enough space, then return false. + if (GetFreeSpaceRemaining() < tuple.size_ + SIZE_TUPLE) { + return false; + } + + // Try to find a free slot to reuse. + uint32_t i; + for (i = 0; i < GetTupleCount(); i++) { + // If the slot is empty, i.e. its tuple has size 0, + if (GetTupleSize(i) == 0) { + // Then we break out of the loop at index i. 
+ break; + } + } + + // If there was no free slot left, and we cannot claim it from the free space, then we give up. + if (i == GetTupleCount() && GetFreeSpaceRemaining() < tuple.size_ + SIZE_TUPLE) { + return false; } - auto tuple_offset = slot_end_offset - tuple.GetLength(); - auto offset_size = TABLE_PAGE_HEADER_SIZE + TUPLE_INFO_SIZE * (num_tuples_ + 1); - if (tuple_offset < offset_size) { - return std::nullopt; + + // Otherwise we claim available free space.. + SetFreeSpacePointer(GetFreeSpacePointer() - tuple.size_); + memcpy(GetData() + GetFreeSpacePointer(), tuple.data_, tuple.size_); + + // Set the tuple. + SetTupleOffsetAtSlot(i, GetFreeSpacePointer()); + SetTupleSize(i, tuple.size_); + + rid->Set(GetTablePageId(), i); + if (i == GetTupleCount()) { + SetTupleCount(GetTupleCount() + 1); } - return tuple_offset; + + /** + * Removed to support new lock manager API for p4 (multilevel locking); Big hack energy + * This clause was used in logging and recovery projects previously; not being used right now + */ + // // Write the log record. + // if (enable_logging) { + // BUSTUB_ASSERT(!txn->IsSharedLocked(*rid) && !txn->IsExclusiveLocked(*rid), "A new tuple should not be locked."); + // // Acquire an exclusive lock on the new tuple. 
+ // bool locked = lock_manager->LockExclusive(txn, *rid); + // BUSTUB_ENSURE(locked, "Locking a new tuple should always work."); + // LogRecord log_record(txn->GetTransactionId(), txn->GetPrevLSN(), LogRecordType::INSERT, *rid, tuple); + // lsn_t lsn = log_manager->AppendLogRecord(&log_record); + // SetLSN(lsn); + // txn->SetPrevLSN(lsn); + // } + return true; } -auto TablePage::InsertTuple(const TupleMeta &meta, const Tuple &tuple) -> std::optional { - auto tuple_offset = GetNextTupleOffset(meta, tuple); - if (tuple_offset == std::nullopt) { - return std::nullopt; +auto TablePage::MarkDelete(const RID &rid, Transaction *txn, LockManager *lock_manager, LogManager *log_manager) + -> bool { + uint32_t slot_num = rid.GetSlotNum(); + // If the slot number is invalid, abort the transaction. + if (slot_num >= GetTupleCount()) { + if (enable_logging) { + txn->SetState(TransactionState::ABORTED); + } + return false; + } + + uint32_t tuple_size = GetTupleSize(slot_num); + // If the tuple is already deleted, abort the transaction. + if (IsDeleted(tuple_size)) { + if (enable_logging) { + txn->SetState(TransactionState::ABORTED); + } + return false; } - auto tuple_id = num_tuples_; - tuple_info_[tuple_id] = std::make_tuple(*tuple_offset, tuple.GetLength(), meta); - num_tuples_++; - memcpy(page_start_ + *tuple_offset, tuple.data_.data(), tuple.GetLength()); - return tuple_id; + + /** + * Removed to support new lock manager API for p4 (multilevel locking); Big hack energy + * This clause was used in logging and recovery projects previously; not being used right now + */ + // if (enable_logging) { + // // Acquire an exclusive lock, upgrading from a shared lock if necessary. 
+ // if (txn->IsSharedLocked(rid)) { + // if (!lock_manager->LockUpgrade(txn, rid)) { + // return false; + // } + // } else if (!txn->IsExclusiveLocked(rid) && !lock_manager->LockExclusive(txn, rid)) { + // return false; + // } + // Tuple dummy_tuple; + // LogRecord log_record(txn->GetTransactionId(), txn->GetPrevLSN(), LogRecordType::MARKDELETE, rid, dummy_tuple); + // lsn_t lsn = log_manager->AppendLogRecord(&log_record); + // SetLSN(lsn); + // txn->SetPrevLSN(lsn); + // } + + // Mark the tuple as deleted. + if (tuple_size > 0) { + SetTupleSize(slot_num, SetDeletedFlag(tuple_size)); + } + return true; } -void TablePage::UpdateTupleMeta(const TupleMeta &meta, const RID &rid) { - auto tuple_id = rid.GetSlotNum(); - if (tuple_id >= num_tuples_) { - throw bustub::Exception("Tuple ID out of range"); +auto TablePage::UpdateTuple(const Tuple &new_tuple, Tuple *old_tuple, const RID &rid, Transaction *txn, + LockManager *lock_manager, LogManager *log_manager) -> bool { + BUSTUB_ASSERT(new_tuple.size_ > 0, "Cannot have empty tuples."); + uint32_t slot_num = rid.GetSlotNum(); + // If the slot number is invalid, abort the transaction. + if (slot_num >= GetTupleCount()) { + if (enable_logging) { + txn->SetState(TransactionState::ABORTED); + } + return false; + } + uint32_t tuple_size = GetTupleSize(slot_num); + // If the tuple is deleted, abort the transaction. + if (IsDeleted(tuple_size)) { + if (enable_logging) { + txn->SetState(TransactionState::ABORTED); + } + return false; } - auto &[offset, size, old_meta] = tuple_info_[tuple_id]; - if (!old_meta.is_deleted_ && meta.is_deleted_) { - num_deleted_tuples_++; + // If there is not enuogh space to update, we need to update via delete followed by an insert (not enough space). + if (GetFreeSpaceRemaining() + tuple_size < new_tuple.size_) { + return false; } - tuple_info_[tuple_id] = std::make_tuple(offset, size, meta); + + // Copy out the old value. 
+ uint32_t tuple_offset = GetTupleOffsetAtSlot(slot_num); + old_tuple->size_ = tuple_size; + if (old_tuple->allocated_) { + delete[] old_tuple->data_; + } + old_tuple->data_ = new char[old_tuple->size_]; + memcpy(old_tuple->data_, GetData() + tuple_offset, old_tuple->size_); + old_tuple->rid_ = rid; + old_tuple->allocated_ = true; + + /** + * Removed to support new lock manager API for p4 (multilevel locking); Big hack energy + * This clause was used in logging and recovery projects previously; not being used right now + */ + // if (enable_logging) { + // // Acquire an exclusive lock, upgrading from shared if necessary. + // if (txn->IsSharedLocked(rid)) { + // if (!lock_manager->LockUpgrade(txn, rid)) { + // return false; + // } + // } else if (!txn->IsExclusiveLocked(rid) && !lock_manager->LockExclusive(txn, rid)) { + // return false; + // } + // LogRecord log_record(txn->GetTransactionId(), txn->GetPrevLSN(), LogRecordType::UPDATE, rid, *old_tuple, + // new_tuple); lsn_t lsn = log_manager->AppendLogRecord(&log_record); SetLSN(lsn); txn->SetPrevLSN(lsn); + // } + + // Perform the update. + uint32_t free_space_pointer = GetFreeSpacePointer(); + BUSTUB_ASSERT(tuple_offset >= free_space_pointer, "Offset should appear after current free space position."); + + memmove(GetData() + free_space_pointer + tuple_size - new_tuple.size_, GetData() + free_space_pointer, + tuple_offset - free_space_pointer); + SetFreeSpacePointer(free_space_pointer + tuple_size - new_tuple.size_); + memcpy(GetData() + tuple_offset + tuple_size - new_tuple.size_, new_tuple.data_, new_tuple.size_); + SetTupleSize(slot_num, new_tuple.size_); + + // Update all tuple offsets. 
+ for (uint32_t i = 0; i < GetTupleCount(); ++i) { + uint32_t tuple_offset_i = GetTupleOffsetAtSlot(i); + if (GetTupleSize(i) > 0 && tuple_offset_i < tuple_offset + tuple_size) { + SetTupleOffsetAtSlot(i, tuple_offset_i + tuple_size - new_tuple.size_); + } + } + return true; } -auto TablePage::GetTuple(const RID &rid) const -> std::pair { - auto tuple_id = rid.GetSlotNum(); - if (tuple_id >= num_tuples_) { - throw bustub::Exception("Tuple ID out of range"); - } - auto &[offset, size, meta] = tuple_info_[tuple_id]; - Tuple tuple; - tuple.data_.resize(size); - memmove(tuple.data_.data(), page_start_ + offset, size); - tuple.rid_ = rid; - return std::make_pair(meta, std::move(tuple)); +void TablePage::ApplyDelete(const RID &rid, Transaction *txn, LogManager *log_manager) { + uint32_t slot_num = rid.GetSlotNum(); + BUSTUB_ASSERT(slot_num < GetTupleCount(), "Cannot have more slots than tuples."); + + uint32_t tuple_offset = GetTupleOffsetAtSlot(slot_num); + uint32_t tuple_size = GetTupleSize(slot_num); + // Check if this is a delete operation, i.e. commit a delete. + if (IsDeleted(tuple_size)) { + tuple_size = UnsetDeletedFlag(tuple_size); + } + // Otherwise we are rolling back an insert. + + // We need to copy out the deleted tuple for undo purposes. 
+ Tuple delete_tuple; + delete_tuple.size_ = tuple_size; + delete_tuple.data_ = new char[delete_tuple.size_]; + memcpy(delete_tuple.data_, GetData() + tuple_offset, delete_tuple.size_); + delete_tuple.rid_ = rid; + delete_tuple.allocated_ = true; + + /** + * Removed to support new lock manager API for p4 (multilevel locking); Big hack energy + * This clause was used in logging and recovery projects previously; not being used right now + */ + // if (enable_logging) { + // BUSTUB_ASSERT(txn->IsExclusiveLocked(rid), "We must own the exclusive lock!"); + // + // LogRecord log_record(txn->GetTransactionId(), txn->GetPrevLSN(), LogRecordType::APPLYDELETE, rid, delete_tuple); + // lsn_t lsn = log_manager->AppendLogRecord(&log_record); + // SetLSN(lsn); + // txn->SetPrevLSN(lsn); + // } + + uint32_t free_space_pointer = GetFreeSpacePointer(); + BUSTUB_ASSERT(tuple_offset >= free_space_pointer, "Free space appears before tuples."); + + memmove(GetData() + free_space_pointer + tuple_size, GetData() + free_space_pointer, + tuple_offset - free_space_pointer); + SetFreeSpacePointer(free_space_pointer + tuple_size); + SetTupleSize(slot_num, 0); + SetTupleOffsetAtSlot(slot_num, 0); + + // Update all tuple offsets. + for (uint32_t i = 0; i < GetTupleCount(); ++i) { + uint32_t tuple_offset_i = GetTupleOffsetAtSlot(i); + if (GetTupleSize(i) != 0 && tuple_offset_i < tuple_offset) { + SetTupleOffsetAtSlot(i, tuple_offset_i + tuple_size); + } + } } -auto TablePage::GetTupleMeta(const RID &rid) const -> TupleMeta { - auto tuple_id = rid.GetSlotNum(); - if (tuple_id >= num_tuples_) { - throw bustub::Exception("Tuple ID out of range"); +void TablePage::RollbackDelete(const RID &rid, Transaction *txn, LogManager *log_manager) { + // Log the rollback. 
+ /** + * Removed to support new lock manager API for p4 (multilevel locking); Big hack energy + * This clause was used in logging and recovery projects previously; not being used right now + */ + // if (enable_logging) { + // BUSTUB_ASSERT(txn->IsExclusiveLocked(rid), "We must own an exclusive lock on the RID."); + // Tuple dummy_tuple; + // LogRecord log_record(txn->GetTransactionId(), txn->GetPrevLSN(), LogRecordType::ROLLBACKDELETE, rid, + // dummy_tuple); lsn_t lsn = log_manager->AppendLogRecord(&log_record); SetLSN(lsn); txn->SetPrevLSN(lsn); + // } + + uint32_t slot_num = rid.GetSlotNum(); + BUSTUB_ASSERT(slot_num < GetTupleCount(), "We can't have more slots than tuples."); + uint32_t tuple_size = GetTupleSize(slot_num); + + // Unset the deleted flag. + if (IsDeleted(tuple_size)) { + SetTupleSize(slot_num, UnsetDeletedFlag(tuple_size)); } - auto &[_1, _2, meta] = tuple_info_[tuple_id]; - return meta; } -void TablePage::UpdateTupleInPlaceUnsafe(const TupleMeta &meta, const Tuple &tuple, RID rid) { - auto tuple_id = rid.GetSlotNum(); - if (tuple_id >= num_tuples_) { - throw bustub::Exception("Tuple ID out of range"); +auto TablePage::GetTuple(const RID &rid, Tuple *tuple, Transaction *txn, LockManager *lock_manager) -> bool { + // Get the current slot number. + uint32_t slot_num = rid.GetSlotNum(); + // If somehow we have more slots than tuples, abort the transaction. + if (slot_num >= GetTupleCount()) { + if (enable_logging) { + txn->SetState(TransactionState::ABORTED); + } + return false; } - auto &[offset, size, old_meta] = tuple_info_[tuple_id]; - if (size != tuple.GetLength()) { - throw bustub::Exception("Tuple size mismatch"); + // Otherwise get the current tuple size too. + uint32_t tuple_size = GetTupleSize(slot_num); + // If the tuple is deleted, abort the transaction. 
+ if (IsDeleted(tuple_size)) { + if (enable_logging) { + txn->SetState(TransactionState::ABORTED); + } + return false; } - if (!old_meta.is_deleted_ && meta.is_deleted_) { - num_deleted_tuples_++; + + /** + * Removed to support new lock manager API for p4 (multilevel locking); Big hack energy + * This clause was used in logging and recovery projects previously; not being used right now + */ + // // Otherwise we have a valid tuple, try to acquire at least a shared lock. + // if (enable_logging) { + // if (!txn->IsSharedLocked(rid) && !txn->IsExclusiveLocked(rid) && !lock_manager->LockShared(txn, rid)) { + // return false; + // } + // } + + // At this point, we have at least a shared lock on the RID. Copy the tuple data into our result. + uint32_t tuple_offset = GetTupleOffsetAtSlot(slot_num); + tuple->size_ = tuple_size; + if (tuple->allocated_) { + delete[] tuple->data_; } - tuple_info_[tuple_id] = std::make_tuple(offset, size, meta); - memcpy(page_start_ + offset, tuple.data_.data(), tuple.GetLength()); + tuple->data_ = new char[tuple->size_]; + memcpy(tuple->data_, GetData() + tuple_offset, tuple->size_); + tuple->rid_ = rid; + tuple->allocated_ = true; + return true; } +auto TablePage::GetFirstTupleRid(RID *first_rid) -> bool { + // Find and return the first valid tuple. + for (uint32_t i = 0; i < GetTupleCount(); ++i) { + if (!IsDeleted(GetTupleSize(i))) { + first_rid->Set(GetTablePageId(), i); + return true; + } + } + first_rid->Set(INVALID_PAGE_ID, 0); + return false; +} + +auto TablePage::GetNextTupleRid(const RID &cur_rid, RID *next_rid) -> bool { + BUSTUB_ASSERT(cur_rid.GetPageId() == GetTablePageId(), "Wrong table!"); + // Find and return the first valid tuple after our current slot number. + for (auto i = cur_rid.GetSlotNum() + 1; i < GetTupleCount(); ++i) { + if (!IsDeleted(GetTupleSize(i))) { + next_rid->Set(GetTablePageId(), i); + return true; + } + } + // Otherwise return false as there are no more tuples. 
+ next_rid->Set(INVALID_PAGE_ID, 0); + return false; +} } // namespace bustub diff --git a/src/storage/table/table_heap.cpp b/src/storage/table/table_heap.cpp index 5aa226b..76e01ea 100644 --- a/src/storage/table/table_heap.cpp +++ b/src/storage/table/table_heap.cpp @@ -11,146 +11,191 @@ //===----------------------------------------------------------------------===// #include -#include // NOLINT -#include -#include "common/config.h" -#include "common/exception.h" #include "common/logger.h" -#include "common/macros.h" -#include "concurrency/transaction.h" #include "fmt/format.h" -#include "storage/page/page_guard.h" -#include "storage/page/table_page.h" #include "storage/table/table_heap.h" namespace bustub { -TableHeap::TableHeap(BufferPoolManager *bpm) : bpm_(bpm) { +TableHeap::TableHeap(BufferPoolManager *buffer_pool_manager, LockManager *lock_manager, LogManager *log_manager, + page_id_t first_page_id) + : buffer_pool_manager_(buffer_pool_manager), + lock_manager_(lock_manager), + log_manager_(log_manager), + first_page_id_(first_page_id) {} + +TableHeap::TableHeap(BufferPoolManager *buffer_pool_manager, LockManager *lock_manager, LogManager *log_manager, + Transaction *txn) + : buffer_pool_manager_(buffer_pool_manager), lock_manager_(lock_manager), log_manager_(log_manager) { // Initialize the first table page. - auto guard = bpm->NewPageGuarded(&first_page_id_); - last_page_id_ = first_page_id_; - auto first_page = guard.AsMut(); + auto first_page = reinterpret_cast(buffer_pool_manager_->NewPage(&first_page_id_)); BUSTUB_ASSERT(first_page != nullptr, "Couldn't create a page for the table heap. 
Have you completed the buffer pool manager project?"); - first_page->Init(); + first_page->Init(first_page_id_, BUSTUB_PAGE_SIZE, INVALID_LSN, log_manager_, txn); + buffer_pool_manager_->UnpinPage(first_page_id_, true); } -TableHeap::TableHeap(bool create_table_heap) : bpm_(nullptr) {} - -auto TableHeap::InsertTuple(const TupleMeta &meta, const Tuple &tuple, LockManager *lock_mgr, Transaction *txn, - table_oid_t oid) -> std::optional { - std::unique_lock guard(latch_); - auto page_guard = bpm_->FetchPageWrite(last_page_id_); - while (true) { - auto page = page_guard.AsMut(); - if (page->GetNextTupleOffset(meta, tuple) != std::nullopt) { - break; - } - - // if there's no tuple in the page, and we can't insert the tuple, then this tuple is too large. - BUSTUB_ENSURE(page->GetNumTuples() != 0, "tuple is too large, cannot insert"); - - page_id_t next_page_id = INVALID_PAGE_ID; - auto npg = bpm_->NewPage(&next_page_id); - BUSTUB_ENSURE(next_page_id != INVALID_PAGE_ID, "cannot allocate page"); - - page->SetNextPageId(next_page_id); - - auto next_page = reinterpret_cast(npg->GetData()); - next_page->Init(); - - page_guard.Drop(); - - auto next_page_guard = WritePageGuard{bpm_, npg}; - - last_page_id_ = next_page_id; - page_guard = std::move(next_page_guard); +auto TableHeap::InsertTuple(const Tuple &tuple, RID *rid, Transaction *txn) -> bool { + if (tuple.size_ + 32 > BUSTUB_PAGE_SIZE) { // larger than one page size + txn->SetState(TransactionState::ABORTED); + return false; } - auto last_page_id = last_page_id_; - - auto page = page_guard.AsMut(); - auto slot_id = *page->InsertTuple(meta, tuple); - // only allow one insertion at a time, otherwise it will deadlock. 
- guard.unlock(); - -#ifndef DISABLE_LOCK_MANAGER - if (lock_mgr != nullptr) { - BUSTUB_ENSURE(lock_mgr->LockRow(txn, LockManager::LockMode::EXCLUSIVE, oid, RID{last_page_id, slot_id}), - "failed to lock when inserting new tuple"); + auto cur_page = static_cast(buffer_pool_manager_->FetchPage(first_page_id_)); + if (cur_page == nullptr) { + txn->SetState(TransactionState::ABORTED); + return false; } -#endif - - page_guard.Drop(); - return RID(last_page_id, slot_id); + cur_page->WLatch(); + + // Insert into the first page with enough space. If no such page exists, create a new page and insert into that. + // INVARIANT: cur_page is WLatched if you leave the loop normally. + while (!cur_page->InsertTuple(tuple, rid, txn, lock_manager_, log_manager_)) { + auto next_page_id = cur_page->GetNextPageId(); + // If the next page is a valid page, + if (next_page_id != INVALID_PAGE_ID) { + auto next_page = static_cast(buffer_pool_manager_->FetchPage(next_page_id)); + next_page->WLatch(); + // Unlatch and unpin the current page. + cur_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(cur_page->GetTablePageId(), false); + cur_page = next_page; + } else { + // Otherwise we have run out of valid pages. We need to create a new page. + auto new_page = static_cast(buffer_pool_manager_->NewPage(&next_page_id)); + // If we could not create a new page, + if (new_page == nullptr) { + // Then life sucks and we abort the transaction. + cur_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(cur_page->GetTablePageId(), false); + txn->SetState(TransactionState::ABORTED); + return false; + } + // Otherwise we were able to create a new page. We initialize it now. 
+ new_page->WLatch(); + cur_page->SetNextPageId(next_page_id); + new_page->Init(next_page_id, BUSTUB_PAGE_SIZE, cur_page->GetTablePageId(), log_manager_, txn); + cur_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(cur_page->GetTablePageId(), true); + cur_page = new_page; + } + } + // This line has caused most of us to double-take and "whoa double unlatch". + // We are not, in fact, double unlatching. See the invariant above. + cur_page->WUnlatch(); + buffer_pool_manager_->UnpinPage(cur_page->GetTablePageId(), true); + // Update the transaction's write set. + txn->GetWriteSet()->emplace_back(*rid, WType::INSERT, Tuple{}, this); + return true; } -void TableHeap::UpdateTupleMeta(const TupleMeta &meta, RID rid) { - auto page_guard = bpm_->FetchPageWrite(rid.GetPageId()); - auto page = page_guard.AsMut(); - page->UpdateTupleMeta(meta, rid); +auto TableHeap::MarkDelete(const RID &rid, Transaction *txn) -> bool { + // TODO(Amadou): remove empty page + // Find the page which contains the tuple. + auto page = reinterpret_cast(buffer_pool_manager_->FetchPage(rid.GetPageId())); + // If the page could not be found, then abort the transaction. + if (page == nullptr) { + txn->SetState(TransactionState::ABORTED); + return false; + } + // Otherwise, mark the tuple as deleted. + page->WLatch(); + page->MarkDelete(rid, txn, lock_manager_, log_manager_); + page->WUnlatch(); + buffer_pool_manager_->UnpinPage(page->GetTablePageId(), true); + // Update the transaction's write set. + txn->GetWriteSet()->emplace_back(rid, WType::DELETE, Tuple{}, this); + return true; } -auto TableHeap::GetTuple(RID rid) -> std::pair { - auto page_guard = bpm_->FetchPageRead(rid.GetPageId()); - auto page = page_guard.As(); - auto [meta, tuple] = page->GetTuple(rid); - tuple.rid_ = rid; - return std::make_pair(meta, std::move(tuple)); +auto TableHeap::UpdateTuple(const Tuple &tuple, const RID &rid, Transaction *txn) -> bool { + // Find the page which contains the tuple. 
+ auto page = reinterpret_cast(buffer_pool_manager_->FetchPage(rid.GetPageId())); + // If the page could not be found, then abort the transaction. + if (page == nullptr) { + txn->SetState(TransactionState::ABORTED); + return false; + } + // Update the tuple; but first save the old value for rollbacks. + Tuple old_tuple; + page->WLatch(); + bool is_updated = page->UpdateTuple(tuple, &old_tuple, rid, txn, lock_manager_, log_manager_); + page->WUnlatch(); + buffer_pool_manager_->UnpinPage(page->GetTablePageId(), is_updated); + // Update the transaction's write set. + if (is_updated && txn->GetState() != TransactionState::ABORTED) { + txn->GetWriteSet()->emplace_back(rid, WType::UPDATE, old_tuple, this); + } + return is_updated; } -auto TableHeap::GetTupleMeta(RID rid) -> TupleMeta { - auto page_guard = bpm_->FetchPageRead(rid.GetPageId()); - auto page = page_guard.As(); - return page->GetTupleMeta(rid); +void TableHeap::ApplyDelete(const RID &rid, Transaction *txn) { + // Find the page which contains the tuple. + auto page = reinterpret_cast(buffer_pool_manager_->FetchPage(rid.GetPageId())); + BUSTUB_ASSERT(page != nullptr, "Couldn't find a page containing that RID."); + // Delete the tuple from the page. 
+ page->WLatch(); + page->ApplyDelete(rid, txn, log_manager_); + /** Commented out to make compatible with p4; This is called only on commit or delete, which consequently unlocks the + * tuple; so should be fine */ + // lock_manager_->Unlock(txn, rid); + page->WUnlatch(); + buffer_pool_manager_->UnpinPage(page->GetTablePageId(), true); } -auto TableHeap::MakeIterator() -> TableIterator { - std::unique_lock guard(latch_); - auto last_page_id = last_page_id_; - guard.unlock(); - - auto page_guard = bpm_->FetchPageRead(last_page_id); - auto page = page_guard.As(); - auto num_tuples = page->GetNumTuples(); - page_guard.Drop(); - return {this, {first_page_id_, 0}, {last_page_id, num_tuples}}; +void TableHeap::RollbackDelete(const RID &rid, Transaction *txn) { + // Find the page which contains the tuple. + auto page = reinterpret_cast(buffer_pool_manager_->FetchPage(rid.GetPageId())); + BUSTUB_ASSERT(page != nullptr, "Couldn't find a page containing that RID."); + // Rollback the delete. + page->WLatch(); + page->RollbackDelete(rid, txn, log_manager_); + page->WUnlatch(); + buffer_pool_manager_->UnpinPage(page->GetTablePageId(), true); } -auto TableHeap::MakeEagerIterator() -> TableIterator { return {this, {first_page_id_, 0}, {INVALID_PAGE_ID, 0}}; } - -auto TableHeap::UpdateTupleInPlace(const TupleMeta &meta, const Tuple &tuple, RID rid, - std::function &&check) - -> bool { - auto page_guard = bpm_->FetchPageWrite(rid.GetPageId()); - auto page = page_guard.AsMut(); - auto [old_meta, old_tup] = page->GetTuple(rid); - if (check == nullptr || check(old_meta, old_tup, rid)) { - page->UpdateTupleInPlaceUnsafe(meta, tuple, rid); - return true; +auto TableHeap::GetTuple(const RID &rid, Tuple *tuple, Transaction *txn, bool acquire_read_lock) -> bool { + // Find the page which contains the tuple. + auto page = static_cast(buffer_pool_manager_->FetchPage(rid.GetPageId())); + // If the page could not be found, then abort the transaction. 
+ if (page == nullptr) { + txn->SetState(TransactionState::ABORTED); + return false; } - return false; -} - -auto TableHeap::AcquireTablePageReadLock(RID rid) -> ReadPageGuard { return bpm_->FetchPageRead(rid.GetPageId()); } - -auto TableHeap::AcquireTablePageWriteLock(RID rid) -> WritePageGuard { return bpm_->FetchPageWrite(rid.GetPageId()); } - -void TableHeap::UpdateTupleInPlaceWithLockAcquired(const TupleMeta &meta, const Tuple &tuple, RID rid, - TablePage *page) { - page->UpdateTupleInPlaceUnsafe(meta, tuple, rid); + // Read the tuple from the page. + if (acquire_read_lock) { + page->RLatch(); + } + bool res = page->GetTuple(rid, tuple, txn, lock_manager_); + if (acquire_read_lock) { + page->RUnlatch(); + } + buffer_pool_manager_->UnpinPage(rid.GetPageId(), false); + return res; } -auto TableHeap::GetTupleWithLockAcquired(RID rid, const TablePage *page) -> std::pair { - auto [meta, tuple] = page->GetTuple(rid); - tuple.rid_ = rid; - return std::make_pair(meta, std::move(tuple)); +auto TableHeap::Begin(Transaction *txn) -> TableIterator { + // Start an iterator from the first page. + // TODO(Wuwen): Hacky fix for now. Removing empty pages is a better way to handle this. + RID rid; + auto page_id = first_page_id_; + while (page_id != INVALID_PAGE_ID) { + auto page = static_cast(buffer_pool_manager_->FetchPage(page_id)); + page->RLatch(); + // If this fails because there is no tuple, then RID will be the default-constructed value, which means EOF. 
+ auto found_tuple = page->GetFirstTupleRid(&rid); + page->RUnlatch(); + buffer_pool_manager_->UnpinPage(page_id, false); + if (found_tuple) { + break; + } + page_id = page->GetNextPageId(); + } + return {this, rid, txn}; } -auto TableHeap::GetTupleMetaWithLockAcquired(RID rid, const TablePage *page) -> TupleMeta { - return page->GetTupleMeta(rid); -} +auto TableHeap::End() -> TableIterator { return {this, RID(INVALID_PAGE_ID, 0), nullptr}; } } // namespace bustub diff --git a/src/storage/table/table_iterator.cpp b/src/storage/table/table_iterator.cpp index 7d69b78..3d347bb 100644 --- a/src/storage/table/table_iterator.cpp +++ b/src/storage/table/table_iterator.cpp @@ -11,64 +11,73 @@ //===----------------------------------------------------------------------===// #include -#include -#include "common/config.h" #include "common/exception.h" #include "concurrency/transaction.h" #include "storage/table/table_heap.h" namespace bustub { -TableIterator::TableIterator(TableHeap *table_heap, RID rid, RID stop_at_rid) - : table_heap_(table_heap), rid_(rid), stop_at_rid_(stop_at_rid) { - // If the rid doesn't correspond to a tuple (i.e., the table has just been initialized), then - // we set rid_ to invalid. 
- if (rid.GetPageId() == INVALID_PAGE_ID) { - rid_ = RID{INVALID_PAGE_ID, 0}; - } else { - auto page_guard = table_heap_->bpm_->FetchPageRead(rid_.GetPageId()); - auto page = page_guard.As(); - if (rid_.GetSlotNum() >= page->GetNumTuples()) { - rid_ = RID{INVALID_PAGE_ID, 0}; +TableIterator::TableIterator(TableHeap *table_heap, RID rid, Transaction *txn) + : table_heap_(table_heap), tuple_(new Tuple(rid)), txn_(txn) { + if (rid.GetPageId() != INVALID_PAGE_ID) { + if (!table_heap_->GetTuple(tuple_->rid_, tuple_, txn_)) { + throw bustub::Exception("read non-existing tuple"); } } } -auto TableIterator::GetTuple() -> std::pair { return table_heap_->GetTuple(rid_); } - -auto TableIterator::GetRID() -> RID { return rid_; } +auto TableIterator::operator*() -> const Tuple & { + assert(*this != table_heap_->End()); + return *tuple_; +} -auto TableIterator::IsEnd() -> bool { return rid_.GetPageId() == INVALID_PAGE_ID; } +auto TableIterator::operator->() -> Tuple * { + assert(*this != table_heap_->End()); + return tuple_; +} auto TableIterator::operator++() -> TableIterator & { - auto page_guard = table_heap_->bpm_->FetchPageRead(rid_.GetPageId()); - auto page = page_guard.As(); - auto next_tuple_id = rid_.GetSlotNum() + 1; + BufferPoolManager *buffer_pool_manager = table_heap_->buffer_pool_manager_; + auto cur_page = static_cast(buffer_pool_manager->FetchPage(tuple_->rid_.GetPageId())); + BUSTUB_ENSURE(cur_page != nullptr, "BPM full"); // all pages are pinned - if (stop_at_rid_.GetPageId() != INVALID_PAGE_ID) { - BUSTUB_ASSERT( - /* case 1: cursor before the page of the stop tuple */ rid_.GetPageId() < stop_at_rid_.GetPageId() || - /* case 2: cursor at the page before the tuple */ - (rid_.GetPageId() == stop_at_rid_.GetPageId() && next_tuple_id <= stop_at_rid_.GetSlotNum()), - "iterate out of bound"); + cur_page->RLatch(); + RID next_tuple_rid; + if (!cur_page->GetNextTupleRid(tuple_->rid_, + &next_tuple_rid)) { // end of this page + while (cur_page->GetNextPageId() != 
INVALID_PAGE_ID) { + auto next_page = static_cast(buffer_pool_manager->FetchPage(cur_page->GetNextPageId())); + cur_page->RUnlatch(); + buffer_pool_manager->UnpinPage(cur_page->GetTablePageId(), false); + cur_page = next_page; + cur_page->RLatch(); + if (cur_page->GetFirstTupleRid(&next_tuple_rid)) { + break; + } + } } + tuple_->rid_ = next_tuple_rid; - rid_ = RID{rid_.GetPageId(), next_tuple_id}; - - if (rid_ == stop_at_rid_) { - rid_ = RID{INVALID_PAGE_ID, 0}; - } else if (next_tuple_id < page->GetNumTuples()) { - // that's fine - } else { - auto next_page_id = page->GetNextPageId(); - // if next page is invalid, RID is set to invalid page; otherwise, it's the first tuple in that page. - rid_ = RID{next_page_id, 0}; + if (*this != table_heap_->End()) { + // DO NOT ACQUIRE READ LOCK twice in a single thread otherwise it may deadlock. + // See https://users.rust-lang.org/t/how-bad-is-the-potential-deadlock-mentioned-in-rwlocks-document/67234 + if (!table_heap_->GetTuple(tuple_->rid_, tuple_, txn_, false)) { + cur_page->RUnlatch(); + buffer_pool_manager->UnpinPage(cur_page->GetTablePageId(), false); + throw bustub::Exception("read non-existing tuple"); + } } - - page_guard.Drop(); - + // release until copy the tuple + cur_page->RUnlatch(); + buffer_pool_manager->UnpinPage(cur_page->GetTablePageId(), false); return *this; } +auto TableIterator::operator++(int) -> TableIterator { + TableIterator clone(*this); + ++(*this); + return clone; +} + } // namespace bustub diff --git a/src/storage/table/tuple.cpp b/src/storage/table/tuple.cpp index de36e1c..79ca812 100644 --- a/src/storage/table/tuple.cpp +++ b/src/storage/table/tuple.cpp @@ -21,47 +21,83 @@ namespace bustub { // TODO(Amadou): It does not look like nulls are supported. Add a null bitmap? -Tuple::Tuple(std::vector values, const Schema *schema) { +Tuple::Tuple(std::vector values, const Schema *schema) : allocated_(true) { assert(values.size() == schema->GetColumnCount()); // 1. Calculate the size of the tuple. 
- uint32_t tuple_size = schema->GetInlinedStorageSize(); + uint32_t tuple_size = schema->GetLength(); for (auto &i : schema->GetUnlinedColumns()) { - auto len = values[i].GetStorageSize(); + auto len = values[i].GetLength(); if (len == BUSTUB_VALUE_NULL) { len = 0; } - tuple_size += sizeof(uint32_t) + len; + tuple_size += (len + sizeof(uint32_t)); } // 2. Allocate memory. - data_.resize(tuple_size); - std::fill(data_.begin(), data_.end(), 0); + size_ = tuple_size; + data_ = new char[size_]; + std::memset(data_, 0, size_); // 3. Serialize each attribute based on the input value. uint32_t column_count = schema->GetColumnCount(); - uint32_t offset = schema->GetInlinedStorageSize(); + uint32_t offset = schema->GetLength(); for (uint32_t i = 0; i < column_count; i++) { const auto &col = schema->GetColumn(i); if (!col.IsInlined()) { // Serialize relative offset, where the actual varchar data is stored. - *reinterpret_cast(data_.data() + col.GetOffset()) = offset; + *reinterpret_cast(data_ + col.GetOffset()) = offset; // Serialize varchar value, in place (size+data). - values[i].SerializeTo(data_.data() + offset); - auto len = values[i].GetStorageSize(); + values[i].SerializeTo(data_ + offset); + auto len = values[i].GetLength(); if (len == BUSTUB_VALUE_NULL) { len = 0; } - offset += sizeof(uint32_t) + len; + offset += (len + sizeof(uint32_t)); } else { - values[i].SerializeTo(data_.data() + col.GetOffset()); + values[i].SerializeTo(data_ + col.GetOffset()); } } } +Tuple::Tuple(const Tuple &other) : allocated_(other.allocated_), rid_(other.rid_), size_(other.size_) { + if (allocated_) { + delete[] data_; + } + if (allocated_) { + // Deep copy. + data_ = new char[size_]; + memcpy(data_, other.data_, size_); + } else { + // Shallow copy. 
+ data_ = other.data_; + } +} + +auto Tuple::operator=(const Tuple &other) -> Tuple & { + if (allocated_) { + delete[] data_; + } + allocated_ = other.allocated_; + rid_ = other.rid_; + size_ = other.size_; + + if (allocated_) { + // Deep copy. + data_ = new char[size_]; + memcpy(data_, other.data_, size_); + } else { + // Shallow copy. + data_ = other.data_; + } + + return *this; +} + auto Tuple::GetValue(const Schema *schema, const uint32_t column_idx) const -> Value { assert(schema); + assert(data_); const TypeId column_type = schema->GetColumn(column_idx).GetType(); const char *data_ptr = GetDataPtr(schema, column_idx); // the third parameter "is_inlined" is unused @@ -80,16 +116,17 @@ auto Tuple::KeyFromTuple(const Schema &schema, const Schema &key_schema, const s auto Tuple::GetDataPtr(const Schema *schema, const uint32_t column_idx) const -> const char * { assert(schema); + assert(data_); const auto &col = schema->GetColumn(column_idx); bool is_inlined = col.IsInlined(); // For inline type, data is stored where it is. if (is_inlined) { - return (data_.data() + col.GetOffset()); + return (data_ + col.GetOffset()); } // We read the relative offset from the tuple data. - int32_t offset = *reinterpret_cast(data_.data() + col.GetOffset()); + int32_t offset = *reinterpret_cast(data_ + col.GetOffset()); // And return the beginning address of the real data for the VARCHAR type. 
- return (data_.data() + offset); + return (data_ + offset); } auto Tuple::ToString(const Schema *schema) const -> std::string { @@ -112,20 +149,26 @@ auto Tuple::ToString(const Schema *schema) const -> std::string { } } os << ")"; + os << " Tuple size is " << size_; return os.str(); } void Tuple::SerializeTo(char *storage) const { - int32_t sz = data_.size(); - memcpy(storage, &sz, sizeof(int32_t)); - memcpy(storage + sizeof(int32_t), data_.data(), sz); + memcpy(storage, &size_, sizeof(int32_t)); + memcpy(storage + sizeof(int32_t), data_, size_); } void Tuple::DeserializeFrom(const char *storage) { uint32_t size = *reinterpret_cast(storage); - this->data_.resize(size); - memcpy(this->data_.data(), storage + sizeof(int32_t), size); + // Construct a tuple. + this->size_ = size; + if (this->allocated_) { + delete[] this->data_; + } + this->data_ = new char[this->size_]; + memcpy(this->data_, storage + sizeof(int32_t), this->size_); + this->allocated_ = true; } } // namespace bustub diff --git a/src/type/CMakeLists.txt b/src/type/CMakeLists.txt index 23baca0..2cb28d5 100644 --- a/src/type/CMakeLists.txt +++ b/src/type/CMakeLists.txt @@ -11,8 +11,7 @@ add_library( tinyint_type.cpp type.cpp value.cpp - varlen_type.cpp - vector_type.cpp) + varlen_type.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/type/type.cpp b/src/type/type.cpp index 6d75c32..788efaa 100644 --- a/src/type/type.cpp +++ b/src/type/type.cpp @@ -17,25 +17,16 @@ #include "type/decimal_type.h" #include "type/integer_type.h" #include "type/smallint_type.h" -#include "type/timestamp_type.h" #include "type/tinyint_type.h" -#include "type/type_id.h" #include "type/value.h" #include "type/varlen_type.h" -#include "type/vector_type.h" namespace bustub { -Type *Type::k_types[] = {new Type(TypeId::INVALID), - new BooleanType(), - new TinyintType(), - new SmallintType(), - new IntegerType(TypeId::INTEGER), - new BigintType(), - new DecimalType(), - new VarlenType(TypeId::VARCHAR), - new 
TimestampType(), - new VectorType()}; +Type *Type::k_types[] = { + new Type(TypeId::INVALID), new BooleanType(), new TinyintType(), new SmallintType(), + new IntegerType(TypeId::INTEGER), new BigintType(), new DecimalType(), new VarlenType(TypeId::VARCHAR), +}; // Get the size of this data type in bytes auto Type::GetTypeSize(const TypeId type_id) -> uint64_t { @@ -124,8 +115,6 @@ auto Type::TypeIdToString(const TypeId type_id) -> std::string { return "TIMESTAMP"; case VARCHAR: return "VARCHAR"; - case VECTOR: - return "VECTOR"; default: return "INVALID"; } @@ -292,8 +281,13 @@ auto Type::GetData(const Value &val __attribute__((unused))) const -> const char } // Get the length of the variable length data -auto Type::GetStorageSize(const Value &val __attribute__((unused))) const -> uint32_t { - throw NotImplementedException("GetStorageSize not implemented"); +auto Type::GetLength(const Value &val __attribute__((unused))) const -> uint32_t { + throw NotImplementedException("GetLength not implemented"); +} + +// Access the raw varlen data stored from the tuple storage +auto Type::GetData(char *storage __attribute__((unused))) -> char * { + throw NotImplementedException("GetData not implemented"); } } // namespace bustub diff --git a/src/type/value.cpp b/src/type/value.cpp index e1aad6a..4ff5411 100644 --- a/src/type/value.cpp +++ b/src/type/value.cpp @@ -14,11 +14,8 @@ #include #include -#include "catalog/column.h" #include "common/exception.h" -#include "type/type.h" #include "type/value.h" -#include "type/vector_type.h" namespace bustub { Value::Value(const Value &other) { @@ -28,7 +25,6 @@ Value::Value(const Value &other) { value_ = other.value_; switch (type_id_) { case TypeId::VARCHAR: - case TypeId::VECTOR: if (size_.len_ == BUSTUB_VALUE_NULL) { value_.varlen_ = nullptr; } else { @@ -218,7 +214,6 @@ Value::Value(TypeId type, float f) : Value(type) { Value::Value(TypeId type, const char *data, uint32_t len, bool manage_data) : Value(type) { switch (type) { case 
TypeId::VARCHAR: - case TypeId::VECTOR: if (data == nullptr) { value_.varlen_ = nullptr; size_.len_ = BUSTUB_VALUE_NULL; @@ -259,27 +254,10 @@ Value::Value(TypeId type, const std::string &data) : Value(type) { } } -Value::Value(TypeId type, const std::vector &data) : Value(type) { - switch (type) { - case TypeId::VECTOR: { - manage_data_ = true; - auto len = data.size() * sizeof(double); - value_.varlen_ = new char[len]; - assert(value_.varlen_ != nullptr); - size_.len_ = len; - memcpy(value_.varlen_, data.data(), len); - break; - } - default: - throw Exception(ExceptionType::INCOMPATIBLE_TYPE, "Invalid Type for variable-length Value constructor"); - } -} - // delete allocated char array space Value::~Value() { switch (type_id_) { case TypeId::VARCHAR: - case TypeId::VECTOR: if (manage_data_) { delete[] value_.varlen_; } @@ -332,20 +310,4 @@ auto Value::CheckInteger() const -> bool { } return false; } - -auto Value::GetColumn() const -> Column { - switch (GetTypeId()) { - case TypeId::VARCHAR: - case TypeId::VECTOR: { - return Column{"", GetTypeId(), GetStorageSize()}; - } - default: - return Column{"", GetTypeId()}; - } -} - -auto Value::GetVector() const -> std::vector { - return reinterpret_cast(Type::GetInstance(type_id_))->GetVector(*this); -} - } // namespace bustub diff --git a/src/type/varlen_type.cpp b/src/type/varlen_type.cpp index 9206c69..871deec 100644 --- a/src/type/varlen_type.cpp +++ b/src/type/varlen_type.cpp @@ -20,18 +20,18 @@ namespace bustub { #define VARLEN_COMPARE_FUNC(OP) \ const char *str1 = left.GetData(); \ - uint32_t len1 = GetStorageSize(left) - 1; \ + uint32_t len1 = GetLength(left) - 1; \ const char *str2; \ uint32_t len2; \ if (right.GetTypeId() == TypeId::VARCHAR) { \ str2 = right.GetData(); \ - len2 = GetStorageSize(right) - 1; \ + len2 = GetLength(right) - 1; \ /* NOLINTNEXTLINE */ \ return GetCmpBool(TypeUtil::CompareStrings(str1, len1, str2, len2) OP 0); \ } else { \ auto r_value = right.CastAs(TypeId::VARCHAR); \ str2 = 
r_value.GetData(); \ - len2 = GetStorageSize(r_value) - 1; \ + len2 = GetLength(r_value) - 1; \ /* NOLINTNEXTLINE */ \ return GetCmpBool(TypeUtil::CompareStrings(str1, len1, str2, len2) OP 0); \ } @@ -44,13 +44,16 @@ VarlenType::~VarlenType() = default; auto VarlenType::GetData(const Value &val) const -> const char * { return val.value_.varlen_; } // Get the length of the variable length data (including the length field) -auto VarlenType::GetStorageSize(const Value &val) const -> uint32_t { return val.size_.len_; } +auto VarlenType::GetLength(const Value &val) const -> uint32_t { return val.size_.len_; } auto VarlenType::CompareEquals(const Value &left, const Value &right) const -> CmpBool { assert(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) { return CmpBool::CmpNull; } + if (GetLength(left) == BUSTUB_VARCHAR_MAX_LEN || GetLength(right) == BUSTUB_VARCHAR_MAX_LEN) { + return GetCmpBool(GetLength(left) == GetLength(right)); + } VARLEN_COMPARE_FUNC(==); // NOLINT } @@ -60,6 +63,9 @@ auto VarlenType::CompareNotEquals(const Value &left, const Value &right) const - if (left.IsNull() || right.IsNull()) { return CmpBool::CmpNull; } + if (GetLength(left) == BUSTUB_VARCHAR_MAX_LEN || GetLength(right) == BUSTUB_VARCHAR_MAX_LEN) { + return GetCmpBool(GetLength(left) != GetLength(right)); + } VARLEN_COMPARE_FUNC(!=); // NOLINT } @@ -69,6 +75,9 @@ auto VarlenType::CompareLessThan(const Value &left, const Value &right) const -> if (left.IsNull() || right.IsNull()) { return CmpBool::CmpNull; } + if (GetLength(left) == BUSTUB_VARCHAR_MAX_LEN || GetLength(right) == BUSTUB_VARCHAR_MAX_LEN) { + return GetCmpBool(GetLength(left) < GetLength(right)); + } VARLEN_COMPARE_FUNC(<); // NOLINT } @@ -78,6 +87,9 @@ auto VarlenType::CompareLessThanEquals(const Value &left, const Value &right) co if (left.IsNull() || right.IsNull()) { return CmpBool::CmpNull; } + if (GetLength(left) == BUSTUB_VARCHAR_MAX_LEN || GetLength(right) == BUSTUB_VARCHAR_MAX_LEN) { + return 
GetCmpBool(GetLength(left) <= GetLength(right)); + } VARLEN_COMPARE_FUNC(<=); // NOLINT } @@ -87,6 +99,9 @@ auto VarlenType::CompareGreaterThan(const Value &left, const Value &right) const if (left.IsNull() || right.IsNull()) { return CmpBool::CmpNull; } + if (GetLength(left) == BUSTUB_VARCHAR_MAX_LEN || GetLength(right) == BUSTUB_VARCHAR_MAX_LEN) { + return GetCmpBool(GetLength(left) > GetLength(right)); + } VARLEN_COMPARE_FUNC(>); // NOLINT } @@ -96,6 +111,9 @@ auto VarlenType::CompareGreaterThanEquals(const Value &left, const Value &right) if (left.IsNull() || right.IsNull()) { return CmpBool::CmpNull; } + if (GetLength(left) == BUSTUB_VARCHAR_MAX_LEN || GetLength(right) == BUSTUB_VARCHAR_MAX_LEN) { + return GetCmpBool(GetLength(left) >= GetLength(right)); + } VARLEN_COMPARE_FUNC(>=); // NOLINT } @@ -123,7 +141,7 @@ auto VarlenType::Max(const Value &left, const Value &right) const -> Value { } auto VarlenType::ToString(const Value &val) const -> std::string { - uint32_t len = GetStorageSize(val); + uint32_t len = GetLength(val); if (val.IsNull()) { return "varlen_null"; @@ -138,7 +156,7 @@ auto VarlenType::ToString(const Value &val) const -> std::string { } void VarlenType::SerializeTo(const Value &val, char *storage) const { - uint32_t len = GetStorageSize(val); + uint32_t len = GetLength(val); if (len == BUSTUB_VALUE_NULL) { memcpy(storage, &len, sizeof(uint32_t)); return; diff --git a/src/type/vector_type.cpp b/src/type/vector_type.cpp deleted file mode 100644 index 25a755e..0000000 --- a/src/type/vector_type.cpp +++ /dev/null @@ -1,117 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// BusTub -// -// varlen_type.cpp -// -// Identification: src/type/varlen_type.cpp -// -// Copyright (c) 2015-2019, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "common/exception.h" -#include "common/macros.h" 
-#include "type/type_id.h" -#include "type/type_util.h" -#include "type/vector_type.h" - -namespace bustub { - -VectorType::VectorType() : Type(TypeId::VECTOR) {} - -VectorType::~VectorType() = default; - -// Access the raw variable length data -auto VectorType::GetData(const Value &val) const -> const char * { return val.value_.varlen_; } - -auto VectorType::GetVector(const Value &val) const -> std::vector { - auto *base_ptr = reinterpret_cast(val.value_.varlen_); - auto size = val.size_.len_ / sizeof(double); - std::vector data; - data.reserve(size); - for (unsigned i = 0; i < size; i++) { - data.push_back(base_ptr[i]); - } - return data; -} - -// Get the length of the variable length data (including the length field) -auto VectorType::GetStorageSize(const Value &val) const -> uint32_t { return val.size_.len_; } - -auto VectorType::CompareEquals(const Value &left, const Value &right) const -> CmpBool { - UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::CompareNotEquals(const Value &left, const Value &right) const -> CmpBool { - UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::CompareLessThan(const Value &left, const Value &right) const -> CmpBool { - UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::CompareLessThanEquals(const Value &left, const Value &right) const -> CmpBool { - UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::CompareGreaterThan(const Value &left, const Value &right) const -> CmpBool { - UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::CompareGreaterThanEquals(const Value &left, const Value &right) const -> CmpBool { - UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::Min(const Value &left, const Value &right) const -> Value { - UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::Max(const Value &left, const Value &right) const -> Value { - 
UNIMPLEMENTED("vector type comparison not supported"); -} - -auto VectorType::ToString(const Value &val) const -> std::string { - uint32_t len = GetStorageSize(val); - - if (val.IsNull()) { - return "vector_null"; - } - if (len == BUSTUB_VARCHAR_MAX_LEN) { - return "vector_max"; - } - if (len == 0) { - return ""; - } - return fmt::format("[{}]", fmt::join(GetVector(val), ",")); -} - -void VectorType::SerializeTo(const Value &val, char *storage) const { - uint32_t len = GetStorageSize(val); - if (len == BUSTUB_VALUE_NULL) { - memcpy(storage, &len, sizeof(uint32_t)); - return; - } - memcpy(storage, &len, sizeof(uint32_t)); - memcpy(storage + sizeof(uint32_t), val.value_.varlen_, len); -} - -// Deserialize a value of the given type from the given storage space. -auto VectorType::DeserializeFrom(const char *storage) const -> Value { - uint32_t len = *reinterpret_cast(storage); - if (len == BUSTUB_VALUE_NULL) { - return {type_id_, nullptr, len, false}; - } - // set manage_data as true - return {type_id_, storage + sizeof(uint32_t), len, true}; -} - -auto VectorType::Copy(const Value &val) const -> Value { return {val}; } - -auto VectorType::CastAs(const Value &value, const TypeId type_id) const -> Value { - UNIMPLEMENTED("vector type cast not supported"); -} -} // namespace bustub