Skip to content

Commit

Permalink
added benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
shrshi committed Feb 26, 2025
1 parent 54c15b2 commit 6a3bd1d
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 2 deletions.
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ ConfigureNVBench(
# * join benchmark --------------------------------------------------------------------------------
ConfigureNVBench(
JOIN_NVBENCH join/left_join.cu join/conditional_join.cu join/join.cu join/mixed_join.cu
join/distinct_join.cu
join/distinct_join.cu join/cardinality_join.cu
)

# ##################################################################################################
Expand Down
81 changes: 81 additions & 0 deletions cpp/benchmarks/join/cardinality_join.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/join/join_common.hpp>

// nvbench entry point for cudf::inner_join.
//
// Reads the "cardinality" (int64) and "selectivity" (float64) axis values from `state` and
// forwards them, together with a callable wrapping cudf::inner_join, to BM_join.
//
// Template parameters:
//   Key      - key type forwarded to BM_join
//   Nullable - nullability flag forwarded to BM_join
template <typename Key, bool Nullable>
void nvbench_inner_join(nvbench::state& state,
                        nvbench::type_list<Key, nvbench::enum_type<Nullable>>)
{
  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  // Keep the axis value at full double precision: BM_join takes `double selectivity`, so
  // narrowing to float first would only perturb the value (e.g. 0.3f != 0.3).
  auto const selectivity = state.get_float64("selectivity");

  auto join = [](cudf::table_view const& left_input,
                 cudf::table_view const& right_input,
                 cudf::null_equality compare_nulls) {
    return cudf::inner_join(left_input, right_input, compare_nulls);
  };

  // NOTE(review): `cardinality` lands in BM_join's fourth parameter, which is named
  // `multiplicity` there - confirm the two terms are meant to be interchangeable.
  BM_join<Key, Nullable>(state, join, selectivity, cardinality);
}

// nvbench entry point for cudf::left_join.
//
// Reads the "cardinality" (int64) and "selectivity" (float64) axis values from `state` and
// forwards them, together with a callable wrapping cudf::left_join, to BM_join.
//
// Template parameters:
//   Key      - key type forwarded to BM_join
//   Nullable - nullability flag forwarded to BM_join
template <typename Key, bool Nullable>
void nvbench_left_join(nvbench::state& state, nvbench::type_list<Key, nvbench::enum_type<Nullable>>)
{
  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  // Keep the axis value at full double precision: BM_join takes `double selectivity`, so
  // narrowing to float first would only perturb the value (e.g. 0.3f != 0.3).
  auto const selectivity = state.get_float64("selectivity");

  auto join = [](cudf::table_view const& left_input,
                 cudf::table_view const& right_input,
                 cudf::null_equality compare_nulls) {
    return cudf::left_join(left_input, right_input, compare_nulls);
  };

  // NOTE(review): `cardinality` lands in BM_join's fourth parameter, which is named
  // `multiplicity` there - confirm the two terms are meant to be interchangeable.
  BM_join<Key, Nullable>(state, join, selectivity, cardinality);
}

// nvbench entry point for cudf::full_join.
//
// Reads the "cardinality" (int64) and "selectivity" (float64) axis values from `state` and
// forwards them, together with a callable wrapping cudf::full_join, to BM_join.
//
// Template parameters:
//   Key      - key type forwarded to BM_join
//   Nullable - nullability flag forwarded to BM_join
template <typename Key, bool Nullable>
void nvbench_full_join(nvbench::state& state, nvbench::type_list<Key, nvbench::enum_type<Nullable>>)
{
  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  // Keep the axis value at full double precision: BM_join takes `double selectivity`, so
  // narrowing to float first would only perturb the value (e.g. 0.3f != 0.3).
  auto const selectivity = state.get_float64("selectivity");

  auto join = [](cudf::table_view const& left_input,
                 cudf::table_view const& right_input,
                 cudf::null_equality compare_nulls) {
    return cudf::full_join(left_input, right_input, compare_nulls);
  };

  // NOTE(review): `cardinality` lands in BM_join's fourth parameter, which is named
  // `multiplicity` there - confirm the two terms are meant to be interchangeable.
  BM_join<Key, Nullable>(state, join, selectivity, cardinality);
}

// Registers nvbench_inner_join as the "low_cardinality_inner_join" benchmark.
// Type axes: Key (JOIN_KEY_TYPE_RANGE) and Nullable (JOIN_NULLABLE_RANGE).
// Value axes: left_size and right_size (both JOIN_SIZE_RANGE), cardinality (10 up to 10'000'000)
// and selectivity (0.3, 0.6, 0.9). The "cardinality" and "selectivity" names must match the
// strings nvbench_inner_join reads back from the nvbench state.
NVBENCH_BENCH_TYPES(nvbench_inner_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE))
.set_name("low_cardinality_inner_join")
.set_type_axes_names({"Key", "Nullable"})
.add_int64_axis("left_size", JOIN_SIZE_RANGE)
.add_int64_axis("right_size", JOIN_SIZE_RANGE)
.add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000})
.add_float64_axis("selectivity", {0.3, 0.6, 0.9});

// Registers nvbench_left_join as the "low_cardinality_left_join" benchmark.
// Type axes: Key (JOIN_KEY_TYPE_RANGE) and Nullable (JOIN_NULLABLE_RANGE).
// Value axes: left_size and right_size (both JOIN_SIZE_RANGE), cardinality (10 up to 10'000'000)
// and selectivity (0.3, 0.6, 0.9). The "cardinality" and "selectivity" names must match the
// strings nvbench_left_join reads back from the nvbench state.
NVBENCH_BENCH_TYPES(nvbench_left_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE))
.set_name("low_cardinality_left_join")
.set_type_axes_names({"Key", "Nullable"})
.add_int64_axis("left_size", JOIN_SIZE_RANGE)
.add_int64_axis("right_size", JOIN_SIZE_RANGE)
.add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000})
.add_float64_axis("selectivity", {0.3, 0.6, 0.9});

// Registers nvbench_full_join as the "low_cardinality_full_join" benchmark.
// Type axes: Key (JOIN_KEY_TYPE_RANGE) and Nullable (JOIN_NULLABLE_RANGE).
// Value axes: left_size and right_size (both JOIN_SIZE_RANGE), cardinality (10 up to 10'000'000)
// and selectivity (0.3, 0.6, 0.9). The "cardinality" and "selectivity" names must match the
// strings nvbench_full_join reads back from the nvbench state.
NVBENCH_BENCH_TYPES(nvbench_full_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE))
.set_name("low_cardinality_full_join")
.set_type_axes_names({"Key", "Nullable"})
.add_int64_axis("left_size", JOIN_SIZE_RANGE)
.add_int64_axis("right_size", JOIN_SIZE_RANGE)
.add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000})
.add_float64_axis("selectivity", {0.3, 0.6, 0.9});
25 changes: 24 additions & 1 deletion cpp/benchmarks/join/join_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <cudf/filling.hpp>
#include <cudf/join.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/stream_compaction.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -60,12 +61,27 @@ struct null75_generator {

enum class join_t { CONDITIONAL, MIXED, HASH };

// Prints a short summary of table `t` to stdout: its row and column counts, followed by the
// number of distinct values in each column. The distinct count is computed with
// null_policy::EXCLUDE and nan_policy::NAN_IS_NULL.
//
// Declared `inline` rather than inside an anonymous namespace because this lives in a header:
// an anonymous namespace would give every including translation unit its own private copy.
inline void print_statistics(cudf::table_view const& t)
{
  std::cout << "=====================================\n";
  std::cout << "Number of rows = " << t.num_rows() << ", number of columns = " << t.num_columns()
            << "\n";
  for (cudf::size_type i = 0; i < t.num_columns(); ++i) {
    auto const num_unique =
      cudf::distinct_count(t.column(i), cudf::null_policy::EXCLUDE, cudf::nan_policy::NAN_IS_NULL);
    // `i` indexes columns, so label it as such; '\n' avoids a stream flush per column.
    std::cout << "Number of unique elements in column " << i << " = " << num_unique << '\n';
  }
  std::cout << "=====================================\n";
}

template <typename Key,
bool Nullable,
join_t join_type = join_t::HASH,
typename state_type,
typename Join>
void BM_join(state_type& state, Join JoinFunc)
void BM_join(state_type& state, Join JoinFunc, double selectivity = 0.3, int multiplicity = 1)
{
auto const right_size = static_cast<cudf::size_type>(state.get_int64("right_size"));
auto const left_size = static_cast<cudf::size_type>(state.get_int64("left_size"));
Expand All @@ -75,8 +91,10 @@ void BM_join(state_type& state, Join JoinFunc)
return;
}

/*
double const selectivity = 0.3;
int const multiplicity = 1;
*/

// Generate build and probe tables
auto right_random_null_mask = [](int size) {
Expand Down Expand Up @@ -147,6 +165,11 @@ void BM_join(state_type& state, Join JoinFunc)
cudf::table_view left_table(
{left_key_column0->view(), left_key_column1->view(), *left_payload_column});

std::cout << "Probe table stats\n";
print_statistics(left_table);
std::cout << "Build table stats\n";
print_statistics(right_table);

// Setup join parameters and result table
[[maybe_unused]] std::vector<cudf::size_type> columns_to_join = {0};
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
Expand Down

0 comments on commit 6a3bd1d

Please sign in to comment.