From 611e96edeb197d17de1bf1051c45f8f30f4396d2 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 15 Nov 2024 13:20:37 -0500 Subject: [PATCH 1/3] Move make_strings_column benchmark to nvbench --- cpp/benchmarks/CMakeLists.txt | 3 +- cpp/benchmarks/string/factory.cpp | 60 +++++++++++++++++++++++++++++++ cpp/benchmarks/string/factory.cu | 43 +++++++++++++--------- 3 files changed, 88 insertions(+), 18 deletions(-) create mode 100644 cpp/benchmarks/string/factory.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index ae78b206810..e7a13e08e98 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -355,7 +355,7 @@ ConfigureNVBench( # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- ConfigureBench( - STRINGS_BENCH string/factory.cu string/repeat_strings.cpp string/replace.cpp string/translate.cpp + STRINGS_BENCH string/repeat_strings.cpp string/replace.cpp string/translate.cpp string/url_decode.cu ) @@ -374,6 +374,7 @@ ConfigureNVBench( string/copy_range.cpp string/count.cpp string/extract.cpp + string/factory.cpp string/filter.cpp string/find.cpp string/find_multiple.cpp diff --git a/cpp/benchmarks/string/factory.cpp b/cpp/benchmarks/string/factory.cpp new file mode 100644 index 00000000000..03870b0ae23 --- /dev/null +++ b/cpp/benchmarks/string/factory.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +static void bench_factory(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + auto const sv = cudf::strings_column_view(column->view()); + + auto stream = cudf::get_default_stream(); + auto mr = cudf::get_current_device_resource_ref(); + auto d_strings = cudf::strings::detail::create_string_vector_from_column(sv, stream, mr); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = sv.chars_size(stream); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::make_strings_column(d_strings, cudf::string_view{nullptr, 0}); + }); +} + +NVBENCH_BENCH(bench_factory) + .set_name("factory") + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu index c4e74c4d97e..271301d1dad 100644 --- a/cpp/benchmarks/string/factory.cu +++ b/cpp/benchmarks/string/factory.cu @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "string_bench_args.hpp" #include @@ -22,6 +21,8 @@ #include +#include +#include #include #include #include @@ -34,15 +35,16 @@ #include -namespace { -using string_pair = thrust::pair; -struct string_view_to_pair { - __device__ string_pair operator()(thrust::pair const& p) - { - return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, 0}; - } -}; -} // namespace +// namespace { +// using string_pair = thrust::pair; +// struct string_view_to_pair { +// string_pair operator()(thrust::pair const& p) +// { +// return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, +// 0}; +// } +// }; +// } // namespace class StringsFactory : public cudf::benchmark {}; @@ -54,16 +56,23 @@ static void BM_factory(benchmark::State& state) cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); auto d_column = cudf::column_device_view::create(column->view()); - rmm::device_uvector pairs(d_column->size(), cudf::get_default_stream()); - thrust::transform(thrust::device, - d_column->pair_begin(), - d_column->pair_end(), - pairs.data(), - string_view_to_pair{}); + auto stream = cudf::get_default_stream(); + auto mr = cudf::get_current_device_resource_ref(); + auto d_strings = cudf::strings::detail::create_string_vector_from_column( + cudf::strings_column_view(column->view()), stream, mr); + + // rmm::device_uvector pairs(d_column->size(), cudf::get_default_stream()); + // thrust::transform(thrust::device, + // d_column->pair_begin(), + // d_column->pair_end(), + // pairs.data(), + // string_view_to_pair{}); + // for (auto _ : state) { cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::make_strings_column(pairs, cudf::get_default_stream()); + // cudf::make_strings_column(pairs, cudf::get_default_stream()); + cudf::make_strings_column(d_strings, cudf::string_view{nullptr, 0}); } cudf::strings_column_view input(column->view()); From 972ba61a94b69ccc081c539a302157c504187db4 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 15 Nov 2024 13:23:02 -0500 Subject: [PATCH 2/3] remove factory.cu file --- cpp/benchmarks/string/factory.cu | 101 ------------------------------- 1 file changed, 101 deletions(-) delete mode 100644 cpp/benchmarks/string/factory.cu diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu deleted file mode 100644 index 271301d1dad..00000000000 --- a/cpp/benchmarks/string/factory.cu +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "string_bench_args.hpp" - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include - -// namespace { -// using string_pair = thrust::pair; -// struct string_view_to_pair { -// string_pair operator()(thrust::pair const& p) -// { -// return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, -// 0}; -// } -// }; -// } // namespace - -class StringsFactory : public cudf::benchmark {}; - -static void BM_factory(benchmark::State& state) -{ - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); - auto d_column = cudf::column_device_view::create(column->view()); - - auto stream = cudf::get_default_stream(); - auto mr = cudf::get_current_device_resource_ref(); - auto d_strings = cudf::strings::detail::create_string_vector_from_column( - cudf::strings_column_view(column->view()), stream, mr); - - // rmm::device_uvector pairs(d_column->size(), cudf::get_default_stream()); - // thrust::transform(thrust::device, - // d_column->pair_begin(), - // d_column->pair_end(), - // pairs.data(), - // string_view_to_pair{}); - // - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - // cudf::make_strings_column(pairs, cudf::get_default_stream()); - cudf::make_strings_column(d_strings, cudf::string_view{nullptr, 0}); - } - - cudf::strings_column_view input(column->view()); - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); -} - -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); -} - -#define STRINGS_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(StringsFactory, name) \ - (::benchmark::State & st) { BM_factory(st); } \ - BENCHMARK_REGISTER_F(StringsFactory, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -STRINGS_BENCHMARK_DEFINE(factory) From 89345465732fae6f46cd919c36342625e5efb6a0 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 18 Nov 2024 11:18:20 -0500 Subject: [PATCH 3/3] fix style violation --- cpp/benchmarks/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 00f2f95b17d..f50730b2e98 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -354,9 +354,7 @@ ConfigureNVBench( # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- -ConfigureBench( - STRINGS_BENCH string/repeat_strings.cpp string/replace.cpp string/url_decode.cu -) +ConfigureBench(STRINGS_BENCH string/repeat_strings.cpp string/replace.cpp string/url_decode.cu) ConfigureNVBench( STRINGS_NVBENCH