From effc21cee5e4e93e6e9bd8eccd385de02d465bc0 Mon Sep 17 00:00:00 2001 From: "Rossi(Ruoxi) Sun" Date: Tue, 26 Dec 2023 09:14:32 -0800 Subject: [PATCH] GH-39357: [C++] Reduce function.h includes (#39312) ### Rationale for this change As proposed in #36246 , by splitting function option structs from `function.h`, we can reduce the including of `function.h`. So that the total build time could be reduced. The total parser time could be reduced from 722.3s to 709.7s. And the `function.h` along with its transitive inclusion of `kernel.h` don't show up in expensive headers any more. The detailed analysis result before and after this PR are attached: [analyze-before.txt](https://github.com/apache/arrow/files/13756923/analyze-before.txt) [analyze-after.txt](https://github.com/apache/arrow/files/13756924/analyze-after.txt) Disclaimer (quote from https://github.com/apache/arrow/issues/36246#issuecomment-1866974963): > Note that the time diff is not absolute. The ClangBuildAnalyzer result differs from time to time. I guess it depends on the idle-ness of the building machine when doing the experiment. But the time reduction is almost certain, though sometimes more sometimes less. And the inclusion times of the questioning headers are reduced for sure, as shown in the attachments in my other comment. ### What changes are included in this PR? Move function option structs into own `compute/options.h`, and change including `function.h` to including `options.h` wherever fits. ### Are these changes tested? Build is testing. ### Are there any user-facing changes? There could be potential build failures for user code (quote from https://github.com/apache/arrow/issues/36246#issuecomment-1866980969): > The header function.h remains in compute/api.h, with and without this PR. The proposed PR removes function.h from api_xxx.h (then includes options.h instead), as proposed in the initial description of this issue. This results in compile failures for user code which includes only compute/api_xxx.h but not compute/api.h, and meanwhile uses CallFunction which is declared in function.h. But I think it's OK as described in https://github.com/apache/arrow/issues/36246#issuecomment-1867018578. * Closes: #39357 Authored-by: zanmato Signed-off-by: Felipe Oliveira Carvalho --- .../arrow/compute_and_write_csv_example.cc | 2 +- cpp/src/arrow/acero/aggregate_internal.cc | 1 + cpp/src/arrow/acero/scalar_aggregate_node.cc | 1 + cpp/src/arrow/compute/api.h | 21 +++-- cpp/src/arrow/compute/api_aggregate.h | 2 +- cpp/src/arrow/compute/api_scalar.h | 2 +- cpp/src/arrow/compute/api_vector.h | 3 +- cpp/src/arrow/compute/cast.h | 1 + cpp/src/arrow/compute/function.cc | 1 + cpp/src/arrow/compute/function.h | 46 +---------- cpp/src/arrow/compute/function_options.h | 81 +++++++++++++++++++ .../kernels/scalar_if_else_benchmark.cc | 1 + cpp/src/arrow/compute/kernels/vector_rank.cc | 1 + .../kernels/vector_replace_benchmark.cc | 1 + .../kernels/vector_run_end_encode_test.cc | 1 + .../arrow/compute/kernels/vector_select_k.cc | 1 + cpp/src/arrow/compute/kernels/vector_sort.cc | 1 + cpp/src/arrow/compute/registry_test.cc | 1 + cpp/src/arrow/compute/type_fwd.h | 1 + 19 files changed, 111 insertions(+), 58 deletions(-) create mode 100644 cpp/src/arrow/compute/function_options.h diff --git a/cpp/examples/arrow/compute_and_write_csv_example.cc b/cpp/examples/arrow/compute_and_write_csv_example.cc index edf21e45b2bb7..7e0f6cdf1ce16 100644 --- a/cpp/examples/arrow/compute_and_write_csv_example.cc +++ b/cpp/examples/arrow/compute_and_write_csv_example.cc @@ -16,7 +16,7 @@ // under the License. #include -#include +#include #include #include #include diff --git a/cpp/src/arrow/acero/aggregate_internal.cc b/cpp/src/arrow/acero/aggregate_internal.cc index 3cd5491720dcd..9c4b7fe5ae98c 100644 --- a/cpp/src/arrow/acero/aggregate_internal.cc +++ b/cpp/src/arrow/acero/aggregate_internal.cc @@ -25,6 +25,7 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/options.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/acero/scalar_aggregate_node.cc b/cpp/src/arrow/acero/scalar_aggregate_node.cc index ae59aa692096a..c7805f4d24eb2 100644 --- a/cpp/src/arrow/acero/scalar_aggregate_node.cc +++ b/cpp/src/arrow/acero/scalar_aggregate_node.cc @@ -25,6 +25,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/util.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h index 5b5dfdf69eb94..b701d9928691f 100644 --- a/cpp/src/arrow/compute/api.h +++ b/cpp/src/arrow/compute/api.h @@ -20,18 +20,23 @@ #pragma once +/// \defgroup compute-functions Abstract compute function API +/// @{ +/// @} + /// \defgroup compute-concrete-options Concrete option classes for compute functions /// @{ /// @} -#include "arrow/compute/api_aggregate.h" // IWYU pragma: export -#include "arrow/compute/api_scalar.h" // IWYU pragma: export -#include "arrow/compute/api_vector.h" // IWYU pragma: export -#include "arrow/compute/cast.h" // IWYU pragma: export -#include "arrow/compute/function.h" // IWYU pragma: export -#include "arrow/compute/kernel.h" // IWYU pragma: export -#include "arrow/compute/registry.h" // IWYU pragma: export -#include "arrow/datum.h" // IWYU pragma: export +#include "arrow/compute/api_aggregate.h" // IWYU pragma: export +#include "arrow/compute/api_scalar.h" // IWYU pragma: export +#include "arrow/compute/api_vector.h" // IWYU pragma: export +#include "arrow/compute/cast.h" // IWYU pragma: export +#include "arrow/compute/function.h" // IWYU pragma: export +#include "arrow/compute/function_options.h" // IWYU pragma: export +#include "arrow/compute/kernel.h" // IWYU pragma: export +#include "arrow/compute/registry.h" // IWYU pragma: export +#include "arrow/datum.h" // IWYU pragma: export #include "arrow/compute/expression.h" // IWYU pragma: export diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 3493c3146310d..4d2c814a69bbb 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -22,7 +22,7 @@ #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 9f12471ddca14..26fbe64f74293 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -24,7 +24,7 @@ #include #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/type_fwd.h" #include "arrow/datum.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 0233090ef6fb9..759f9e5c1a408 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -20,9 +20,8 @@ #include #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/ordering.h" -#include "arrow/datum.h" #include "arrow/result.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h index 613e8a55addd2..18e56092dda2a 100644 --- a/cpp/src/arrow/compute/cast.h +++ b/cpp/src/arrow/compute/cast.h @@ -22,6 +22,7 @@ #include #include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc index c0433145dd1d0..e1a2e8c5d8879 100644 --- a/cpp/src/arrow/compute/function.cc +++ b/cpp/src/arrow/compute/function.cc @@ -26,6 +26,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec_internal.h" #include "arrow/compute/function_internal.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/registry.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h index 333c9a65c56c4..be934a3c5abfc 100644 --- a/cpp/src/arrow/compute/function.h +++ b/cpp/src/arrow/compute/function.h @@ -36,53 +36,9 @@ namespace arrow { namespace compute { -/// \defgroup compute-functions Abstract compute function API -/// +/// \addtogroup compute-functions /// @{ -/// \brief Extension point for defining options outside libarrow (but -/// still within this project). -class ARROW_EXPORT FunctionOptionsType { - public: - virtual ~FunctionOptionsType() = default; - - virtual const char* type_name() const = 0; - virtual std::string Stringify(const FunctionOptions&) const = 0; - virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0; - virtual Result> Serialize(const FunctionOptions&) const; - virtual Result> Deserialize( - const Buffer& buffer) const; - virtual std::unique_ptr Copy(const FunctionOptions&) const = 0; -}; - -/// \brief Base class for specifying options configuring a function's behavior, -/// such as error handling. -class ARROW_EXPORT FunctionOptions : public util::EqualityComparable { - public: - virtual ~FunctionOptions() = default; - - const FunctionOptionsType* options_type() const { return options_type_; } - const char* type_name() const { return options_type()->type_name(); } - - bool Equals(const FunctionOptions& other) const; - std::string ToString() const; - std::unique_ptr Copy() const; - /// \brief Serialize an options struct to a buffer. - Result> Serialize() const; - /// \brief Deserialize an options struct from a buffer. - /// Note: this will only look for `type_name` in the default FunctionRegistry; - /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then - /// call FunctionOptionsType::Deserialize(). - static Result> Deserialize( - const std::string& type_name, const Buffer& buffer); - - protected: - explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {} - const FunctionOptionsType* options_type_; -}; - -ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*); - /// \brief Contains the number of required arguments for the function. /// /// Naming conventions taken from https://en.wikipedia.org/wiki/Arity. diff --git a/cpp/src/arrow/compute/function_options.h b/cpp/src/arrow/compute/function_options.h new file mode 100644 index 0000000000000..88ec2fd2d0679 --- /dev/null +++ b/cpp/src/arrow/compute/function_options.h @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// NOTE: API is EXPERIMENTAL and will change without going through a +// deprecation cycle. + +#pragma once + +#include "arrow/compute/type_fwd.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/type_fwd.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace compute { + +/// \addtogroup compute-functions +/// @{ + +/// \brief Extension point for defining options outside libarrow (but +/// still within this project). +class ARROW_EXPORT FunctionOptionsType { + public: + virtual ~FunctionOptionsType() = default; + + virtual const char* type_name() const = 0; + virtual std::string Stringify(const FunctionOptions&) const = 0; + virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0; + virtual Result> Serialize(const FunctionOptions&) const; + virtual Result> Deserialize( + const Buffer& buffer) const; + virtual std::unique_ptr Copy(const FunctionOptions&) const = 0; +}; + +/// \brief Base class for specifying options configuring a function's behavior, +/// such as error handling. +class ARROW_EXPORT FunctionOptions : public util::EqualityComparable { + public: + virtual ~FunctionOptions() = default; + + const FunctionOptionsType* options_type() const { return options_type_; } + const char* type_name() const { return options_type()->type_name(); } + + bool Equals(const FunctionOptions& other) const; + std::string ToString() const; + std::unique_ptr Copy() const; + /// \brief Serialize an options struct to a buffer. + Result> Serialize() const; + /// \brief Deserialize an options struct from a buffer. + /// Note: this will only look for `type_name` in the default FunctionRegistry; + /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then + /// call FunctionOptionsType::Deserialize(). + static Result> Deserialize( + const std::string& type_name, const Buffer& buffer); + + protected: + explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {} + const FunctionOptionsType* options_type_; +}; + +ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*); + +/// @} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index b72402bbccd4e..58bc560f52842 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -21,6 +21,7 @@ #include "arrow/array/concatenate.h" #include "arrow/array/util.h" #include "arrow/compute/api_scalar.h" +#include "arrow/compute/function.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/util/key_value_metadata.h" diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index 780ae25d96360..0cea7246e516c 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc index 719969d46ea7c..971a841de0773 100644 --- a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc @@ -18,6 +18,7 @@ #include #include "arrow/array.h" +#include "arrow/datum.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc index 0bd8e3386e7cc..f02aee1b35996 100644 --- a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc @@ -21,6 +21,7 @@ #include "arrow/array/validate.h" #include "arrow/builder.h" #include "arrow/compute/api_vector.h" +#include "arrow/datum.h" #include "arrow/testing/gtest_util.h" #include "arrow/type_fwd.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index 5000de8996280..1740a9b7f0bb4 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -17,6 +17,7 @@ #include +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 8ddcbb9905cb2..e08a2bc10372f 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -17,6 +17,7 @@ #include +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/registry_test.cc b/cpp/src/arrow/compute/registry_test.cc index 7fee136de7a0b..2d69f119df1f4 100644 --- a/cpp/src/arrow/compute/registry_test.cc +++ b/cpp/src/arrow/compute/registry_test.cc @@ -22,6 +22,7 @@ #include #include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/registry.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h index 3f990b1814311..89f32ceb0f906 100644 --- a/cpp/src/arrow/compute/type_fwd.h +++ b/cpp/src/arrow/compute/type_fwd.h @@ -27,6 +27,7 @@ struct TypeHolder; namespace compute { class Function; +class ScalarAggregateFunction; class FunctionExecutor; class FunctionOptions; class FunctionRegistry;