From 1286f87c787833f942d156d61ce31e45a4dc316f Mon Sep 17 00:00:00 2001 From: fbusato Date: Thu, 20 Feb 2025 12:07:00 -0800 Subject: [PATCH] document maximum, minimum --- docs/libcudacxx/extended_api/functional.rst | 22 +++++-- .../functional/maximum_minimum.rst | 65 +++++++++++++++++++ 2 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 docs/libcudacxx/extended_api/functional/maximum_minimum.rst diff --git a/docs/libcudacxx/extended_api/functional.rst b/docs/libcudacxx/extended_api/functional.rst index 16a15907c62..101f19a5153 100644 --- a/docs/libcudacxx/extended_api/functional.rst +++ b/docs/libcudacxx/extended_api/functional.rst @@ -1,7 +1,7 @@ .. _libcudacxx-extended-api-functional: -Function wrapper ------------------ +Functional +---------- .. toctree:: :hidden: @@ -9,19 +9,33 @@ Function wrapper functional/proclaim_return_type functional/get_device_address + functional/maximum_minimum .. list-table:: - :widths: 25 45 30 + :widths: 25 45 30 30 :header-rows: 0 + * - :ref:`cuda::maximum <libcudacxx-extended-api-functional-maximum-minimum>` + - Returns the maximum of two values + - CCCL 2.8.0 + - CUDA 12.9 + + * - :ref:`cuda::minimum <libcudacxx-extended-api-functional-maximum-minimum>` + - Returns the minimum of two values + - CCCL 2.8.0 + - CUDA 12.9 + * - :ref:`cuda::proclaim_return_type ` - Creates a forwarding call wrapper that proclaims return type - - libcu++ 1.9.0 / CCCL 2.0.0 / CUDA 11.8 + - libcu++ 1.9.0 / CCCL 2.0.0 + - CUDA 11.8 * - ``cuda::proclaim_copyable_arguments`` - Creates a forwarding call wrapper that proclaims that arguments can be freely copied before an invocation of the wrapped callable - CCCL 2.8.0 + - CUDA 12.9 * - :ref:`cuda::get_device_address ` - Returns a valid address to a device object - CCCL 2.8.0 + - CUDA 12.9 diff --git a/docs/libcudacxx/extended_api/functional/maximum_minimum.rst b/docs/libcudacxx/extended_api/functional/maximum_minimum.rst new file mode 100644 index 00000000000..5ba239f5509 --- /dev/null +++ b/docs/libcudacxx/extended_api/functional/maximum_minimum.rst @@ -0,0 +1,65 @@ +.. 
_libcudacxx-extended-api-functional-maximum-minimum: + +``cuda::maximum`` and ``cuda::minimum`` +======================================= + +.. code:: cuda + + template <typename T> + struct maximum { + [[nodiscard]] __host__ __device__ inline + T operator()(T a, T b) const; + }; + + template <> + struct maximum<void> { + template <typename T1, typename T2> + [[nodiscard]] __host__ __device__ inline + cuda::std::common_type_t<T1, T2> operator()(T1 a, T2 b) const; + }; + + template <typename T> + struct minimum { + [[nodiscard]] __host__ __device__ inline + T operator()(T a, T b) const; + }; + + template <> + struct minimum<void> { + template <typename T1, typename T2> + [[nodiscard]] __host__ __device__ inline + cuda::std::common_type_t<T1, T2> operator()(T1 a, T2 b) const; + }; + +Function objects that compute the maximum and minimum of two values. The functions behave as ``noexcept`` when the comparison between the values is also ``noexcept``. + +.. note:: + + Unlike ``std::plus`` and other functional operators, ``cuda::maximum`` and ``cuda::minimum`` specialized for ``void`` return ``cuda::std::common_type_t<T1, T2>`` and not the implicit promotion. + +Example +------- + +.. code:: cuda + + #include <cuda/functional> + #include <cuda/std/cstdint> + #include <cstdio> + #include <numeric> + + __global__ void maximum_minimum_kernel() { + uint16_t v1 = 7; + uint16_t v2 = 3; + printf("%d\n", cuda::maximum<uint16_t>{}(v1, v2)); // print "7" (uint16_t) + printf("%d\n", cuda::minimum{}(v1, v2)); // print "3" (int) + } + + int main() { + maximum_minimum_kernel<<<1, 1>>>(); + cudaDeviceSynchronize(); + int array[] = {3, 7, 5, 2}; + printf("%d\n", std::accumulate(array, array + 4, 0, cuda::maximum{})); // 7 + return 0; + } + +`See it on Godbolt 🔗 `_