-
Notifications
You must be signed in to change notification settings - Fork 197
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Document cuda::maximum, cuda::minimum (#3883)
- Loading branch information
Showing
2 changed files
with
83 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,41 @@ | ||
.. _libcudacxx-extended-api-functional: | ||
|
||
Function wrapper | ||
----------------- | ||
Functional | ||
---------- | ||
|
||
.. toctree:: | ||
:hidden: | ||
:maxdepth: 1 | ||
|
||
functional/proclaim_return_type | ||
functional/get_device_address | ||
functional/maximum_minimum | ||
|
||
.. list-table:: | ||
:widths: 25 45 30 | ||
:widths: 25 45 30 30 | ||
:header-rows: 0 | ||
|
||
* - :ref:`cuda::maximum <libcudacxx-extended-api-functional-maximum-minimum>` | ||
- Returns the maximum of two values | ||
- CCCL 2.8.0 | ||
- CUDA 12.9 | ||
|
||
* - :ref:`cuda::minimum <libcudacxx-extended-api-functional-maximum-minimum>` | ||
- Returns the minimum of two values | ||
- CCCL 2.8.0 | ||
- CUDA 12.9 | ||
|
||
* - :ref:`cuda::proclaim_return_type <libcudacxx-extended-api-functional-proclaim-return-type>` | ||
- Creates a forwarding call wrapper that proclaims the return type | ||
- libcu++ 1.9.0 / CCCL 2.0.0 / CUDA 11.8 | ||
- libcu++ 1.9.0 / CCCL 2.0.0 | ||
- CUDA 11.8 | ||
|
||
* - ``cuda::proclaim_copyable_arguments`` | ||
- Creates a forwarding call wrapper that proclaims that arguments can be freely copied before an invocation of the wrapped callable | ||
- CCCL 2.8.0 | ||
- CUDA 12.9 | ||
|
||
* - :ref:`cuda::get_device_address <libcudacxx-extended-api-functional-get-device-address>` | ||
- Returns a valid address to a device object | ||
- CCCL 2.8.0 | ||
- CUDA 12.9 |
65 changes: 65 additions & 0 deletions
65
docs/libcudacxx/extended_api/functional/maximum_minimum.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
.. _libcudacxx-extended-api-functional-maximum-minimum: | ||
|
||
``cuda::maximum`` and ``cuda::minimum`` | ||
======================================= | ||
|
||
.. code:: cuda | ||
template <typename T> | ||
struct maximum { | ||
[[nodiscard]] __host__ __device__ inline | ||
T operator()(T a, T b) const; | ||
}; | ||
template <> | ||
struct maximum<void> { | ||
template <typename T1, typename T2> | ||
[[nodiscard]] __host__ __device__ inline | ||
cuda::std::common_type_t<T1, T2> operator()(T1 a, T2 b) const; | ||
}; | ||
template <typename T> | ||
struct minimum { | ||
[[nodiscard]] __host__ __device__ inline | ||
T operator()(T a, T b) const; | ||
}; | ||
template <> | ||
struct minimum<void> { | ||
template <typename T1, typename T2> | ||
[[nodiscard]] __host__ __device__ inline | ||
cuda::std::common_type_t<T1, T2> operator()(T1 a, T2 b) const; | ||
}; | ||
Function objects that compute the maximum and the minimum of two values. Their call operators are ``noexcept`` when the comparison between the values is ``noexcept``. | ||
|
||
.. note:: | ||
|
||
Unlike ``std::plus`` and other functional operators, ``cuda::maximum`` and ``cuda::minimum`` specialized for ``void`` return ``cuda::std::common_type_t<T1, T2>`` rather than the implicitly promoted type. | ||
|
||
Example | ||
------- | ||
|
||
.. code:: cuda | ||
#include <cuda/functional> | ||
#include <cuda/std/cstdint> | ||
#include <cstdio> | ||
#include <numeric> | ||
__global__ void maximum_minimum_kernel() { | ||
uint16_t v1 = 7; | ||
uint16_t v2 = 3; | ||
printf("%d\n", cuda::maximum<uint16_t>{}(v1, v2)); // print "7" (uint16_t) | ||
printf("%d\n", cuda::minimum{}(v1, v2)); // print "3" (uint16_t) | ||
} | ||
int main() { | ||
maximum_minimum_kernel<<<1, 1>>>(); | ||
cudaDeviceSynchronize(); | ||
int array[] = {3, 7, 5, 2}; | ||
printf("%d\n", std::accumulate(array, array + 4, 0, cuda::maximum{})); // 7 | ||
return 0; | ||
} | ||
`See it on Godbolt 🔗 <https://godbolt.org/z/44fdTerre>`_ |