From fd0cd249ed993af0b1c3ab02e153bd57c412996f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Fri, 21 Feb 2025 13:04:28 -0600 Subject: [PATCH] Fix CCCL C headers to be compileable by C compiler (#3885) * Closes gh-3882 This change ensures that cccl/c/parallel headers are compilable by C compiler. 1. Corrected typedef struct and typedef enum so that C types have the same names as C++. That is `typdef struct name {...} otherName;` was changed to `typedef struct name {...} name;` 2. `noexcept` qualifier was removed from function declarations. 3. Implementation was changed to move `noexcept` implementations into a namespace, and implement publicly declared functions as thin shims calling these noexcept functions from a namespace. 4. Adds c/parallel/test/test_header.c This is a C file which includes all public headers, and contains a trivial `int main(void)` function. The file is compiled using C compiler to produce an object file. Hence it does not result in a dedicate test to run, but any non C-compliant changes to these headers would break test compilation step. N.B.: Every new header file must be included in `test_headers.c` file for it to be tested. 5. Piggy-backing on this PR, added runtime error condition checking in test_main.cpp to verify that `cudaSetDevice` call did not report an error. 6. Modified cccl/c/parallel CMake project list of languages to include C. * Revert using noexcept implementations and call them from C declared function Instead simply remove use of noexcept in C++ definitions of these functions per Bernhard's suggestion --- c/parallel/CMakeLists.txt | 2 +- c/parallel/include/cccl/c/for.h | 12 +++---- c/parallel/include/cccl/c/merge_sort.h | 10 +++--- c/parallel/include/cccl/c/reduce.h | 10 +++--- c/parallel/include/cccl/c/scan.h | 12 +++---- c/parallel/include/cccl/c/segmented_reduce.h | 19 ++++++----- c/parallel/include/cccl/c/types.h | 14 ++++----- c/parallel/src/for.cu | 28 +++++++---------- c/parallel/src/merge_sort.cu | 29 ++++++++--------- c/parallel/src/reduce.cu | 32 +++++++++---------- c/parallel/src/scan.cu | 33 ++++++++++---------- c/parallel/src/segmented_reduce.cu | 28 ++++++++--------- c/parallel/test/CMakeLists.txt | 20 ++++++++++++ c/parallel/test/test_header.c | 12 +++++++ c/parallel/test/test_main.cpp | 6 +++- 15 files changed, 146 insertions(+), 121 deletions(-) create mode 100644 c/parallel/test/test_header.c diff --git a/c/parallel/CMakeLists.txt b/c/parallel/CMakeLists.txt index 33a450fbc45..0406f54dcda 100644 --- a/c/parallel/CMakeLists.txt +++ b/c/parallel/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.21) -project(CCCL_C_Parallel LANGUAGES CUDA CXX) +project(CCCL_C_Parallel LANGUAGES CUDA CXX C) option(CCCL_C_Parallel_ENABLE_TESTING "Build CUDA Experimental's tests." OFF) option(CCCL_C_Parallel_ENABLE_HEADER_TESTING "Build CUDA Experimental's standalone headers." OFF) diff --git a/c/parallel/include/cccl/c/for.h b/c/parallel/include/cccl/c/for.h index 56d1ab3402f..625c4f0572e 100644 --- a/c/parallel/include/cccl/c/for.h +++ b/c/parallel/include/cccl/c/for.h @@ -21,14 +21,14 @@ CCCL_C_EXTERN_C_BEGIN -struct cccl_device_for_build_result_t +typedef struct cccl_device_for_build_result_t { int cc; void* cubin; size_t cubin_size; CUlibrary library; CUkernel static_kernel; -}; +} cccl_device_for_build_result_t; CCCL_C_API CUresult cccl_device_for_build( cccl_device_for_build_result_t* build, @@ -39,14 +39,10 @@ CCCL_C_API CUresult cccl_device_for_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept; + const char* ctk_path); CCCL_C_API CUresult cccl_device_for( - cccl_device_for_build_result_t build, - cccl_iterator_t d_data, - int64_t num_items, - cccl_op_t op, - CUstream stream) noexcept; + cccl_device_for_build_result_t build, cccl_iterator_t d_data, int64_t num_items, cccl_op_t op, CUstream stream); CCCL_C_API CUresult cccl_device_for_cleanup(cccl_device_for_build_result_t* bld_ptr); diff --git a/c/parallel/include/cccl/c/merge_sort.h b/c/parallel/include/cccl/c/merge_sort.h index 7dc35c97209..9bf5f4fd2f8 100644 --- a/c/parallel/include/cccl/c/merge_sort.h +++ b/c/parallel/include/cccl/c/merge_sort.h @@ -21,7 +21,7 @@ CCCL_C_EXTERN_C_BEGIN -struct cccl_device_merge_sort_build_result_t +typedef struct cccl_device_merge_sort_build_result_t { int cc; void* cubin; @@ -30,7 +30,7 @@ struct cccl_device_merge_sort_build_result_t CUkernel block_sort_kernel; CUkernel partition_kernel; CUkernel merge_kernel; -}; +} cccl_device_merge_sort_build_result_t; CCCL_C_API CUresult cccl_device_merge_sort_build( cccl_device_merge_sort_build_result_t* build, @@ -44,7 +44,7 @@ CCCL_C_API CUresult cccl_device_merge_sort_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept; + const char* ctk_path); CCCL_C_API CUresult cccl_device_merge_sort( cccl_device_merge_sort_build_result_t build, @@ -56,8 +56,8 @@ CCCL_C_API CUresult cccl_device_merge_sort( cccl_iterator_t d_out_items, unsigned long long num_items, cccl_op_t op, - CUstream stream) noexcept; + CUstream stream); -CCCL_C_API CUresult cccl_device_merge_sort_cleanup(cccl_device_merge_sort_build_result_t* bld_ptr) noexcept; +CCCL_C_API CUresult cccl_device_merge_sort_cleanup(cccl_device_merge_sort_build_result_t* bld_ptr); CCCL_C_EXTERN_C_END diff --git a/c/parallel/include/cccl/c/reduce.h b/c/parallel/include/cccl/c/reduce.h index 870f219c7a3..01c28f3736a 100644 --- a/c/parallel/include/cccl/c/reduce.h +++ b/c/parallel/include/cccl/c/reduce.h @@ -21,7 +21,7 @@ CCCL_C_EXTERN_C_BEGIN -struct cccl_device_reduce_build_result_t +typedef struct cccl_device_reduce_build_result_t { int cc; void* cubin; @@ -31,7 +31,7 @@ struct cccl_device_reduce_build_result_t CUkernel single_tile_kernel; CUkernel single_tile_second_kernel; CUkernel reduction_kernel; -}; +} cccl_device_reduce_build_result_t; // TODO return a union of nvtx/cuda/nvrtc errors or a string? CCCL_C_API CUresult cccl_device_reduce_build( @@ -45,7 +45,7 @@ CCCL_C_API CUresult cccl_device_reduce_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept; + const char* ctk_path); CCCL_C_API CUresult cccl_device_reduce( cccl_device_reduce_build_result_t build, @@ -56,8 +56,8 @@ CCCL_C_API CUresult cccl_device_reduce( unsigned long long num_items, cccl_op_t op, cccl_value_t init, - CUstream stream) noexcept; + CUstream stream); -CCCL_C_API CUresult cccl_device_reduce_cleanup(cccl_device_reduce_build_result_t* bld_ptr) noexcept; +CCCL_C_API CUresult cccl_device_reduce_cleanup(cccl_device_reduce_build_result_t* bld_ptr); CCCL_C_EXTERN_C_END diff --git a/c/parallel/include/cccl/c/scan.h b/c/parallel/include/cccl/c/scan.h index 3c3ab3cb581..bdf6c2934db 100644 --- a/c/parallel/include/cccl/c/scan.h +++ b/c/parallel/include/cccl/c/scan.h @@ -21,7 +21,7 @@ CCCL_C_EXTERN_C_BEGIN -struct cccl_device_scan_build_result_t +typedef struct cccl_device_scan_build_result_t { int cc; void* cubin; @@ -32,10 +32,10 @@ struct cccl_device_scan_build_result_t CUkernel scan_kernel; size_t description_bytes_per_tile; size_t payload_bytes_per_tile; -}; +} cccl_device_scan_build_result_t; CCCL_C_API CUresult cccl_device_scan_build( - cccl_device_scan_build_result_t* build, + cccl_device_scan_build_result_t* build_ptr, cccl_iterator_t d_in, cccl_iterator_t d_out, cccl_op_t op, @@ -45,7 +45,7 @@ CCCL_C_API CUresult cccl_device_scan_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept; + const char* ctk_path); CCCL_C_API CUresult cccl_device_scan( cccl_device_scan_build_result_t build, @@ -56,8 +56,8 @@ CCCL_C_API CUresult cccl_device_scan( unsigned long long num_items, cccl_op_t op, cccl_value_t init, - CUstream stream) noexcept; + CUstream stream); -CCCL_C_API CUresult cccl_device_scan_cleanup(cccl_device_scan_build_result_t* bld_ptr) noexcept; +CCCL_C_API CUresult cccl_device_scan_cleanup(cccl_device_scan_build_result_t* bld_ptr); CCCL_C_EXTERN_C_END diff --git a/c/parallel/include/cccl/c/segmented_reduce.h b/c/parallel/include/cccl/c/segmented_reduce.h index ee320aa7ba3..39414b6f0a4 100644 --- a/c/parallel/include/cccl/c/segmented_reduce.h +++ b/c/parallel/include/cccl/c/segmented_reduce.h @@ -21,24 +21,23 @@ CCCL_C_EXTERN_C_BEGIN -struct cccl_device_segmented_reduce_build_result_t +typedef struct cccl_device_segmented_reduce_build_result_t { int cc; void* cubin; size_t cubin_size; CUlibrary library; unsigned long long accumulator_size; - unsigned long long offset_size; CUkernel segmented_reduce_kernel; -}; +} cccl_device_segmented_reduce_build_result_t; // TODO return a union of nvtx/cuda/nvrtc errors or a string? CCCL_C_API CUresult cccl_device_segmented_reduce_build( cccl_device_segmented_reduce_build_result_t* build, cccl_iterator_t d_in, cccl_iterator_t d_out, - cccl_iterator_t begin_offset_it, - cccl_iterator_t end_offset_it, + cccl_iterator_t begin_offset_in, + cccl_iterator_t end_offset_in, cccl_op_t op, cccl_value_t init, int cc_major, @@ -46,7 +45,7 @@ CCCL_C_API CUresult cccl_device_segmented_reduce_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept; + const char* ctk_path); CCCL_C_API CUresult cccl_device_segmented_reduce( cccl_device_segmented_reduce_build_result_t build, @@ -55,12 +54,12 @@ CCCL_C_API CUresult cccl_device_segmented_reduce( cccl_iterator_t d_in, cccl_iterator_t d_out, unsigned long long num_offsets, - cccl_iterator_t start_offset_it, - cccl_iterator_t end_offset_it, + cccl_iterator_t start_offset_in, + cccl_iterator_t end_offset_in, cccl_op_t op, cccl_value_t init, - CUstream stream) noexcept; + CUstream stream); -CCCL_C_API CUresult cccl_device_segmented_reduce_cleanup(cccl_device_segmented_reduce_build_result_t* bld_ptr) noexcept; +CCCL_C_API CUresult cccl_device_segmented_reduce_cleanup(cccl_device_segmented_reduce_build_result_t* bld_ptr); CCCL_C_EXTERN_C_END diff --git a/c/parallel/include/cccl/c/types.h b/c/parallel/include/cccl/c/types.h index bc4f6716ee7..66415469880 100644 --- a/c/parallel/include/cccl/c/types.h +++ b/c/parallel/include/cccl/c/types.h @@ -37,20 +37,20 @@ typedef enum cccl_type_enum CCCL_FLOAT32 = 8, CCCL_FLOAT64 = 9, CCCL_STORAGE = 10 -} ccclType; +} cccl_type_enum; typedef struct cccl_type_info { int size; int alignment; cccl_type_enum type; -} ccclTypeInfo; +} cccl_type_info; typedef enum cccl_op_kind_t { CCCL_STATELESS = 0, CCCL_STATEFUL = 1 -} ccclOpKind; +} cccl_op_kind_t; typedef struct cccl_op_t { @@ -61,19 +61,19 @@ typedef struct cccl_op_t int size; int alignment; void* state; -} ccclOp; +} cccl_op_t; typedef enum cccl_iterator_kind_t { CCCL_POINTER = 0, CCCL_ITERATOR = 1 -} ccclIteratorKind; +} cccl_iterator_kind_t; typedef struct cccl_value_t { cccl_type_info type; void* state; -} ccclValue; +} cccl_value_t; typedef struct cccl_iterator_t { @@ -84,6 +84,6 @@ typedef struct cccl_iterator_t cccl_op_t dereference; cccl_type_info value_type; void* state; -} ccclIterator; +} cccl_iterator_t; CCCL_C_EXTERN_C_END diff --git a/c/parallel/src/for.cu b/c/parallel/src/for.cu index 48c264028d2..ffe11dccbdd 100644 --- a/c/parallel/src/for.cu +++ b/c/parallel/src/for.cu @@ -66,7 +66,7 @@ static std::string get_device_for_kernel_name() } CUresult cccl_device_for_build( - cccl_device_for_build_result_t* build, + cccl_device_for_build_result_t* build_ptr, cccl_iterator_t d_data, cccl_op_t op, int cc_major, @@ -74,7 +74,7 @@ CUresult cccl_device_for_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept + const char* ctk_path) { CUresult error = CUDA_SUCCESS; @@ -124,12 +124,12 @@ CUresult cccl_device_for_build( result = cl.finalize_program(num_lto_args, lopts); } - cuLibraryLoadData(&build->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); - check(cuLibraryGetKernel(&build->static_kernel, build->library, lowered_name.c_str())); + cuLibraryLoadData(&build_ptr->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); + check(cuLibraryGetKernel(&build_ptr->static_kernel, build_ptr->library, lowered_name.c_str())); - build->cc = cc; - build->cubin = (void*) result.data.release(); - build->cubin_size = result.size; + build_ptr->cc = cc; + build_ptr->cubin = (void*) result.data.release(); + build_ptr->cubin_size = result.size; } catch (...) { @@ -139,11 +139,7 @@ CUresult cccl_device_for_build( } CUresult cccl_device_for( - cccl_device_for_build_result_t build, - cccl_iterator_t d_data, - int64_t num_items, - cccl_op_t op, - CUstream stream) noexcept + cccl_device_for_build_result_t build, cccl_iterator_t d_data, int64_t num_items, cccl_op_t op, CUstream stream) { bool pushed = false; CUresult error = CUDA_SUCCESS; @@ -167,17 +163,17 @@ CUresult cccl_device_for( return error; } -CUresult cccl_device_for_cleanup(cccl_device_for_build_result_t* bld_ptr) +CUresult cccl_device_for_cleanup(cccl_device_for_build_result_t* build_ptr) { try { - if (bld_ptr == nullptr) + if (build_ptr == nullptr) { return CUDA_ERROR_INVALID_VALUE; } - std::unique_ptr cubin(reinterpret_cast(bld_ptr->cubin)); - check(cuLibraryUnload(bld_ptr->library)); + std::unique_ptr cubin(reinterpret_cast(build_ptr->cubin)); + check(cuLibraryUnload(build_ptr->library)); } catch (...) { diff --git a/c/parallel/src/merge_sort.cu b/c/parallel/src/merge_sort.cu index 8a3e31fff41..0b003a804c2 100644 --- a/c/parallel/src/merge_sort.cu +++ b/c/parallel/src/merge_sort.cu @@ -268,7 +268,7 @@ private: } // namespace merge_sort CUresult cccl_device_merge_sort_build( - cccl_device_merge_sort_build_result_t* build, + cccl_device_merge_sort_build_result_t* build_ptr, cccl_iterator_t input_keys_it, cccl_iterator_t input_items_it, cccl_iterator_t output_keys_it, @@ -279,7 +279,7 @@ CUresult cccl_device_merge_sort_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept + const char* ctk_path) { CUresult error = CUDA_SUCCESS; try @@ -406,14 +406,15 @@ CUresult cccl_device_merge_sort_build( .add_link_list(ltoir_list) .finalize_program(num_lto_args, lopts); - cuLibraryLoadData(&build->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); - check(cuLibraryGetKernel(&build->block_sort_kernel, build->library, block_sort_kernel_lowered_name.c_str())); - check(cuLibraryGetKernel(&build->partition_kernel, build->library, partition_kernel_lowered_name.c_str())); - check(cuLibraryGetKernel(&build->merge_kernel, build->library, merge_kernel_lowered_name.c_str())); + cuLibraryLoadData(&build_ptr->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); + check( + cuLibraryGetKernel(&build_ptr->block_sort_kernel, build_ptr->library, block_sort_kernel_lowered_name.c_str())); + check(cuLibraryGetKernel(&build_ptr->partition_kernel, build_ptr->library, partition_kernel_lowered_name.c_str())); + check(cuLibraryGetKernel(&build_ptr->merge_kernel, build_ptr->library, merge_kernel_lowered_name.c_str())); - build->cc = cc; - build->cubin = (void*) result.data.release(); - build->cubin_size = result.size; + build_ptr->cc = cc; + build_ptr->cubin = (void*) result.data.release(); + build_ptr->cubin_size = result.size; } catch (const std::exception& exc) { @@ -436,7 +437,7 @@ CUresult cccl_device_merge_sort( cccl_iterator_t d_out_items, unsigned long long num_items, cccl_op_t op, - CUstream stream) noexcept + CUstream stream) { if (cccl_iterator_kind_t::CCCL_ITERATOR == d_out_keys.type || cccl_iterator_kind_t::CCCL_ITERATOR == d_out_items.type) { @@ -499,17 +500,17 @@ CUresult cccl_device_merge_sort( return error; } -CUresult cccl_device_merge_sort_cleanup(cccl_device_merge_sort_build_result_t* bld_ptr) noexcept +CUresult cccl_device_merge_sort_cleanup(cccl_device_merge_sort_build_result_t* build_ptr) { try { - if (bld_ptr == nullptr) + if (build_ptr == nullptr) { return CUDA_ERROR_INVALID_VALUE; } - std::unique_ptr cubin(reinterpret_cast(bld_ptr->cubin)); - check(cuLibraryUnload(bld_ptr->library)); + std::unique_ptr cubin(reinterpret_cast(build_ptr->cubin)); + check(cuLibraryUnload(build_ptr->library)); } catch (const std::exception& exc) { diff --git a/c/parallel/src/reduce.cu b/c/parallel/src/reduce.cu index 113db28dc19..383cfc0a895 100644 --- a/c/parallel/src/reduce.cu +++ b/c/parallel/src/reduce.cu @@ -240,11 +240,10 @@ struct reduce_kernel_source return build.reduction_kernel; } }; - } // namespace reduce CUresult cccl_device_reduce_build( - cccl_device_reduce_build_result_t* build, + cccl_device_reduce_build_result_t* build_ptr, cccl_iterator_t input_it, cccl_iterator_t output_it, cccl_op_t op, @@ -254,7 +253,7 @@ CUresult cccl_device_reduce_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept + const char* ctk_path) { CUresult error = CUDA_SUCCESS; @@ -356,16 +355,17 @@ struct device_reduce_policy {{ .add_link_list(ltoir_list) .finalize_program(num_lto_args, lopts); - cuLibraryLoadData(&build->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); - check(cuLibraryGetKernel(&build->single_tile_kernel, build->library, single_tile_kernel_lowered_name.c_str())); + cuLibraryLoadData(&build_ptr->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); + check( + cuLibraryGetKernel(&build_ptr->single_tile_kernel, build_ptr->library, single_tile_kernel_lowered_name.c_str())); check(cuLibraryGetKernel( - &build->single_tile_second_kernel, build->library, single_tile_second_kernel_lowered_name.c_str())); - check(cuLibraryGetKernel(&build->reduction_kernel, build->library, reduction_kernel_lowered_name.c_str())); + &build_ptr->single_tile_second_kernel, build_ptr->library, single_tile_second_kernel_lowered_name.c_str())); + check(cuLibraryGetKernel(&build_ptr->reduction_kernel, build_ptr->library, reduction_kernel_lowered_name.c_str())); - build->cc = cc; - build->cubin = (void*) result.data.release(); - build->cubin_size = result.size; - build->accumulator_size = accum_t.size; + build_ptr->cc = cc; + build_ptr->cubin = (void*) result.data.release(); + build_ptr->cubin_size = result.size; + build_ptr->accumulator_size = accum_t.size; } catch (const std::exception& exc) { @@ -387,7 +387,7 @@ CUresult cccl_device_reduce( unsigned long long num_items, cccl_op_t op, cccl_value_t init, - CUstream stream) noexcept + CUstream stream) { bool pushed = false; CUresult error = CUDA_SUCCESS; @@ -439,17 +439,17 @@ CUresult cccl_device_reduce( return error; } -CUresult cccl_device_reduce_cleanup(cccl_device_reduce_build_result_t* bld_ptr) noexcept +CUresult cccl_device_reduce_cleanup(cccl_device_reduce_build_result_t* build_ptr) { try { - if (bld_ptr == nullptr) + if (build_ptr == nullptr) { return CUDA_ERROR_INVALID_VALUE; } - std::unique_ptr cubin(reinterpret_cast(bld_ptr->cubin)); - check(cuLibraryUnload(bld_ptr->library)); + std::unique_ptr cubin(reinterpret_cast(build_ptr->cubin)); + check(cuLibraryUnload(build_ptr->library)); } catch (const std::exception& exc) { diff --git a/c/parallel/src/scan.cu b/c/parallel/src/scan.cu index cc5bbacbfe1..3a0c1ad8dcc 100644 --- a/c/parallel/src/scan.cu +++ b/c/parallel/src/scan.cu @@ -275,11 +275,10 @@ struct scan_kernel_source return {build.description_bytes_per_tile, build.payload_bytes_per_tile}; } }; - } // namespace scan CUresult cccl_device_scan_build( - cccl_device_scan_build_result_t* build, + cccl_device_scan_build_result_t* build_ptr, cccl_iterator_t input_it, cccl_iterator_t output_it, cccl_op_t op, @@ -289,7 +288,7 @@ CUresult cccl_device_scan_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept + const char* ctk_path) { CUresult error = CUDA_SUCCESS; @@ -389,9 +388,9 @@ struct device_scan_policy {{ .add_link_list(ltoir_list) .finalize_program(num_lto_args, lopts); - cuLibraryLoadData(&build->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); - check(cuLibraryGetKernel(&build->init_kernel, build->library, init_kernel_lowered_name.c_str())); - check(cuLibraryGetKernel(&build->scan_kernel, build->library, scan_kernel_lowered_name.c_str())); + cuLibraryLoadData(&build_ptr->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); + check(cuLibraryGetKernel(&build_ptr->init_kernel, build_ptr->library, init_kernel_lowered_name.c_str())); + check(cuLibraryGetKernel(&build_ptr->scan_kernel, build_ptr->library, scan_kernel_lowered_name.c_str())); constexpr size_t num_ptx_args = 7; const char* ptx_args[num_ptx_args] = { @@ -431,12 +430,12 @@ __device__ size_t payload_bytes_per_tile = cub::ScanTileState<{2}>::payload_byte } payload_bytes_per_tile = scan::find_size_t(ptx_code, "payload_bytes_per_tile").value_or(0); - build->cc = cc; - build->cubin = (void*) result.data.release(); - build->cubin_size = result.size; - build->accumulator_type = accum_t; - build->description_bytes_per_tile = description_bytes_per_tile; - build->payload_bytes_per_tile = payload_bytes_per_tile; + build_ptr->cc = cc; + build_ptr->cubin = (void*) result.data.release(); + build_ptr->cubin_size = result.size; + build_ptr->accumulator_type = accum_t; + build_ptr->description_bytes_per_tile = description_bytes_per_tile; + build_ptr->payload_bytes_per_tile = payload_bytes_per_tile; } catch (const std::exception& exc) { @@ -458,7 +457,7 @@ CUresult cccl_device_scan( unsigned long long num_items, cccl_op_t op, cccl_value_t init, - CUstream stream) noexcept + CUstream stream) { bool pushed = false; CUresult error = CUDA_SUCCESS; @@ -512,16 +511,16 @@ CUresult cccl_device_scan( return error; } -CUresult cccl_device_scan_cleanup(cccl_device_scan_build_result_t* bld_ptr) noexcept +CUresult cccl_device_scan_cleanup(cccl_device_scan_build_result_t* build_ptr) { try { - if (bld_ptr == nullptr) + if (build_ptr == nullptr) { return CUDA_ERROR_INVALID_VALUE; } - std::unique_ptr cubin(reinterpret_cast(bld_ptr->cubin)); - check(cuLibraryUnload(bld_ptr->library)); + std::unique_ptr cubin(reinterpret_cast(build_ptr->cubin)); + check(cuLibraryUnload(build_ptr->library)); } catch (const std::exception& exc) { diff --git a/c/parallel/src/segmented_reduce.cu b/c/parallel/src/segmented_reduce.cu index 9f0319d7e53..f4e2edbba53 100644 --- a/c/parallel/src/segmented_reduce.cu +++ b/c/parallel/src/segmented_reduce.cu @@ -18,7 +18,6 @@ #include // std::string #include // std::string_view #include // std::is_same_v -#include // std::move #include "kernels/iterators.h" #include "kernels/operators.h" @@ -239,11 +238,10 @@ struct segmented_reduce_kernel_source return build.segmented_reduce_kernel; } }; - } // namespace segmented_reduce CUresult cccl_device_segmented_reduce_build( - cccl_device_segmented_reduce_build_result_t* build, + cccl_device_segmented_reduce_build_result_t* build_ptr, cccl_iterator_t input_it, cccl_iterator_t output_it, cccl_iterator_t start_offset_it, @@ -255,7 +253,7 @@ CUresult cccl_device_segmented_reduce_build( const char* cub_path, const char* thrust_path, const char* libcudacxx_path, - const char* ctk_path) noexcept + const char* ctk_path) { CUresult error = CUDA_SUCCESS; @@ -361,14 +359,14 @@ struct device_segmented_reduce_policy {{ .finalize_program(num_lto_args, lopts); // populate build struct members - cuLibraryLoadData(&build->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); + cuLibraryLoadData(&build_ptr->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0); check(cuLibraryGetKernel( - &build->segmented_reduce_kernel, build->library, segmented_reduce_kernel_lowered_name.c_str())); + &build_ptr->segmented_reduce_kernel, build_ptr->library, segmented_reduce_kernel_lowered_name.c_str())); - build->cc = cc; - build->cubin = (void*) result.data.release(); - build->cubin_size = result.size; - build->accumulator_size = accum_t.size; + build_ptr->cc = cc; + build_ptr->cubin = (void*) result.data.release(); + build_ptr->cubin_size = result.size; + build_ptr->accumulator_size = accum_t.size; } catch (const std::exception& exc) { @@ -392,7 +390,7 @@ CUresult cccl_device_segmented_reduce( cccl_iterator_t end_offset, cccl_op_t op, cccl_value_t init, - CUstream stream) noexcept + CUstream stream) { bool pushed = false; CUresult error = CUDA_SUCCESS; @@ -447,18 +445,18 @@ CUresult cccl_device_segmented_reduce( return error; } -CUresult cccl_device_segmented_reduce_cleanup(cccl_device_segmented_reduce_build_result_t* bld_ptr) noexcept +CUresult cccl_device_segmented_reduce_cleanup(cccl_device_segmented_reduce_build_result_t* build_ptr) { try { - if (bld_ptr == nullptr) + if (build_ptr == nullptr) { return CUDA_ERROR_INVALID_VALUE; } // allocation behind cubin is owned by unique_ptr with delete[] deleter now - std::unique_ptr cubin(reinterpret_cast(bld_ptr->cubin)); - check(cuLibraryUnload(bld_ptr->library)); + std::unique_ptr cubin(reinterpret_cast(build_ptr->cubin)); + check(cuLibraryUnload(build_ptr->library)); } catch (const std::exception& exc) { diff --git a/c/parallel/test/CMakeLists.txt b/c/parallel/test/CMakeLists.txt index 361e9b77230..2e56f83e1de 100644 --- a/c/parallel/test/CMakeLists.txt +++ b/c/parallel/test/CMakeLists.txt @@ -38,3 +38,23 @@ list(REMOVE_ITEM test_srcs test_main.cpp) foreach(test_src IN LISTS test_srcs) cccl_c_parallel_add_test(test_target "${test_src}") endforeach() + + +function(cccl_c_parallel_add_c_test target_name_var) + # check that C compiler can compile CCCL.C headers + set(target_name "cccl.c.parallel.test.header") + set(target_name_var ${target_name} PARENT_SCOPE) + set(source test_header.c) + + set_source_files_properties(${source} PROPERTIES LANGUAGE C) + + add_library(${target_name} OBJECT + "${source}" + ) + target_include_directories(${target_name} PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) + target_link_libraries(${target_name} PRIVATE + cccl.c.parallel + ) +endfunction() + +cccl_c_parallel_add_c_test(test_target) diff --git a/c/parallel/test/test_header.c b/c/parallel/test/test_header.c new file mode 100644 index 00000000000..a082fda95a8 --- /dev/null +++ b/c/parallel/test/test_header.c @@ -0,0 +1,12 @@ +#include + +#include +#include +#include +#include +#include + +int main(void) +{ + return 0; +} diff --git a/c/parallel/test/test_main.cpp b/c/parallel/test/test_main.cpp index d1fb01d96bd..f776e011916 100644 --- a/c/parallel/test/test_main.cpp +++ b/c/parallel/test/test_main.cpp @@ -49,6 +49,10 @@ int main(int argc, char* argv[]) return returnCode; } - cudaSetDevice(device_guard(device_id)); + if (cudaSuccess != cudaSetDevice(device_guard(device_id))) + { + std::cerr << "Can't set device." << std::endl; + std::exit(-1); + }; return session.run(argc, argv); }