Skip to content

Commit

Permalink
Fix CCCL C headers to be compilable by a C compiler (#3885)
Browse files Browse the repository at this point in the history
* Closes gh-3882

This change ensures that the cccl/c/parallel headers are compilable by a C compiler.

1. Corrected `typedef struct` and `typedef enum` declarations so that C types
   have the same names as in C++.  That is, `typedef struct name {...} otherName;`
   was changed to `typedef struct name {...} name;`.
2. `noexcept` qualifier was removed from function declarations.
3. Implementation was changed to move `noexcept` implementations into
   a namespace, and implement publicly declared functions as thin
   shims calling these noexcept functions from a namespace.
4. Adds c/parallel/test/test_header.c
   This is a C file which includes all public headers, and contains
   a trivial `int main(void)` function.
   The file is compiled using a C compiler to produce an object file.
   Hence it does not result in a dedicated test to run, but any
   non-C-compliant changes to these headers would break the test
   compilation step.

   N.B.: Every new header file must be included in `test_headers.c`
   file for it to be tested.

5. Piggy-backing on this PR, added runtime error condition checking
   in test_main.cpp to verify that the `cudaSetDevice` call did not
   report an error.

6. Modified the list of languages of the cccl/c/parallel CMake project
   to include C.

* Revert the use of noexcept implementations called from C-declared functions

Instead, simply remove the use of `noexcept` in the C++ definitions of these
functions, per Bernhard's suggestion
  • Loading branch information
oleksandr-pavlyk authored Feb 21, 2025
1 parent ca004f5 commit fd0cd24
Show file tree
Hide file tree
Showing 15 changed files with 146 additions and 121 deletions.
2 changes: 1 addition & 1 deletion c/parallel/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.21)

project(CCCL_C_Parallel LANGUAGES CUDA CXX)
project(CCCL_C_Parallel LANGUAGES CUDA CXX C)

option(CCCL_C_Parallel_ENABLE_TESTING "Build CUDA Experimental's tests." OFF)
option(CCCL_C_Parallel_ENABLE_HEADER_TESTING "Build CUDA Experimental's standalone headers." OFF)
Expand Down
12 changes: 4 additions & 8 deletions c/parallel/include/cccl/c/for.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@

CCCL_C_EXTERN_C_BEGIN

struct cccl_device_for_build_result_t
typedef struct cccl_device_for_build_result_t
{
int cc;
void* cubin;
size_t cubin_size;
CUlibrary library;
CUkernel static_kernel;
};
} cccl_device_for_build_result_t;

CCCL_C_API CUresult cccl_device_for_build(
cccl_device_for_build_result_t* build,
Expand All @@ -39,14 +39,10 @@ CCCL_C_API CUresult cccl_device_for_build(
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path) noexcept;
const char* ctk_path);

CCCL_C_API CUresult cccl_device_for(
cccl_device_for_build_result_t build,
cccl_iterator_t d_data,
int64_t num_items,
cccl_op_t op,
CUstream stream) noexcept;
cccl_device_for_build_result_t build, cccl_iterator_t d_data, int64_t num_items, cccl_op_t op, CUstream stream);

CCCL_C_API CUresult cccl_device_for_cleanup(cccl_device_for_build_result_t* bld_ptr);

Expand Down
10 changes: 5 additions & 5 deletions c/parallel/include/cccl/c/merge_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

CCCL_C_EXTERN_C_BEGIN

struct cccl_device_merge_sort_build_result_t
typedef struct cccl_device_merge_sort_build_result_t
{
int cc;
void* cubin;
Expand All @@ -30,7 +30,7 @@ struct cccl_device_merge_sort_build_result_t
CUkernel block_sort_kernel;
CUkernel partition_kernel;
CUkernel merge_kernel;
};
} cccl_device_merge_sort_build_result_t;

CCCL_C_API CUresult cccl_device_merge_sort_build(
cccl_device_merge_sort_build_result_t* build,
Expand All @@ -44,7 +44,7 @@ CCCL_C_API CUresult cccl_device_merge_sort_build(
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path) noexcept;
const char* ctk_path);

CCCL_C_API CUresult cccl_device_merge_sort(
cccl_device_merge_sort_build_result_t build,
Expand All @@ -56,8 +56,8 @@ CCCL_C_API CUresult cccl_device_merge_sort(
cccl_iterator_t d_out_items,
unsigned long long num_items,
cccl_op_t op,
CUstream stream) noexcept;
CUstream stream);

CCCL_C_API CUresult cccl_device_merge_sort_cleanup(cccl_device_merge_sort_build_result_t* bld_ptr) noexcept;
CCCL_C_API CUresult cccl_device_merge_sort_cleanup(cccl_device_merge_sort_build_result_t* bld_ptr);

CCCL_C_EXTERN_C_END
10 changes: 5 additions & 5 deletions c/parallel/include/cccl/c/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

CCCL_C_EXTERN_C_BEGIN

struct cccl_device_reduce_build_result_t
typedef struct cccl_device_reduce_build_result_t
{
int cc;
void* cubin;
Expand All @@ -31,7 +31,7 @@ struct cccl_device_reduce_build_result_t
CUkernel single_tile_kernel;
CUkernel single_tile_second_kernel;
CUkernel reduction_kernel;
};
} cccl_device_reduce_build_result_t;

// TODO return a union of nvtx/cuda/nvrtc errors or a string?
CCCL_C_API CUresult cccl_device_reduce_build(
Expand All @@ -45,7 +45,7 @@ CCCL_C_API CUresult cccl_device_reduce_build(
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path) noexcept;
const char* ctk_path);

CCCL_C_API CUresult cccl_device_reduce(
cccl_device_reduce_build_result_t build,
Expand All @@ -56,8 +56,8 @@ CCCL_C_API CUresult cccl_device_reduce(
unsigned long long num_items,
cccl_op_t op,
cccl_value_t init,
CUstream stream) noexcept;
CUstream stream);

CCCL_C_API CUresult cccl_device_reduce_cleanup(cccl_device_reduce_build_result_t* bld_ptr) noexcept;
CCCL_C_API CUresult cccl_device_reduce_cleanup(cccl_device_reduce_build_result_t* bld_ptr);

CCCL_C_EXTERN_C_END
12 changes: 6 additions & 6 deletions c/parallel/include/cccl/c/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

CCCL_C_EXTERN_C_BEGIN

struct cccl_device_scan_build_result_t
typedef struct cccl_device_scan_build_result_t
{
int cc;
void* cubin;
Expand All @@ -32,10 +32,10 @@ struct cccl_device_scan_build_result_t
CUkernel scan_kernel;
size_t description_bytes_per_tile;
size_t payload_bytes_per_tile;
};
} cccl_device_scan_build_result_t;

CCCL_C_API CUresult cccl_device_scan_build(
cccl_device_scan_build_result_t* build,
cccl_device_scan_build_result_t* build_ptr,
cccl_iterator_t d_in,
cccl_iterator_t d_out,
cccl_op_t op,
Expand All @@ -45,7 +45,7 @@ CCCL_C_API CUresult cccl_device_scan_build(
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path) noexcept;
const char* ctk_path);

CCCL_C_API CUresult cccl_device_scan(
cccl_device_scan_build_result_t build,
Expand All @@ -56,8 +56,8 @@ CCCL_C_API CUresult cccl_device_scan(
unsigned long long num_items,
cccl_op_t op,
cccl_value_t init,
CUstream stream) noexcept;
CUstream stream);

CCCL_C_API CUresult cccl_device_scan_cleanup(cccl_device_scan_build_result_t* bld_ptr) noexcept;
CCCL_C_API CUresult cccl_device_scan_cleanup(cccl_device_scan_build_result_t* bld_ptr);

CCCL_C_EXTERN_C_END
19 changes: 9 additions & 10 deletions c/parallel/include/cccl/c/segmented_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,31 @@

CCCL_C_EXTERN_C_BEGIN

struct cccl_device_segmented_reduce_build_result_t
typedef struct cccl_device_segmented_reduce_build_result_t
{
int cc;
void* cubin;
size_t cubin_size;
CUlibrary library;
unsigned long long accumulator_size;
unsigned long long offset_size;
CUkernel segmented_reduce_kernel;
};
} cccl_device_segmented_reduce_build_result_t;

// TODO return a union of nvtx/cuda/nvrtc errors or a string?
CCCL_C_API CUresult cccl_device_segmented_reduce_build(
cccl_device_segmented_reduce_build_result_t* build,
cccl_iterator_t d_in,
cccl_iterator_t d_out,
cccl_iterator_t begin_offset_it,
cccl_iterator_t end_offset_it,
cccl_iterator_t begin_offset_in,
cccl_iterator_t end_offset_in,
cccl_op_t op,
cccl_value_t init,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path) noexcept;
const char* ctk_path);

CCCL_C_API CUresult cccl_device_segmented_reduce(
cccl_device_segmented_reduce_build_result_t build,
Expand All @@ -55,12 +54,12 @@ CCCL_C_API CUresult cccl_device_segmented_reduce(
cccl_iterator_t d_in,
cccl_iterator_t d_out,
unsigned long long num_offsets,
cccl_iterator_t start_offset_it,
cccl_iterator_t end_offset_it,
cccl_iterator_t start_offset_in,
cccl_iterator_t end_offset_in,
cccl_op_t op,
cccl_value_t init,
CUstream stream) noexcept;
CUstream stream);

CCCL_C_API CUresult cccl_device_segmented_reduce_cleanup(cccl_device_segmented_reduce_build_result_t* bld_ptr) noexcept;
CCCL_C_API CUresult cccl_device_segmented_reduce_cleanup(cccl_device_segmented_reduce_build_result_t* bld_ptr);

CCCL_C_EXTERN_C_END
14 changes: 7 additions & 7 deletions c/parallel/include/cccl/c/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,20 @@ typedef enum cccl_type_enum
CCCL_FLOAT32 = 8,
CCCL_FLOAT64 = 9,
CCCL_STORAGE = 10
} ccclType;
} cccl_type_enum;

typedef struct cccl_type_info
{
int size;
int alignment;
cccl_type_enum type;
} ccclTypeInfo;
} cccl_type_info;

typedef enum cccl_op_kind_t
{
CCCL_STATELESS = 0,
CCCL_STATEFUL = 1
} ccclOpKind;
} cccl_op_kind_t;

typedef struct cccl_op_t
{
Expand All @@ -61,19 +61,19 @@ typedef struct cccl_op_t
int size;
int alignment;
void* state;
} ccclOp;
} cccl_op_t;

typedef enum cccl_iterator_kind_t
{
CCCL_POINTER = 0,
CCCL_ITERATOR = 1
} ccclIteratorKind;
} cccl_iterator_kind_t;

typedef struct cccl_value_t
{
cccl_type_info type;
void* state;
} ccclValue;
} cccl_value_t;

typedef struct cccl_iterator_t
{
Expand All @@ -84,6 +84,6 @@ typedef struct cccl_iterator_t
cccl_op_t dereference;
cccl_type_info value_type;
void* state;
} ccclIterator;
} cccl_iterator_t;

CCCL_C_EXTERN_C_END
28 changes: 12 additions & 16 deletions c/parallel/src/for.cu
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ static std::string get_device_for_kernel_name()
}

CUresult cccl_device_for_build(
cccl_device_for_build_result_t* build,
cccl_device_for_build_result_t* build_ptr,
cccl_iterator_t d_data,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path) noexcept
const char* ctk_path)
{
CUresult error = CUDA_SUCCESS;

Expand Down Expand Up @@ -124,12 +124,12 @@ CUresult cccl_device_for_build(
result = cl.finalize_program(num_lto_args, lopts);
}

cuLibraryLoadData(&build->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0);
check(cuLibraryGetKernel(&build->static_kernel, build->library, lowered_name.c_str()));
cuLibraryLoadData(&build_ptr->library, result.data.get(), nullptr, nullptr, 0, nullptr, nullptr, 0);
check(cuLibraryGetKernel(&build_ptr->static_kernel, build_ptr->library, lowered_name.c_str()));

build->cc = cc;
build->cubin = (void*) result.data.release();
build->cubin_size = result.size;
build_ptr->cc = cc;
build_ptr->cubin = (void*) result.data.release();
build_ptr->cubin_size = result.size;
}
catch (...)
{
Expand All @@ -139,11 +139,7 @@ CUresult cccl_device_for_build(
}

CUresult cccl_device_for(
cccl_device_for_build_result_t build,
cccl_iterator_t d_data,
int64_t num_items,
cccl_op_t op,
CUstream stream) noexcept
cccl_device_for_build_result_t build, cccl_iterator_t d_data, int64_t num_items, cccl_op_t op, CUstream stream)
{
bool pushed = false;
CUresult error = CUDA_SUCCESS;
Expand All @@ -167,17 +163,17 @@ CUresult cccl_device_for(
return error;
}

CUresult cccl_device_for_cleanup(cccl_device_for_build_result_t* bld_ptr)
CUresult cccl_device_for_cleanup(cccl_device_for_build_result_t* build_ptr)
{
try
{
if (bld_ptr == nullptr)
if (build_ptr == nullptr)
{
return CUDA_ERROR_INVALID_VALUE;
}

std::unique_ptr<char[]> cubin(reinterpret_cast<char*>(bld_ptr->cubin));
check(cuLibraryUnload(bld_ptr->library));
std::unique_ptr<char[]> cubin(reinterpret_cast<char*>(build_ptr->cubin));
check(cuLibraryUnload(build_ptr->library));
}
catch (...)
{
Expand Down
Loading

0 comments on commit fd0cd24

Please sign in to comment.