Skip to content

Commit

Permalink
[NBody] Add CUDA-OpenGL interop for buffer storage (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
rafbiels authored Apr 4, 2024
1 parent 65802a5 commit f811144
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 18 deletions.
10 changes: 9 additions & 1 deletion src/nbody/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
corrade_add_resource(NBody_RESOURCES assets/resources.conf)

if (ENABLE_CUDA)
find_package(CUDAToolkit QUIET)
if (CUDAToolkit_FOUND)
list(APPEND BackendLibs CUDA::cudart)
endif()
endif()

add_library(NBodyResourceLib SHARED ${NBody_RESOURCES})
target_link_libraries(NBodyResourceLib PRIVATE Corrade::Utility)
# Ignore unused variable in the file automatically generated by Corrade
Expand All @@ -11,7 +18,8 @@ add_executable(NBody main.cpp
target_link_libraries(NBody PRIVATE
NBodyResourceLib
Magnum::Magnum Magnum::GL Magnum::Application
Magnum::Trade MagnumIntegration::ImGui)
Magnum::Trade MagnumIntegration::ImGui
${BackendLibs})
add_dependencies(NBody Magnum::AnyImageImporter MagnumPlugins::StbImageImporter)

target_compile_options(NBody PUBLIC ${SYCL_FLAGS})
Expand Down
158 changes: 158 additions & 0 deletions src/nbody/InteropGLBuffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#pragma once

#include <Magnum/GL/Buffer.h>

#if __has_include(<cuda.h>) && __has_include(<cuda_gl_interop.h>)
#include <cuda.h>
#include <cuda_gl_interop.h>
#define CUDA_GL_INTEROP_API_AVAILABLE 1
#else
#define CUDA_GL_INTEROP_API_AVAILABLE 0
#endif

/// @brief Magnum GL Buffer wrapper implementing backend-specific interop to
/// work directly on OpenGL device buffers instead of using host memory.
///
/// Currently supports only CUDA-GL interop, but the implementation aims to
/// make the addition of further backends easy.
///
/// If no supported interop is available at runtime or compile time, the
/// implementation falls back to regular GL Buffer with host memory storage.
template <typename T>
class InteropGLBuffer : public Magnum::GL::Buffer {
public:
/// Default constructor, creates invalid buffer
InteropGLBuffer() : m_storage{nullptr} {}

/// Standard constructor with specified size
InteropGLBuffer(size_t numElements)
: m_type{testCudaGL() ? InteropType::CUDA : InteropType::None},
m_storage{m_type == InteropType::CUDA
? Corrade::Containers::Array<T>(nullptr, numElements)
: Corrade::Containers::Array<T>(Corrade::ValueInit,
numElements)} {
setData(m_storage, Magnum::GL::BufferUsage::DynamicDraw);
mapResources();
}

/// Destructor, unmaps resources if necessary
virtual ~InteropGLBuffer() { unmapResources(); }

/// No copies allowed
InteropGLBuffer(const InteropGLBuffer&) = delete;
/// No copies allowed
InteropGLBuffer& operator=(const InteropGLBuffer&) = delete;

/// Move constructor
InteropGLBuffer(InteropGLBuffer&& other)
: m_type{other.m_type},
m_storage{std::move(other.m_storage)},
m_devPtr{other.m_devPtr},
m_devPtrSize{other.m_devPtrSize},
m_backendResource{other.m_backendResource} {
setData(m_storage, Magnum::GL::BufferUsage::DynamicDraw);
other.m_devPtr = nullptr;
other.m_devPtrSize = 0;
other.m_backendResource = nullptr;
};

/// Move assignment
InteropGLBuffer& operator=(InteropGLBuffer&& other) {
unmapResources();

m_type = other.m_type;
m_storage = std::move(other.m_storage);
setData(m_storage, Magnum::GL::BufferUsage::DynamicDraw);

m_devPtr = other.m_devPtr;
m_devPtrSize = other.m_devPtrSize;
m_backendResource = other.m_backendResource;

other.m_devPtr = nullptr;
other.m_devPtrSize = 0;
other.m_backendResource = nullptr;

return *this;
};

/// Return a pointer to the underlying storage which is either a GL buffer
/// device pointer or, in case of no interop, a host memory pointer
T* getStorage() {
return m_type == InteropType::CUDA ? m_devPtr : m_storage.data();
}

private:
enum class InteropType { None, CUDA };
InteropType m_type{InteropType::None};
Corrade::Containers::Array<T> m_storage;
T* m_devPtr{nullptr};
size_t m_devPtrSize{0};

#if CUDA_GL_INTEROP_API_AVAILABLE
cudaGraphicsResource* m_backendResource{nullptr};
#else
void* m_backendResource{nullptr};
#endif

/// Register a GL-device interop buffer and store the associated pointers
void mapResources() {
#if CUDA_GL_INTEROP_API_AVAILABLE
if (m_type == InteropType::CUDA) {
checkError(cudaGraphicsGLRegisterBuffer(&m_backendResource, id(),
cudaGraphicsRegisterFlagsNone));
checkError(cudaGraphicsMapResources(1, &m_backendResource, NULL));
checkError(cudaGraphicsResourceGetMappedPointer(
reinterpret_cast<void**>(&m_devPtr), &m_devPtrSize,
m_backendResource));
}
#endif
}

/// Unregister the GL-device interop buffer
void unmapResources() {
#if CUDA_GL_INTEROP_API_AVAILABLE
if (m_type == InteropType::CUDA) {
if (m_devPtr != nullptr) {
checkError(cudaGraphicsUnmapResources(1, &m_backendResource));
m_devPtr = nullptr;
m_devPtrSize = 0;
}
if (m_backendResource != nullptr) {
checkError(cudaGraphicsUnregisterResource(m_backendResource));
m_backendResource = nullptr;
}
}
#endif
}

/// Return true if CUDA-OpenGL interop is possible
/// (i.e. cudaGLGetDevices finds at least one device)
static bool testCudaGL() {
#if CUDA_GL_INTEROP_API_AVAILABLE
constexpr static unsigned int maxDevices{10};
unsigned int cudaDeviceCount{0};
int cudaDevices[maxDevices] = {0};
cudaError_t code = cudaGLGetDevices(&cudaDeviceCount, &cudaDevices[0],
maxDevices, cudaGLDeviceListAll);
return code == cudaError_t::cudaSuccess && cudaDeviceCount > 0;
#endif
return false;
}

/// Helper function to check errors from device API
template <typename ErrorType>
static void checkError(ErrorType code) {
#if CUDA_GL_INTEROP_API_AVAILABLE
if constexpr (std::is_same_v<ErrorType, cudaError_t>) {
if (code != cudaError_t::cudaSuccess) {
std::cout << "CUDA Error: " << cudaGetErrorString(code) << std::endl;
return;
}
}
#endif
if (code != static_cast<ErrorType>(0)) {
std::cout << "Non-zero error code: " << code << std::endl;
return;
}
}
};
27 changes: 10 additions & 17 deletions src/nbody/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

#include <sycl/sycl.hpp>

#include "InteropGLBuffer.hpp"
#include "sim.hpp"

using num_t = float;
Expand Down Expand Up @@ -175,7 +176,7 @@ class NBodyApp : public Magnum::Platform::Application {

Magnum::Matrix4 m_view;
Magnum::Matrix4 m_viewProjection;
Corrade::Containers::Array<char> m_vboStorage;
InteropGLBuffer<char> m_vbo{};
Magnum::GL::Mesh m_mesh;
Magnum::GL::Texture2D m_star_tex;
NBodyShader m_shader;
Expand Down Expand Up @@ -238,8 +239,7 @@ class NBodyApp : public Magnum::Platform::Application {
// bodies
void init_gl_bufs() {
const size_t arraySize{m_n_bodies * sizeof(sycl::vec<num_t, 3>)};
m_vboStorage =
Corrade::Containers::Array<char>(Corrade::ValueInit, 2 * arraySize);
m_vbo = InteropGLBuffer<char>{2 * arraySize};
m_mesh = Magnum::GL::Mesh{Magnum::GL::MeshPrimitive::Points};
m_mesh.setCount(m_n_bodies);
}
Expand Down Expand Up @@ -377,20 +377,13 @@ class NBodyApp : public Magnum::Platform::Application {

// Update star buffer data with new positions
const size_t arraySize{m_n_bodies * sizeof(sycl::vec<num_t, 3>)};
m_sim.with_mapped(read_bufs_t<1>{},
[&](sycl::vec<num_t, 3> const* positions) {
std::copy_n(reinterpret_cast<const char*>(positions),
arraySize, m_vboStorage.data());
});
m_sim.with_mapped(read_bufs_t<0>{},
[&](sycl::vec<num_t, 3> const* velocities) {
std::copy_n(reinterpret_cast<const char*>(velocities),
arraySize, m_vboStorage.data() + arraySize);
});

Magnum::GL::Buffer vbo{m_vboStorage};
m_mesh.addVertexBuffer(vbo, 0, NBodyShader::Position{})
.addVertexBuffer(vbo, arraySize, NBodyShader::Velocity{});

sycl::event::wait_and_throw(
{m_sim.copyTo<1>(m_vbo.getStorage()),
m_sim.copyTo<0>(m_vbo.getStorage() + arraySize)});

m_mesh.addVertexBuffer(m_vbo, 0, NBodyShader::Position{})
.addVertexBuffer(m_vbo, arraySize, NBodyShader::Velocity{});

// Draw bodies
m_shader.setView({&m_view, 1})
Expand Down
10 changes: 10 additions & 0 deletions src/nbody/sim.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,16 @@ class GravSim {
func(std::get<0>(acc).get_pointer());
}

// Copy buffer contents into the dest pointer (host or device)
template <size_t VarId>
sycl::event copyTo(void* dest) {
return m_q.submit([&](sycl::handler& cgh) {
cgh.copy(
std::get<0>(m_bufs.read().gen_read_accs(cgh, read_bufs_t<VarId>{})),
dest);
});
}

private:
void internal_step() {
m_q.submit([&](sycl::handler& cgh) {
Expand Down

0 comments on commit f811144

Please sign in to comment.