Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU: CMake updates to pull in only the CXX files needed per kernel, and some fixes for deterministic mode on large TFs #12784

Merged
merged 4 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/Common/GPUCommonAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ typedef GPUCommonAlgorithm CAAlgo;
} // namespace gpu
} // namespace GPUCA_NAMESPACE

#if (((defined(__CUDACC__) && !defined(__clang__)) || defined(__HIPCC__))) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) && !defined(GPUCA_GPUCODE_HOSTONLY)
#if (((defined(__CUDACC__) && !defined(__clang__)) || defined(__HIPCC__))) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_HOSTONLY)

#include "GPUCommonAlgorithmThrust.h"

Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/GPUGeneralKernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
/// \author David Rohr

#include "GPUGeneralKernels.h"
#include "GPUConstantMem.h"
using namespace GPUCA_NAMESPACE::gpu;

template <>
Expand Down
117 changes: 0 additions & 117 deletions GPU/GPUTracking/Base/GPUReconstructionIncludesDevice.h

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUReconstructionIncludesDevice.h
/// \author David Rohr

#ifndef GPURECONSTRUCTIONINCLUDESDEVICE_H
#define GPURECONSTRUCTIONINCLUDESDEVICE_H

#include "GPUDef.h"

namespace GPUCA_NAMESPACE
{
namespace gpu
{
}
} // namespace GPUCA_NAMESPACE
using namespace GPUCA_NAMESPACE::gpu;

#if !defined(GPUCA_OPENCL1) && (!defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE))
#define GPUCA_KRNL_NOOCL1
#endif

// clang-format off
$<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<LIST:REMOVE_DUPLICATES,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_FILES>>,APPEND,">,PREPEND,#include ">,
>
// clang-format on

#endif // GPURECONSTRUCTIONINCLUDESDEVICE_H
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ int GPUReconstructionCUDA::InitDevice_Runtime()
GPUFailedMsgI(cudaDeviceReset());
return (1);
}
if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, GPUCA_GPU_HEAP_SIZE))) {
if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max<size_t>(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) {
GPUError("Error setting CUDA stack size");
GPUFailedMsgI(cudaDeviceReset());
return (1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUReconstructionCUDIncludes.h
/// \file GPUReconstructionCUDAIncludes.h
/// \author David Rohr

#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ texture<cahit2, cudaTextureType1D, cudaReadModeElementType> gAliTexRefu2;
texture<calink, cudaTextureType1D, cudaReadModeElementType> gAliTexRefu;
#endif

#include "GPUReconstructionIncludesDevice.h"
#include "GPUReconstructionIncludesDeviceAll.h"

template <>
void GPUReconstructionCUDABackend::runKernelBackendInternal<GPUMemClean16, 0>(krnlSetup& _xyz, void* const& ptr, unsigned long const& size)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
#define GPUCA_GPUCODE_COMPILEKERNELS
#include "GPUReconstructionCUDAIncludes.h"
#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionIncludesDevice.h"
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, x_class, x_attributes, x_arguments, x_forward)
#define GPUCA_KRNL_LOAD_single(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward);
#define GPUCA_KRNL_LOAD_multi(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward);
#include "GPUReconstructionKernelMacros.h"

// clang-format off
@O2_GPU_KERNEL_TEMPLATE_FILES@
// clang-format on

extern "C" {
// clang-format off
@O2_GPU_KERNEL_TEMPLATE_REPLACE@
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#define GPUCA_GPUCODE_GENRTC
#define GPUCA_GPUCODE_COMPILEKERNELS
#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionIncludesDevice.h"
#include "GPUReconstructionIncludesDeviceAll.h"

#ifndef GPUCA_GPUCODE_DEVICE
#error RTC Preprocessing must run on device code
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/hip/GPUReconstructionHIP.hip
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class GPUDebugTiming
bool mDo;
};

#include "GPUReconstructionIncludesDevice.h"
#include "GPUReconstructionIncludesDeviceAll.h"

#undef GPUCA_KRNL_REG
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
Expand Down
21 changes: 20 additions & 1 deletion GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,27 @@
#define GPUCA_OPENCL1
#endif

#include "GPUReconstructionIncludesDevice.h"
#include "GPUConstantMem.h"
#ifdef __OPENCLCPP__
#include "GPUReconstructionIncludesDeviceAll.h"
#else // Workaround, since OpenCL1 cannot digest all files
#include "GPUTPCTrackParam.cxx"
#include "GPUTPCTrack.cxx"
#include "GPUTPCGrid.cxx"
#include "GPUTPCRow.cxx"
#include "GPUTPCTracker.cxx"

#include "GPUGeneralKernels.cxx"
#include "GPUErrors.cxx"

#include "GPUTPCTrackletSelector.cxx"
#include "GPUTPCNeighboursFinder.cxx"
#include "GPUTPCNeighboursCleaner.cxx"
#include "GPUTPCStartHitsFinder.cxx"
#include "GPUTPCStartHitsSorter.cxx"
#include "GPUTPCTrackletConstructor.cxx"
#include "GPUTPCGlobalTracking.cxx"
#endif

// if (gpu_mem != pTracker.GPUParametersConst()->gpumem) return; //TODO!

Expand Down
5 changes: 4 additions & 1 deletion GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR CONFIG_O2_EXTENSIONS)
TPCClusterFinder/CfUtils.h
TPCClusterFinder/ChargePos.h
Definitions/clusterFinderDefs.h
TPCClusterFinder/GPUTPCClusterFinderKernels.h
TPCClusterFinder/PackedCharge.h
TPCClusterFinder/GPUTPCCFChainContext.h)
endif()
Expand Down Expand Up @@ -251,6 +250,10 @@ file(GENERATE
OUTPUT include_gpu_onthefly/GPUReconstructionKernelIncludes.h
INPUT Base/GPUReconstructionKernelIncludes.template.h
)
file(GENERATE
OUTPUT include_gpu_onthefly/GPUReconstructionIncludesDeviceAll.h
INPUT Base/GPUReconstructionIncludesDeviceAll.template.h
)
if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2")
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly)
endif()
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Definitions/GPUDefGPUParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@
#ifndef GPUCA_LB_GPUTPCGMO2Output_output
#define GPUCA_LB_GPUTPCGMO2Output_output 256
#endif
#ifndef GPUCA_LB_GPUITSFitterKernel
#define GPUCA_LB_GPUITSFitterKernel 256
#ifndef GPUCA_LB_GPUITSFitterKernels
#define GPUCA_LB_GPUITSFitterKernels 256
#endif
#ifndef GPUCA_LB_GPUTPCStartHitsFinder
#define GPUCA_LB_GPUTPCStartHitsFinder 256
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ using namespace GPUCA_NAMESPACE::gpu;
using namespace o2;
using namespace o2::its;

GPUdii() bool GPUITSFitterKernel::fitTrack(GPUITSFitter& GPUrestrict() Fitter, GPUTPCGMPropagator& GPUrestrict() prop, GPUITSTrack& GPUrestrict() track, int start, int end, int step)
GPUdii() bool GPUITSFitterKernels::fitTrack(GPUITSFitter& GPUrestrict() Fitter, GPUTPCGMPropagator& GPUrestrict() prop, GPUITSTrack& GPUrestrict() track, int start, int end, int step)
{
for (int iLayer{start}; iLayer != end; iLayer += step) {
if (track.mClusters[iLayer] == o2::its::constants::its::UnusedIndex) {
Expand All @@ -56,7 +56,7 @@ GPUdii() bool GPUITSFitterKernel::fitTrack(GPUITSFitter& GPUrestrict() Fitter, G
}

template <>
GPUdii() void GPUITSFitterKernel::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors)
GPUdii() void GPUITSFitterKernels::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors)
{
GPUITSFitter& Fitter = processors.itsFitter;

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/ITS/GPUITSFitterKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class GPUTPCGMPropagator;
class GPUITSFitter;
class GPUITSTrack;

class GPUITSFitterKernel : public GPUKernelTemplate
class GPUITSFitterKernels : public GPUKernelTemplate
{
public:
GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::ITSTracking; }
Expand Down
1 change: 0 additions & 1 deletion GPU/GPUTracking/TPCClusterFinder/CfUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "GPUCommonAlgorithm.h"
#include "Array2D.h"
#include "CfConsts.h"
#include "GPUTPCClusterFinderKernels.h"

namespace GPUCA_NAMESPACE::gpu
{
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "ClusterAccumulator.h"
#include "GPUTPCGeometry.h"
#include "CfUtils.h"
#include "GPUParam.h"
#include "DataFormatsTPC/ClusterNative.h"

using namespace GPUCA_NAMESPACE::gpu;
using namespace GPUCA_NAMESPACE::gpu::tpccf;
Expand Down
30 changes: 0 additions & 30 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderKernels.h

This file was deleted.

Loading
Loading