Skip to content

Commit

Permalink
[6.3] OMPT Target Offload Support (#26)
Browse files Browse the repository at this point in the history
* OMPT Target Offload Support

- Porting from ROCm/omnitrace#411
- Improve OMPT support
- Add OpenMP target example to testing
- Update Timemory submodule to use ROCm/Timemory rather than NERSC/Timemory
- Standardize the `cmake_minimum_required` to 3.18.4 across workflows, project, and examples

Signed-off-by: David Galiffi <David.Galiffi@amd.com>

* Resolve compilation warning with gcc 13

* Add test for ompt-target

---------

Signed-off-by: David Galiffi <David.Galiffi@amd.com>
  • Loading branch information
dgaliffiAMD authored Dec 9, 2024
1 parent 1ec47cd commit 04a84dd
Show file tree
Hide file tree
Showing 38 changed files with 808 additions and 108 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[submodule "external/timemory"]
path = external/timemory
url = https://github.com/NERSC/timemory.git
url = https://github.com/ROCm/timemory.git
[submodule "external/perfetto"]
path = external/perfetto
url = https://github.com/google/perfetto.git
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND CMAKE_CURRENT_SOURCE_DIR STREQUAL
CMAKE_SOURCE_DIR)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ export LD_LIBRARY_PATH=/opt/rocprofiler-systems/lib:${LD_LIBRARY_PATH}

Generate a rocprofiler-systems configuration file using `rocprof-sys-avail -G rocprof-sys.cfg`. Optionally, use `rocprof-sys-avail -G rocprof-sys.cfg --all` for
a verbose configuration file with descriptions, categories, etc. Modify the configuration file as desired, e.g. enable
[perfetto](https://perfetto.dev/), [timemory](https://github.com/NERSC/timemory), sampling, and process-level sampling by default
[perfetto](https://perfetto.dev/), [timemory](https://github.com/ROCm/timemory), sampling, and process-level sampling by default
and tweak some sampling default values:

```console
Expand Down
2 changes: 1 addition & 1 deletion cmake/Packages.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ mark_as_advanced(TIMEMORY_PROJECT_NAME)
rocprofiler_systems_checkout_git_submodule(
RELATIVE_PATH external/timemory
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://github.com/NERSC/timemory.git
REPO_URL https://github.com/ROCm/timemory.git
REPO_BRANCH omnitrace)

rocprofiler_systems_save_variables(
Expand Down
4 changes: 3 additions & 1 deletion docker/Dockerfile.opensuse
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ RUN set +e; \
RUN zypper --non-interactive update -y && \
zypper --non-interactive dist-upgrade -y && \
zypper --non-interactive install -y -t pattern devel_basis && \
zypper --non-interactive install -y binutils-gold cmake curl dpkg-devel gcc-c++ git libnuma-devel openmpi3-devel python3-pip rpm-build wget
zypper --non-interactive install -y binutils-gold cmake curl dpkg-devel \
gcc-c++ git libnuma-devel openmpi3-devel python3-pip rpm-build wget && \
python3 -m pip install 'cmake==3.18.4'

ARG ROCM_VERSION=0.0
ARG AMDGPU_RPM=6.2/sle/15.6/amdgpu-install-6.2.60200-1.noarch.rpm
Expand Down
3 changes: 2 additions & 1 deletion docker/Dockerfile.opensuse.ci
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ RUN zypper --non-interactive update -y && \
zypper --non-interactive install -y binutils-gold cmake curl dpkg-devel \
gcc-c++ git libnuma-devel openmpi3-devel papi-devel python3-pip \
rpm-build wget && \
zypper --non-interactive clean --all
zypper --non-interactive clean --all && \
python3 -m pip install 'cmake==3.18.4'

COPY ./dyninst-source /tmp/dyninst

Expand Down
6 changes: 4 additions & 2 deletions docker/Dockerfile.rhel
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ ENV LIBRARY_PATH ${LIBRARY_PATH}:/opt/amdgpu/lib64

RUN yum groupinstall -y "Development Tools" && \
yum install -y epel-release && crb enable && \
yum install -y --allowerasing cmake curl dpkg-devel numactl-devel openmpi-devel papi-devel python3-pip texinfo wget which zlib-devel && \
yum clean all
yum install -y --allowerasing cmake curl dpkg-devel numactl-devel openmpi-devel \
papi-devel python3-pip texinfo wget which zlib-devel && \
yum clean all && \
python3 -m pip install 'cmake==3.18.4'

ARG ROCM_VERSION=0.0
ARG AMDGPU_RPM=6.2/rhel/9.4/amdgpu-install-6.2.60202-1.el9.noarch.rpm
Expand Down
3 changes: 2 additions & 1 deletion docker/Dockerfile.rhel.ci
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ RUN yum groupinstall -y "Development Tools" && \
yum install -y epel-release && crb enable && \
yum install -y --allowerasing cmake curl dpkg-devel numactl-devel \
openmpi-devel papi-devel python3-pip texinfo wget which zlib-devel && \
yum clean all
yum clean all && \
python3 -m pip install 'cmake==3.18.4'

COPY ./dyninst-source /tmp/dyninst

Expand Down
6 changes: 5 additions & 1 deletion docker/Dockerfile.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ ENV PATH ${HOME}/.local/bin:${PATH}

RUN apt-get update && \
apt-get dist-upgrade -y && \
apt-get install -y apt-utils autoconf autotools-dev bash-completion bison build-essential cmake curl git-core gnupg2 libnuma1 libopenmpi-dev libpapi-dev libpfm4-dev librpm-dev libtool libudev1 lsb-release m4 python3-pip rpm texinfo wget
apt-get install -y apt-utils autoconf autotools-dev bash-completion bison \
build-essential cmake curl git-core gnupg2 libnuma1 libopenmpi-dev \
libpapi-dev libpfm4-dev librpm-dev libtool libudev1 lsb-release m4 \
python3-pip rpm texinfo wget && \
python3 -m pip install 'cmake==3.18.4'

RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \
Expand Down
3 changes: 2 additions & 1 deletion docker/Dockerfile.ubuntu.ci
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ RUN apt-get update && \
bzip2 cmake curl environment-modules git-core gnupg2 gzip libiberty-dev \
libpapi-dev libpfm4-dev libtool locales lsb-release m4 python3-pip texinfo \
unzip wget zip zlib1g-dev && \
apt-get autoclean
apt-get autoclean && \
python3 -m pip install 'cmake==3.18.4'

COPY ./dyninst-source /tmp/dyninst

Expand Down
2 changes: 1 addition & 1 deletion docs/conceptual/rocprof-sys-feature-set.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ The ROCm Systems Profiler feature set and use cases
***************************************

`ROCm Systems Profiler <https://github.com/ROCm/rocprofiler-systems>`_ is designed to be highly extensible.
Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/NERSC/timemory>`_
Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/ROCm/timemory>`_
to manage extensions, resources, data, and other items. It supports the following features,
modes, metrics, and APIs.

Expand Down
4 changes: 2 additions & 2 deletions docs/how-to/configuring-runtime-options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use the ``rocprof-sys-avail -G ~/.rocprof-sys.cfg --all`` option
for a verbose configuration file with descriptions, categories, and additional information.

Modify ``${HOME}/.rocprof-sys.cfg`` as required. For example, enable `Perfetto <https://perfetto.dev/>`_,
`Timemory <https://github.com/NERSC/timemory>`_, sampling, and process-level sampling by default
`Timemory <https://github.com/ROCm/timemory>`_, sampling, and process-level sampling by default
and tweak the default sampling values.

.. code-block:: shell
Expand Down Expand Up @@ -64,7 +64,7 @@ accepts a case insensitive match for nearly all common Boolean logic expressions
Exploring components
-----------------------------------

ROCm Systems Profiler uses `Timemory <https://github.com/NERSC/timemory>`_ extensively to provide
ROCm Systems Profiler uses `Timemory <https://github.com/ROCm/timemory>`_ extensively to provide
various capabilities and manage
data and resources. By default, with ``ROCPROFSYS_PROFILE=ON``, ROCm Systems Profiler only collects wall-clock
timing values. However, by modifying the ``ROCPROFSYS_TIMEMORY_COMPONENTS`` setting,
Expand Down
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-examples LANGUAGES C CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/causal/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-causal-example LANGUAGES CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/code-coverage/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-code-coverage-example LANGUAGES CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/fork/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-fork LANGUAGES CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/lulesh/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-lulesh-example LANGUAGES C CXX)

Expand Down
2 changes: 1 addition & 1 deletion examples/mpi/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-mpi-examples LANGUAGES C CXX)

Expand Down
4 changes: 3 additions & 1 deletion examples/openmp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

project(rocprofiler-systems-openmp LANGUAGES CXX)

Expand Down Expand Up @@ -56,3 +56,5 @@ if(ROCPROFSYS_INSTALL_EXAMPLES)
DESTINATION bin
COMPONENT rocprofiler-systems-examples)
endif()

add_subdirectory(target)
110 changes: 110 additions & 0 deletions examples/openmp/target/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#
#
#
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

# Look for HIP. When the including project reports a ROCm major version of 6 or
# newer, request that major version explicitly; otherwise accept any version.
if(ROCmVersion_MAJOR_VERSION AND ROCmVersion_MAJOR_VERSION GREATER_EQUAL 6)
    find_package(hip ${ROCmVersion_MAJOR_VERSION}.0.0)
else()
    find_package(hip)
endif()

# Guard 1: no HIP at all -> warn and leave this directory without defining targets.
if(NOT hip_FOUND)
    message(WARNING "ROCm >= 5.6 not found. Skipping OpenMP target example.")
    return()
endif()

# Guard 2: HIP was found (guaranteed by the guard above) but is older than 5.6.
if(hip_VERSION VERSION_LESS 5.6.0)
    message(
        WARNING
            "ROCm >= 5.6 not found (found ${hip_VERSION}). Skipping OpenMP target example."
        )
    return()
endif()

# Unless the caller already supplied OMP_TARGET_COMPILER, search for amdclang++
# under ROCM_PATH (CMake variable or environment variable) and /opt/rocm, looking
# in the `bin` and `llvm/bin` sub-directories of each location.
if(NOT OMP_TARGET_COMPILER)
    find_program(
        amdclangpp_EXECUTABLE
        NAMES amdclang++
        HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
        PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
        PATH_SUFFIXES bin llvm/bin)
    mark_as_advanced(amdclangpp_EXECUTABLE)

    # Nothing found: warn and skip the remainder of this directory.
    if(NOT amdclangpp_EXECUTABLE)
        message(WARNING "OpenMP target compiler not found. Skipping this example.")
        return()
    endif()

    # Record the discovered compiler in the cache so later configures reuse it.
    set(OMP_TARGET_COMPILER
        "${amdclangpp_EXECUTABLE}"
        CACHE FILEPATH "OpenMP target compiler")
endif()

project(rocprofiler-systems-example-openmp-target-lib LANGUAGES CXX)

# NOTE: set unconditionally as a normal variable, so it overrides whatever
# CMAKE_BUILD_TYPE value is otherwise visible in this directory scope.
set(CMAKE_BUILD_TYPE "RelWithDebInfo")

# Architectures compiled for when the user does not provide GPU_TARGETS.
set(DEFAULT_GPU_TARGETS
    gfx900
    gfx906
    gfx908
    gfx90a
    gfx940
    gfx941
    gfx942
    gfx1030
    gfx1010
    gfx1100
    gfx1101
    gfx1102)

# User-overridable cache entry; an existing cache value is left untouched.
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING "GPU targets to compile for")

find_package(Threads REQUIRED)

# Shared library built from library.cpp with OpenMP enabled. The -fopenmp link
# option and the Threads dependency are PUBLIC, so consumers inherit them.
add_library(openmp-target-lib SHARED library.cpp)
target_compile_options(openmp-target-lib PRIVATE -fopenmp -ggdb)
target_link_options(openmp-target-lib PUBLIC -fopenmp)
target_link_libraries(openmp-target-lib PUBLIC Threads::Threads)

# Add one --offload-arch flag per requested GPU architecture, both when
# compiling the library and (publicly) when linking.
foreach(gpu_arch ${GPU_TARGETS})
    target_compile_options(openmp-target-lib PRIVATE --offload-arch=${gpu_arch})
    target_link_options(openmp-target-lib PUBLIC --offload-arch=${gpu_arch})
endforeach()

message(STATUS "Using OpenMP target compiler: ${OMP_TARGET_COMPILER}")

# Derive the toolchain root from the compiler path by stripping the executable
# name and then its parent directory (<root>/bin/<compiler> -> <root>).
# The expansions are quoted so paths containing spaces or an empty value do not
# change the argument count.
get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER}" DIRECTORY)
get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER_DIR}" DIRECTORY)

# The compiler search above also accepts <root>/llvm/bin/<compiler>. In that
# layout the two-level strip lands on <root>/llvm, and <root>/llvm/llvm/lib would
# never exist. Step up one more level, but only in that otherwise-fatal case.
get_filename_component(omp_target_compiler_dir_name "${OMP_TARGET_COMPILER_DIR}" NAME)
if("${omp_target_compiler_dir_name}" STREQUAL "llvm"
   AND NOT EXISTS "${OMP_TARGET_COMPILER_DIR}/llvm/lib")
    get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER_DIR}"
                           DIRECTORY)
endif()
unset(omp_target_compiler_dir_name)

message(STATUS "Using OpenMP target compiler directory: ${OMP_TARGET_COMPILER_DIR}")

# The targets below put <root>/llvm/lib on their build RPATH, so fail early with
# a clear message when that directory is missing.
if(NOT EXISTS "${OMP_TARGET_COMPILER_DIR}/llvm/lib")
    message(FATAL_ERROR "${OMP_TARGET_COMPILER_DIR}/llvm/lib does not exist")
endif()
# Build-tree RPATH pointing at the compiler's llvm/lib and lib directories.
set_property(
    TARGET openmp-target-lib
    PROPERTY BUILD_RPATH
             "${OMP_TARGET_COMPILER_DIR}/llvm/lib:${OMP_TARGET_COMPILER_DIR}/lib")
# The CMake target is named openmp-target-lib, but the produced file uses
# "openmp-target" as its base name.
set_property(TARGET openmp-target-lib PROPERTY OUTPUT_NAME "openmp-target")
set_property(TARGET openmp-target-lib PROPERTY POSITION_INDEPENDENT_CODE ON)

# Hand this target to the project-provided helper together with the OpenMP
# target compiler selected earlier in this file.
rocprofiler_systems_custom_compilation(
    TARGET openmp-target-lib
    COMPILER ${OMP_TARGET_COMPILER})

# Example executable: main.cpp linked against the openmp-target-lib library.
add_executable(openmp-target main.cpp)
target_compile_options(openmp-target PRIVATE -ggdb)
target_link_libraries(openmp-target PRIVATE openmp-target-lib)

# Same build-tree RPATH as the library: the compiler's llvm/lib and lib directories.
set_property(
    TARGET openmp-target
    PROPERTY BUILD_RPATH
             "${OMP_TARGET_COMPILER_DIR}/llvm/lib:${OMP_TARGET_COMPILER_DIR}/lib")
set_property(TARGET openmp-target PROPERTY POSITION_INDEPENDENT_CODE ON)

# Hand this target to the project-provided helper together with the OpenMP
# target compiler selected earlier in this file.
rocprofiler_systems_custom_compilation(
    TARGET openmp-target
    COMPILER ${OMP_TARGET_COMPILER})
Loading

0 comments on commit 04a84dd

Please sign in to comment.