From 27df2d4686f88efcbc2909a9fba595c7e399030d Mon Sep 17 00:00:00 2001 From: Aleksandr Mikhalev Date: Sat, 21 Nov 2020 14:08:05 +0100 Subject: [PATCH] Release v0.3.0 Add GPU support via KBLAS+StarPU New application: acoustic wave scattering New application: mesh deformations --- CMakeLists.txt | 40 +- Data.md | 18 + Jenkinsfile | 1 - README.md | 31 +- SARS-CoV-2-meshes/GeneratePopulation.py | 2 +- .../HierarchicalPopulationCluster.py | 2 +- VERSION.txt | 2 +- docs/STARS-H-final.png | Bin 1207436 -> 1485990 bytes examples/CMakeLists.txt | 2 +- examples/approximation/CMakeLists.txt | 2 +- examples/approximation/dense.c | 2 +- examples/approximation/minimal.c | 2 +- examples/approximation/randtlr.c | 2 +- examples/approximation/spatial.c | 2 +- examples/problem/CMakeLists.txt | 2 +- examples/problem/acoustic.c | 2 +- examples/problem/dense.c | 2 +- examples/problem/minimal.c | 2 +- examples/problem/particles.c | 2 +- examples/problem/randtlr.c | 2 +- examples/problem/rbf_cube.c | 2 +- examples/problem/rbf_virus.c | 2 +- examples/problem/spatial.c | 2 +- examples/problem/spatial_bivariate.c | 2 +- include/CMakeLists.txt | 2 +- include/common.h | 2 +- include/control/init.h | 52 +- include/starsh-acoustic.h | 2 +- include/starsh-cauchy.h | 2 +- include/starsh-constants.h | 10 +- include/starsh-electrodynamics.h | 2 +- include/starsh-electrostatics.h | 2 +- include/starsh-minimal.h | 2 +- include/starsh-mpi-starpu-kblas.h | 67 ++ include/starsh-mpi-starpu.h | 2 +- include/starsh-mpi.h | 2 +- include/starsh-particles.h | 2 +- include/starsh-randtlr.h | 2 +- include/starsh-rbf.h | 2 +- include/starsh-spatial-gsl.h | 2 +- include/starsh-spatial.h | 6 + include/starsh-starpu-cuda.h | 66 ++ include/starsh-starpu-kblas.h | 74 ++ include/starsh-starpu.h | 2 +- .../applications/particles/kernel_nd.py | 2 +- src/CMakeLists.txt | 10 +- src/applications/CMakeLists.txt | 2 +- src/applications/cauchy.c | 2 +- src/applications/common.c | 2 +- src/applications/electrodynamics.c | 2 +- 
.../electrodynamics/CMakeLists.txt | 2 +- src/applications/electrodynamics/kernel_cos.c | 2 +- src/applications/electrodynamics/kernel_sin.c | 2 +- src/applications/electrostatics.c | 2 +- .../electrostatics/CMakeLists.txt | 2 +- .../electrostatics/kernel_coulomb_potential.c | 2 +- src/applications/mesh_deformation/cube.c | 2 +- .../mesh_deformation/kernels_rbf.c | 2 +- src/applications/minimal.c | 2 +- src/applications/randtlr.c | 2 +- src/applications/spatial.c | 36 + src/applications/spatial/CMakeLists.txt | 2 +- src/backends/CMakeLists.txt | 36 +- src/backends/mpi/CMakeLists.txt | 2 +- src/backends/mpi/blrm/CMakeLists.txt | 2 +- src/backends/mpi/blrm/dfe.c | 4 +- src/backends/mpi/blrm/dmml.c | 2 +- src/backends/mpi/blrm/dna.c | 2 +- src/backends/mpi/blrm/dqp3.c | 2 +- src/backends/mpi/blrm/drsdd.c | 4 +- src/backends/mpi/blrm/dsdd.c | 2 +- src/backends/mpi_starpu/CMakeLists.txt | 2 +- src/backends/mpi_starpu/blrm/CMakeLists.txt | 2 +- src/backends/mpi_starpu/blrm/dmml.c | 2 +- src/backends/mpi_starpu/blrm/dqp3.c | 2 +- src/backends/mpi_starpu/blrm/drsdd.c | 4 +- src/backends/mpi_starpu/blrm/dsdd.c | 2 +- src/backends/mpi_starpu_kblas/CMakeLists.txt | 25 + .../mpi_starpu_kblas/blrm/CMakeLists.txt | 21 + src/backends/mpi_starpu_kblas/blrm/drsdd.c | 682 ++++++++++++++++++ src/backends/mpi_starpu_kblas2/CMakeLists.txt | 25 + .../mpi_starpu_kblas2/blrm/CMakeLists.txt | 21 + src/backends/mpi_starpu_kblas2/blrm/drsdd.c | 600 +++++++++++++++ .../mpi_starpu_kblas3_spatial/CMakeLists.txt | 25 + .../blrm/CMakeLists.txt | 21 + .../mpi_starpu_kblas3_spatial/blrm/drsdd.c | 630 ++++++++++++++++ src/backends/openmp/CMakeLists.txt | 2 +- src/backends/openmp/blrm/CMakeLists.txt | 2 +- src/backends/openmp/blrm/dfe.c | 2 +- src/backends/openmp/blrm/dmml.c | 2 +- src/backends/openmp/blrm/dqp3.c | 2 +- src/backends/openmp/blrm/drsdd.c | 4 +- src/backends/openmp/blrm/dsdd.c | 2 +- src/backends/sequential/CMakeLists.txt | 2 +- src/backends/sequential/blrm/CMakeLists.txt | 2 +- 
src/backends/sequential/blrm/dca.c | 2 +- src/backends/sequential/blrm/dfe.c | 2 +- src/backends/sequential/blrm/dmml.c | 2 +- src/backends/sequential/blrm/dqp3.c | 2 +- src/backends/sequential/blrm/drsdd.c | 2 +- src/backends/sequential/blrm/dsdd.c | 2 +- src/backends/sequential/dense/CMakeLists.txt | 2 +- src/backends/sequential/dense/dna.c | 2 +- src/backends/sequential/dense/dqp3.c | 2 +- src/backends/sequential/dense/drsdd.c | 4 +- src/backends/sequential/dense/dsdd.c | 2 +- src/backends/sequential/dense/dsvfr.c | 2 +- src/backends/sequential/dense/zrsdd.c | 2 +- src/backends/starpu/CMakeLists.txt | 2 +- src/backends/starpu/blrm/CMakeLists.txt | 2 +- src/backends/starpu/blrm/dmml.c | 2 +- src/backends/starpu/blrm/dqp3.c | 2 +- src/backends/starpu/blrm/drsdd.c | 11 +- src/backends/starpu/blrm/dsdd.c | 2 +- src/backends/starpu/dense/CMakeLists.txt | 2 +- src/backends/starpu/dense/dgemm.c | 2 +- src/backends/starpu/dense/dqp3.c | 2 +- src/backends/starpu/dense/drsdd.c | 2 +- src/backends/starpu/dense/dsdd.c | 2 +- src/backends/starpu/dense/fake_init.c | 2 +- src/backends/starpu/dense/kernel.c | 2 +- src/backends/starpu_cuda/CMakeLists.txt | 26 + src/backends/starpu_cuda/blrm/CMakeLists.txt | 19 + src/backends/starpu_cuda/blrm/drsdd.c | 425 +++++++++++ src/backends/starpu_cuda/dense/CMakeLists.txt | 21 + src/backends/starpu_cuda/dense/drsdd.c | 153 ++++ src/backends/starpu_cuda/dense/kernel.c | 34 + src/backends/starpu_kblas/CMakeLists.txt | 26 + src/backends/starpu_kblas/blrm/CMakeLists.txt | 19 + src/backends/starpu_kblas/blrm/drsdd.c | 531 ++++++++++++++ .../starpu_kblas/dense/CMakeLists.txt | 21 + src/backends/starpu_kblas/dense/drsdd.c | 119 +++ src/backends/starpu_kblas/dense/kernel.c | 46 ++ src/backends/starpu_kblas2/CMakeLists.txt | 26 + .../starpu_kblas2/blrm/CMakeLists.txt | 19 + src/backends/starpu_kblas2/blrm/drsdd.c | 567 +++++++++++++++ .../starpu_kblas2/dense/CMakeLists.txt | 21 + src/backends/starpu_kblas2/dense/drsdd.c | 105 +++ 
src/backends/starpu_kblas2/dense/kernel.c | 46 ++ .../starpu_kblas3_spatial/CMakeLists.txt | 26 + .../starpu_kblas3_spatial/blrm/CMakeLists.txt | 19 + .../starpu_kblas3_spatial/blrm/drsdd.c | 588 +++++++++++++++ .../dense/CMakeLists.txt | 21 + .../starpu_kblas3_spatial/dense/kernel.cu | 82 +++ src/control/CMakeLists.txt | 2 +- src/control/array.c | 2 +- src/control/blrf.c | 12 +- src/control/cluster.c | 2 +- src/control/init.c | 3 + src/control/problem.c | 2 +- src/itersolvers/CMakeLists.txt | 2 +- src/itersolvers/cg.c | 2 +- testing/CMakeLists.txt | 16 +- testing/cauchy.c | 2 +- testing/electrodynamics.c | 2 +- testing/electrostatics.c | 2 +- testing/minimal.c | 2 +- testing/mpi_cauchy.c | 2 +- testing/mpi_electrodynamics.c | 2 +- testing/mpi_electrostatics.c | 2 +- testing/mpi_minimal.c | 2 +- testing/mpi_spatial.c | 2 +- testing/mpi_starpu_cauchy.c | 2 +- testing/mpi_starpu_electrodynamics.c | 2 +- testing/mpi_starpu_electrostatics.c | 2 +- testing/mpi_starpu_minimal.c | 2 +- testing/mpi_starpu_spatial.c | 3 +- testing/mpi_starpu_spatial_gpu.c | 217 ++++++ testing/particles.c | 2 +- testing/randtlr.c | 2 +- testing/spatial.c | 4 +- testing/starpu_cauchy.c | 2 +- testing/starpu_electrodynamics.c | 2 +- testing/starpu_electrostatics.c | 2 +- testing/starpu_minimal.c | 2 +- testing/starpu_spatial.c | 10 +- testing/starpu_spatial_gpu.c | 145 ++++ 177 files changed, 5968 insertions(+), 164 deletions(-) create mode 100644 Data.md create mode 100644 include/starsh-mpi-starpu-kblas.h create mode 100644 include/starsh-starpu-cuda.h create mode 100644 include/starsh-starpu-kblas.h create mode 100644 src/backends/mpi_starpu_kblas/CMakeLists.txt create mode 100644 src/backends/mpi_starpu_kblas/blrm/CMakeLists.txt create mode 100644 src/backends/mpi_starpu_kblas/blrm/drsdd.c create mode 100644 src/backends/mpi_starpu_kblas2/CMakeLists.txt create mode 100644 src/backends/mpi_starpu_kblas2/blrm/CMakeLists.txt create mode 100644 src/backends/mpi_starpu_kblas2/blrm/drsdd.c 
create mode 100644 src/backends/mpi_starpu_kblas3_spatial/CMakeLists.txt create mode 100644 src/backends/mpi_starpu_kblas3_spatial/blrm/CMakeLists.txt create mode 100644 src/backends/mpi_starpu_kblas3_spatial/blrm/drsdd.c create mode 100644 src/backends/starpu_cuda/CMakeLists.txt create mode 100644 src/backends/starpu_cuda/blrm/CMakeLists.txt create mode 100644 src/backends/starpu_cuda/blrm/drsdd.c create mode 100644 src/backends/starpu_cuda/dense/CMakeLists.txt create mode 100644 src/backends/starpu_cuda/dense/drsdd.c create mode 100644 src/backends/starpu_cuda/dense/kernel.c create mode 100644 src/backends/starpu_kblas/CMakeLists.txt create mode 100644 src/backends/starpu_kblas/blrm/CMakeLists.txt create mode 100644 src/backends/starpu_kblas/blrm/drsdd.c create mode 100644 src/backends/starpu_kblas/dense/CMakeLists.txt create mode 100644 src/backends/starpu_kblas/dense/drsdd.c create mode 100644 src/backends/starpu_kblas/dense/kernel.c create mode 100644 src/backends/starpu_kblas2/CMakeLists.txt create mode 100644 src/backends/starpu_kblas2/blrm/CMakeLists.txt create mode 100644 src/backends/starpu_kblas2/blrm/drsdd.c create mode 100644 src/backends/starpu_kblas2/dense/CMakeLists.txt create mode 100644 src/backends/starpu_kblas2/dense/drsdd.c create mode 100644 src/backends/starpu_kblas2/dense/kernel.c create mode 100644 src/backends/starpu_kblas3_spatial/CMakeLists.txt create mode 100644 src/backends/starpu_kblas3_spatial/blrm/CMakeLists.txt create mode 100644 src/backends/starpu_kblas3_spatial/blrm/drsdd.c create mode 100644 src/backends/starpu_kblas3_spatial/dense/CMakeLists.txt create mode 100644 src/backends/starpu_kblas3_spatial/dense/kernel.cu create mode 100644 testing/mpi_starpu_spatial_gpu.c create mode 100644 testing/starpu_spatial_gpu.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 3633794c..88514b6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,7 +51,7 @@ include(FindPkgConfig) # Create project and check C compiler cmake_policy(SET 
CMP0048 NEW) -project(STARS-H VERSION 0.1.1 LANGUAGES C Fortran) +project(STARS-H VERSION 0.3.0 LANGUAGES C Fortran) message(STATUS "Building ${PROJECT_NAME} ${PROJECT_VERSION}") @@ -69,6 +69,12 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING option(OPENMP "Use OpenMP" ON) option(MPI "Use MPI" ON) option(STARPU "Use StarPU" ON) +# KBLAS does not support pkg-config, so the user has to provide its path by +# means of CFLAGS="-I/path/to/kblas/include -L/path/to/kblas/lib". KBLAS is ON +# by default, but it is switched OFF below if CMAKE_CUDA_COMPILER is not set. +option(KBLAS "Use KBLAS" ON) +option(CUDA "Use CUDA" ON) +#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_70,code=sm_70") # Option to force using parallel blas instead of sequential option(USE_PARALLEL_BLAS "Prefer parallel blas libraries" OFF) @@ -109,7 +115,7 @@ endif() set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) # the RPATH to be used when installing #set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") - +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) # Packaging (make package) #set(CPACK_PACKAGE_VERSION ${STARSH_VERSION}) #set(CPACK_GENERATOR "TGZ") @@ -174,6 +180,25 @@ if(STARPU) endif() endif() +# KBLAS depends on CUDA +if(KBLAS) + set(CUDA ON) +endif() + +# Check CUDA option +if(CUDA) + # If CUDA itself is available + if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + add_definitions("-DCUDA") + # If it is not available + else() + set(CUDA OFF) + # Also disable dependent KBLAS option + set(KBLAS OFF) + endif() +endif(CUDA) + # Check if GNU Scientific Library is available (for Matern kernel and # Bessel function) if(GSL) @@ -306,6 +331,17 @@ if(BLA_VENDOR MATCHES "Intel") add_definitions("-DMKL") endif() +if(STARPU AND KBLAS) + add_definitions("-DKBLAS") +# find_package(MAGMA) +# if(MAGMA_FOUND) +# include_directories(${MAGMA_INCLUDE_DIRS}) +# link_directories(${MAGMA_LIBRARY_DIRS}) +# add_definitions("-DKBLAS") +# else() +# set(KBLAS OFF) +# endif() +endif() ############################################################################### ## PRINT 
CONFIGURATION ## diff --git a/Data.md b/Data.md new file mode 100644 index 00000000..6d33e696 --- /dev/null +++ b/Data.md @@ -0,0 +1,18 @@ +# Dataset + +## Mesh Deformation Application + +Dataset is available in KAUST repository: https://repository.kaust.edu.sa/handle/10754/664938. + +DOI:10.25781/KAUST-V2EF2 + +## Acoustic Scattering Application + +Dataset is available in KAUST repository: https://repository.kaust.edu.sa/handle/10754/664400. + + +DOI:10.25781/KAUST-I0634 + +For more information on the dataset please refer to the readme files in the data repositories. + + diff --git a/Jenkinsfile b/Jenkinsfile index 3c57966a..b3b68c44 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -117,5 +117,4 @@ pipeline { } } } - } diff --git a/README.md b/README.md index e968b0bf..613ffcdf 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,16 @@ What is STARS-H? ================ -STARS-H is a **high performance parallel open-source** package of **Software -for Testing Accuracy, Reliability and Scalability of Hierarchical -computations**. It -provides a hierarchical matrix market in order to benchmark performance of -various libraries for hierarchical matrix compressions and computations -(including itself). **Why hierarchical matrices?** Because such matrices arise -in many PDEs and use much less memory, while requiring fewer flops for -computations. There are several hierarchical data formats, each one with its -own performance and memory footprint. STARS-H intends to provide a standard for -assessing accuracy and performance of hierarchical matrix libraries on a given -hardware architecture environment. STARS-H currently supports the tile low-rank -(TLR) data format for approximation on shared and distributed-memory systems, -using MPI, OpenMP and task-based programming models. 
+The Software for Testing Accuracy, Reliability and Scalability of Hierarchical +computations (STARS-H) is a parallel library that provides a high-performance matrix market of +rank-structured matrix operators. STARS-H supports various matrix kernels that are +proxies for many scientific applications, and optionally compresses them by exploiting +their data sparsity. This translates into a lower arithmetic complexity and memory footprint. +STARS-H intends to provide a standard software environment for assessing accuracy and performance +of 𝓗-matrix libraries on a given hardware architecture. STARS-H currently supports +the tile low-rank (TLR) data format for approximation on shared and distributed-memory systems, +possibly equipped with GPUs, using MPI, OpenMP and task-based programming models. + Vision of STARS-H ================= @@ -49,7 +46,7 @@ Applications in matrix-free form: 3. Electrodynamics (sin(kr)/r and cos(kr)/r) 4. Random synthetic TLR matrix 5. Spatial statistics (exponential, square exponential and matern kernels) -6. Mesh deformation using radial basis function (gaussian, exponential, inverse quadratic, inverse multi-quadratic, CPTS, and Wendland kernels) +6. Mesh deformation using radial basis functions, i.e., Gaussian, exponential, inverse quadratic, inverse multi-quadratic, CPTS, and Wendland kernels. 7. Acoustic scattering @@ -138,5 +135,11 @@ and have additional steps on approximation of corresponding matrices. *Important notice: the approximation phase does not require the entire dense matrix to be stored, since matrix elements are computed on the fly.* +Dataset +======== + +Please see Data.md for information about the datasets. 
+ + ![Handout](docs/STARS-H-final.png) diff --git a/SARS-CoV-2-meshes/GeneratePopulation.py b/SARS-CoV-2-meshes/GeneratePopulation.py index 596b51cb..843a8e66 100644 --- a/SARS-CoV-2-meshes/GeneratePopulation.py +++ b/SARS-CoV-2-meshes/GeneratePopulation.py @@ -1,4 +1,4 @@ -# @version 1.3.0 +# @version 0.3.0 import pandas as pd import numpy as np diff --git a/SARS-CoV-2-meshes/HierarchicalPopulationCluster.py b/SARS-CoV-2-meshes/HierarchicalPopulationCluster.py index 627fef91..eda0d3b0 100644 --- a/SARS-CoV-2-meshes/HierarchicalPopulationCluster.py +++ b/SARS-CoV-2-meshes/HierarchicalPopulationCluster.py @@ -1,4 +1,4 @@ -# @version 1.3.0 +# @version 0.3.0 import pandas as pd import numpy as np diff --git a/VERSION.txt b/VERSION.txt index f0bb29e7..0d91a54c 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -1.3.0 +0.3.0 diff --git a/docs/STARS-H-final.png b/docs/STARS-H-final.png index b18732049e2c56d11e500aff63556a2299ecefe0..0e707d575424f4a9c206fc82a1115563d983f982 100644 GIT binary patch literal 1485990 zcmaI8bx@l@+x88V;_j}&3dP+Wf?Fx>?(SN=1PksiEd`3ZyGwC`dkewc-rnEyzTaO@ z@623t?(A%?$>hAU`#a7(HZf`{a_A@|C@?TE=sJ&J=>O)EHE&z z47O5IYW7lcQqB&}ZkjHp=9Ur`&SsVxa*`YZ0{k#Aj7jDuCK~cAj3cHvCMF}3%uFb5 z-Wt)-APtk?zMe^{v7T|0ksfe{fx#9L)>c0ZJUa<~K>JV+QW4vqFB5$!!>BD98nMZt z*OXLKDy)njOifHCVCquG6;Z@bD5&%SOr#S=FlL0XV4Y?>MPXpXVSv&ST0XER zowgv0rBwRc!7k_4noqhsgwB>gR+nIS12_hfAwJ(6%WMXzy2x3_lyY{++3jaIqSc3j=GP6 zd@eQu@A973pNcrU-J#xOZ^MIPPLi=S1xSo=Uqk-4(32!1K9XCu6frlC#1~zj;9*8GZg%AR(yckGwV<^N*UE}5YCr5xc~N=-=E7=bnI4i+#t07;x=NZQNPO~ zM()5tMC6`TK^LGC^an5U?HR*h&~7cV+JA4^&AO=NB4N6Cdq-zh+@dQ4_3f83|M$y> z(fGlp`fWF=n*-f@>p7&2jNO(mKIfD#yx>FC-q{M=`E{()Qj96Kw<`k0jK|lZWWG`{1$v))O}{Rqd!T?i zrtj?agBp=CmA@}MZt$b18g_d-Ped`T${_B$kz6ua8^8D4e`0Oga$V+sSJis1FBj)t zYTGggdto)cQsrk0LQ*-&8fuT-3rMyEx}K~xkRYmX=(^-OeNHeTqJ&N