diff --git a/.github/workflows/nemo_tests.yml b/.github/workflows/nemo_tests.yml index 79aab92ba2..24fcc56bd4 100644 --- a/.github/workflows/nemo_tests.yml +++ b/.github/workflows/nemo_tests.yml @@ -84,6 +84,7 @@ jobs: # PSyclone passthrough for MetOffice NEMO - name: NEMO MetOffice Passthrough + if: ${{ github.run_attempt != '1' }} run: | . .runner_venv/bin/activate export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts @@ -101,6 +102,7 @@ jobs: . .runner_venv/bin/activate export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts export NEMO_DIR=${HOME}/NEMO + export NEMOV4=1 # Enables specific NEMOV4 exclusions in the PSyclone transformation script cd $PSYCLONE_NEMO_DIR module load nvidia-hpcsdk/${NVFORTRAN_VERSION} module load hdf5/${HDF5_VERSION} netcdf_c/${NETCDF_C_VERSION} netcdf_fortran/${NETCDF_FORTRAN_VERSION} @@ -197,6 +199,7 @@ jobs: module load oneapi/${ONEAPI_VERSION} module load hdf5/${HDF5_VERSION} netcdf_c/${NETCDF_C_VERSION} netcdf_fortran/${NETCDF_FORTRAN_VERSION} module load perl/${PERL_VERSION} + export NEMOV4=1 # Enables specific NEMOV4 exclusions in the PSyclone transformation script export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts export NEMO_DIR=${HOME}/NEMOGCM_V40 export COMPILER_ARCH=linux_intel diff --git a/.github/workflows/nemo_v5_tests.yml b/.github/workflows/nemo_v5_tests.yml index 1f47b0f0b7..3248283be8 100644 --- a/.github/workflows/nemo_v5_tests.yml +++ b/.github/workflows/nemo_v5_tests.yml @@ -45,6 +45,8 @@ jobs: run_if_on_mirror: if: ${{ github.repository == 'stfc/PSyclone-mirror' }} runs-on: self-hosted + env: + NEMODIR_NAME: NEMOv5_Jan25 steps: - uses: actions/checkout@v3 @@ -72,6 +74,7 @@ jobs: # PSyclone passthrough for 5.0-beta of NEMO. 
- name: NEMO 5.0 gfortran passthrough + if: ${{ github.run_attempt != '1' }} run: | # Set up environment source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh @@ -79,7 +82,7 @@ jobs: source .runner_venv/bin/activate export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts export PSYCLONE_HOME=${PWD}/.runner_venv - export NEMO_DIR=${HOME}/NEMOv5 + export NEMO_DIR=${HOME}/${NEMODIR_NAME} export TEST_DIR=BENCH_PASSTHROUGH_GCC # Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS @@ -98,9 +101,10 @@ jobs: mpirun -np 4 ./nemo tail run.stat # This was produced with gfortran, so we can do an exact diff - diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.100steps run.stat + diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat - name: NEMO 5.0 nvidia passthrough + if: ${{ github.run_attempt != '1' }} run: | # Set up environment source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh @@ -108,13 +112,13 @@ jobs: source .runner_venv/bin/activate export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts export PSYCLONE_HOME=${PWD}/.runner_venv - export NEMO_DIR=${HOME}/NEMOv5 + export NEMO_DIR=${HOME}/${NEMODIR_NAME} export TEST_DIR=BENCH_PASSTHROUGH_NVHPC # Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS cd $NEMO_DIR cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm - export FCFLAGS="-i4 -Mr8 -O1 -Kieee -nofma -Mnovect" + export FCFLAGS="-i4 -Mr8 -O1 -nofma -Mnovect" # Clean up and compile # Without key_mpi_off it fails to compile (even without psyclone) @@ -127,11 +131,12 @@ jobs: cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg ./nemo tail run.stat - diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.100steps run.stat + diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat export VAR_TIME=$(awk '/ step /{print $3}' timing.output | head -n 1 | sed -e 's/s//') echo "Time-stepping duration 
= " $VAR_TIME - name: NEMO 5.0 Intel passthrough + if: ${{ github.run_attempt != '1' }} run: | # Set up environment source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh @@ -139,7 +144,7 @@ jobs: source .runner_venv/bin/activate export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts export PSYCLONE_HOME=${PWD}/.runner_venv - export NEMO_DIR=${HOME}/NEMOv5 + export NEMO_DIR=${HOME}/${NEMODIR_NAME} export TEST_DIR=BENCH_PASSTHROUGH_ONEAPI # Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS @@ -157,7 +162,7 @@ jobs: cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg mpirun -np 6 ./nemo tail run.stat - diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.oneapi.small.100steps run.stat + diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.oneapi.small.10steps run.stat - name: NEMO 5.0 gfortran OpenMP for CPUs run: | @@ -167,7 +172,7 @@ jobs: source .runner_venv/bin/activate export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts export PSYCLONE_HOME=${PWD}/.runner_venv - export NEMO_DIR=${HOME}/NEMOv5 + export NEMO_DIR=${HOME}/${NEMODIR_NAME} export TEST_DIR=BENCH_OMP_THREADING_GCC # Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS @@ -177,7 +182,6 @@ jobs: # Clean up and compile ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} clean -y - export NEMOV5=1 # Enables specific NEMOV5 optimisations in the PSyclone transformation script ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_cpu_trans.py \ add_key "key_nosignedzero" -j 4 -v 1 @@ -186,7 +190,7 @@ jobs: cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg OMP_NUM_THREADS=4 mpirun -np 1 ./nemo tail run.stat - diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.100steps run.stat + diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat export TIME_sec=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s) ${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh 
\ "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \ @@ -197,7 +201,7 @@ jobs: ci_test: "NEMOv5 OpenMP for CPU", nemo_version: "NEMOv5", system: "GlaDos", compiler:"gfortran-14" , date: new Date(), elapsed_time: '"${TIME_sec}"'})' - - name: NEMO 5.0 nvidia OpenMP for GPUs (managed memory) + - name: NEMO 5.0 nvidia OpenMP for GPUs (BENCH - managed memory) run: | # Set up environment source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh @@ -205,14 +209,14 @@ jobs: source .runner_venv/bin/activate export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts export PSYCLONE_HOME=${PWD}/.runner_venv - export NEMO_DIR=${HOME}/NEMOv5 + export NEMO_DIR=${HOME}/${NEMODIR_NAME} export TEST_DIR=BENCH_OMP_OFFLOAD_NVHPC # Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS # We compile at -O1 to permit comparison of the results. cd $NEMO_DIR cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm - export FCFLAGS="-i4 -Mr8 -O1 -Kieee -nofma -Mnovect -g -mp=gpu -gpu=managed" + export FCFLAGS="-i4 -Mr8 -O1 -nofma -Mnovect -g -mp=gpu -gpu=managed,math_uniform" # Clean up and compile # Without key_mpi_off it fails to compile (even without psyclone) @@ -221,17 +225,54 @@ jobs: add_key "key_mpi_off key_nosignedzero" -j 4 -v 1 # Run test (disabled because it is currently too slow) - # cd $NEMO_DIR/tests/${TEST_DIR}/EXP00 - # cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg - # ./nemo + cd $NEMO_DIR/tests/${TEST_DIR}/EXP00 + cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg + ./nemo # tail run.stat - # diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.100steps run.stat - # export TIME_sec=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s) - # ${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \ - # "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \ - # --quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \ - # --password 
${{ secrets.MONGODB_PASSWORD }} \ - # --eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'", - # github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'", - # ci_test: "NEMOv5 OpenMP for GPU", nemo_version: "NEMOv5", system: "GlaDos", - # compiler:"nvhpc-24.5" , date: new Date(), elapsed_time: '"${TIME_sec}"'})' + diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat + export TIME_sec=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s) + ${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \ + "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \ + --quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \ + --password ${{ secrets.MONGODB_PASSWORD }} \ + --eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'", + github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'", + ci_test: "NEMOv5 OpenMP for GPU (BENCH)", nemo_version: "NEMOv5", system: "GlaDos", + compiler:"nvhpc-24.5" , date: new Date(), elapsed_time: '"${TIME_sec}"'})' + + - name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA1 - managed memory) + run: | + # Set up environment + source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh + spack unload && spack load nemo-build-environment%nvhpc@24.5 + source .runner_venv/bin/activate + export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts + export PSYCLONE_HOME=${PWD}/.runner_venv + export NEMO_DIR=${HOME}/${NEMODIR_NAME} + export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC + + # Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS + # We compile at "-O1 -nofma -Mnovect -gpu=math_uniform" to permit comparison of the results. 
+ cd $NEMO_DIR + cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm + export FCFLAGS="-i4 -Mr8 -O1 -nofma -Mnovect -g -mp=gpu -gpu=managed,math_uniform" + + # Clean up and compile + # Without key_mpi_off it fails to compile (even without psyclone) + ./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} clean -y + ./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \ + add_key "key_mpi_off key_nosignedzero" -j 4 -v 1 + + # Run test and compare run.stat against the KGO + cd $NEMO_DIR/tests/${TEST_DIR}/EXP00 + ./nemo + diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca1.nvhpc.10steps run.stat + export TIME_sec=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s) + ${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \ + "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \ + --quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \ + --password ${{ secrets.MONGODB_PASSWORD }} \ + --eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'", + github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'", + ci_test: "NEMOv5 OpenMP for GPU (ORCA1)", nemo_version: "NEMOv5", system: "GlaDos", + compiler:"nvhpc-24.5" , date: new Date(), elapsed_time: '"${TIME_sec}"'})' diff --git a/examples/nemo/scripts/KGOs/namelist_cfg_bench_small b/examples/nemo/scripts/KGOs/namelist_cfg_bench_small index 834f83b538..234cd70a8a 100644 --- a/examples/nemo/scripts/KGOs/namelist_cfg_bench_small +++ b/examples/nemo/scripts/KGOs/namelist_cfg_bench_small @@ -7,7 +7,7 @@ !----------------------------------------------------------------------- cn_exp = 'BENCH' ! experience name nn_it000 = 1 ! first time step - nn_itend = 100 ! last time step + nn_itend = 10 ! last time step nn_stock = -1 ! frequency of creation of a restart file (modulo referenced to 1) nn_write = -1 ! 
frequency of write in the output file (modulo referenced to nn_it000) / diff --git a/examples/nemo/scripts/KGOs/run.stat.bench.gfortran.small.100steps b/examples/nemo/scripts/KGOs/run.stat.bench.gfortran.small.100steps deleted file mode 100644 index 95b5c35d01..0000000000 --- a/examples/nemo/scripts/KGOs/run.stat.bench.gfortran.small.100steps +++ /dev/null @@ -1,100 +0,0 @@ - it : 1 |ssh|_max: 0.2336851764570087D+01 |U|_max: 0.7052149477800684D-02 |V|_max: 0.2308260467200877D-02 S_min: 0.2996908781150693D+02 S_max: 0.3101392942716721D+02 - it : 2 |ssh|_max: 0.3739164083094878D+01 |U|_max: 0.1029616821992987D-01 |V|_max: 0.9486960009862211D-02 S_min: 0.2996910922616945D+02 S_max: 0.3101392859195436D+02 - it : 3 |ssh|_max: 0.4179101131274851D+01 |U|_max: 0.1301524988138879D-01 |V|_max: 0.2226585559898513D-01 S_min: 0.2996913484029493D+02 S_max: 0.3101392781894970D+02 - it : 4 |ssh|_max: 0.4569875511150748D+01 |U|_max: 0.1401053780649201D-01 |V|_max: 0.3894932715115156D-01 S_min: 0.2996916048603851D+02 S_max: 0.3101392710567817D+02 - it : 5 |ssh|_max: 0.4796169575324639D+01 |U|_max: 0.1103849688785435D-01 |V|_max: 0.5247761995640531D-01 S_min: 0.2996918638309708D+02 S_max: 0.3101392644923621D+02 - it : 6 |ssh|_max: 0.4986687389489863D+01 |U|_max: 0.1061909566753376D-01 |V|_max: 0.6682435160313002D-01 S_min: 0.2996921212409725D+02 S_max: 0.3101392584662024D+02 - it : 7 |ssh|_max: 0.5137377532600958D+01 |U|_max: 0.7776206738812892D-02 |V|_max: 0.8230434351326746D-01 S_min: 0.2996923806059345D+02 S_max: 0.3101392529517780D+02 - it : 8 |ssh|_max: 0.5245041711882124D+01 |U|_max: 0.1114333312848225D-01 |V|_max: 0.9661364041991180D-01 S_min: 0.2996926384223953D+02 S_max: 0.3101392479253744D+02 - it : 9 |ssh|_max: 0.5162398664673749D+01 |U|_max: 0.1465335344080455D-01 |V|_max: 0.1139851405821525D+00 S_min: 0.2996929000744533D+02 S_max: 0.3101392433638144D+02 - it : 10 |ssh|_max: 0.5005955481222619D+01 |U|_max: 0.2023455937023107D-01 |V|_max: 0.1294762239325535D+00 S_min: 
0.2996931617422945D+02 S_max: 0.3101392392447119D+02 - it : 11 |ssh|_max: 0.4855135547363155D+01 |U|_max: 0.2414868247481750D-01 |V|_max: 0.1381240863345169D+00 S_min: 0.2996934315861018D+02 S_max: 0.3101392355475714D+02 - it : 12 |ssh|_max: 0.4692755224833184D+01 |U|_max: 0.3065956316452254D-01 |V|_max: 0.1451002472720360D+00 S_min: 0.2996937014494064D+02 S_max: 0.3101392322533556D+02 - it : 13 |ssh|_max: 0.4536098100542291D+01 |U|_max: 0.3442772287760921D-01 |V|_max: 0.1495389977889906D+00 S_min: 0.2996938036344907D+02 S_max: 0.3101392293436327D+02 - it : 14 |ssh|_max: 0.4431563612482347D+01 |U|_max: 0.4081132586570636D-01 |V|_max: 0.1519115296349336D+00 S_min: 0.2996939059705999D+02 S_max: 0.3101392268007751D+02 - it : 15 |ssh|_max: 0.4427364591240526D+01 |U|_max: 0.4497061368218146D-01 |V|_max: 0.1527299906039677D+00 S_min: 0.2996941616404622D+02 S_max: 0.3101392246083734D+02 - it : 16 |ssh|_max: 0.4374572635856285D+01 |U|_max: 0.5019633620636200D-01 |V|_max: 0.1522475888777552D+00 S_min: 0.2996944173095707D+02 S_max: 0.3101392227509158D+02 - it : 17 |ssh|_max: 0.4341821484889612D+01 |U|_max: 0.5531555071540357D-01 |V|_max: 0.1517913760503613D+00 S_min: 0.2996946794573724D+02 S_max: 0.3101392212133833D+02 - it : 18 |ssh|_max: 0.4325752650484618D+01 |U|_max: 0.5984280097656534D-01 |V|_max: 0.1506951155150409D+00 S_min: 0.2996949416824957D+02 S_max: 0.3101392199813921D+02 - it : 19 |ssh|_max: 0.4301354130031908D+01 |U|_max: 0.6412034806212455D-01 |V|_max: 0.1492011360136977D+00 S_min: 0.2996952103824535D+02 S_max: 0.3101392189957653D+02 - it : 20 |ssh|_max: 0.4266486330341549D+01 |U|_max: 0.6846696076334181D-01 |V|_max: 0.1487061949929750D+00 S_min: 0.2996954790895234D+02 S_max: 0.3101392180101296D+02 - it : 21 |ssh|_max: 0.4228899683771108D+01 |U|_max: 0.7286106786441825D-01 |V|_max: 0.1482733050868238D+00 S_min: 0.2996957538213342D+02 S_max: 0.3101392170244917D+02 - it : 22 |ssh|_max: 0.4216167750032006D+01 |U|_max: 0.7942948840862415D-01 |V|_max: 
0.1471684529331044D+00 S_min: 0.2996960281724479D+02 S_max: 0.3101392160388572D+02 - it : 23 |ssh|_max: 0.4194760562121319D+01 |U|_max: 0.8259229294750987D-01 |V|_max: 0.1458617371205514D+00 S_min: 0.2996963081714462D+02 S_max: 0.3101392150532316D+02 - it : 24 |ssh|_max: 0.4168713559873129D+01 |U|_max: 0.8868057756900001D-01 |V|_max: 0.1444473218199361D+00 S_min: 0.2996965875627752D+02 S_max: 0.3101392140676201D+02 - it : 25 |ssh|_max: 0.4134949051225507D+01 |U|_max: 0.9077057185821176D-01 |V|_max: 0.1439947172949023D+00 S_min: 0.2996968725371423D+02 S_max: 0.3101392130820279D+02 - it : 26 |ssh|_max: 0.4102570874661376D+01 |U|_max: 0.9385269294354093D-01 |V|_max: 0.1432368550039717D+00 S_min: 0.2996971569976720D+02 S_max: 0.3101392120964600D+02 - it : 27 |ssh|_max: 0.4087523300279630D+01 |U|_max: 0.9719584642584371D-01 |V|_max: 0.1422171247740371D+00 S_min: 0.2996974469969665D+02 S_max: 0.3101392111109208D+02 - it : 28 |ssh|_max: 0.4066711100039118D+01 |U|_max: 0.9981230002154087D-01 |V|_max: 0.1409251258433798D+00 S_min: 0.2996977366131380D+02 S_max: 0.3101392101254152D+02 - it : 29 |ssh|_max: 0.4041002208104996D+01 |U|_max: 0.1010000359259521D+00 |V|_max: 0.1393809648399729D+00 S_min: 0.2996980318329325D+02 S_max: 0.3101392091399473D+02 - it : 30 |ssh|_max: 0.4009285982003062D+01 |U|_max: 0.1032359453137443D+00 |V|_max: 0.1386229372478098D+00 S_min: 0.2996983266255877D+02 S_max: 0.3101392081545213D+02 - it : 31 |ssh|_max: 0.3982415222842285D+01 |U|_max: 0.1064909952799985D+00 |V|_max: 0.1376784018230090D+00 S_min: 0.2996986268498295D+02 S_max: 0.3101392071691412D+02 - it : 32 |ssh|_max: 0.3969982464349176D+01 |U|_max: 0.1090049443795551D+00 |V|_max: 0.1364874284015920D+00 S_min: 0.2996989265164330D+02 S_max: 0.3101392061838107D+02 - it : 33 |ssh|_max: 0.3951693237880511D+01 |U|_max: 0.1091781336121864D+00 |V|_max: 0.1350595740917304D+00 S_min: 0.2996992314600502D+02 S_max: 0.3101392051985331D+02 - it : 34 |ssh|_max: 0.3929024132864214D+01 |U|_max: 
0.1108394851484137D+00 |V|_max: 0.1334223685706022D+00 S_min: 0.2996995357902092D+02 S_max: 0.3101392042133121D+02 - it : 35 |ssh|_max: 0.3902051104798720D+01 |U|_max: 0.1123215057057724D+00 |V|_max: 0.1324678559470795D+00 S_min: 0.2996998460938622D+02 S_max: 0.3101392032281504D+02 - it : 36 |ssh|_max: 0.3881823555864093D+01 |U|_max: 0.1135896712524620D+00 |V|_max: 0.1314603955531179D+00 S_min: 0.2997001560011671D+02 S_max: 0.3101392022430511D+02 - it : 37 |ssh|_max: 0.3867100883930083D+01 |U|_max: 0.1146836715283930D+00 |V|_max: 0.1303987239276822D+00 S_min: 0.2997004703037640D+02 S_max: 0.3101392012580167D+02 - it : 38 |ssh|_max: 0.3847949152864398D+01 |U|_max: 0.1158160520232005D+00 |V|_max: 0.1292381833357298D+00 S_min: 0.2997007844703269D+02 S_max: 0.3101392002730497D+02 - it : 39 |ssh|_max: 0.3825127641498751D+01 |U|_max: 0.1170177368085170D+00 |V|_max: 0.1279194616031653D+00 S_min: 0.2997010997053799D+02 S_max: 0.3101391992881522D+02 - it : 40 |ssh|_max: 0.3799528294348824D+01 |U|_max: 0.1186024141919507D+00 |V|_max: 0.1265811278328502D+00 S_min: 0.2997014149200218D+02 S_max: 0.3101391983033262D+02 - it : 41 |ssh|_max: 0.3772032775514555D+01 |U|_max: 0.1194298985387558D+00 |V|_max: 0.1257718604302258D+00 S_min: 0.2997017312118061D+02 S_max: 0.3101391973185734D+02 - it : 42 |ssh|_max: 0.3751883989161410D+01 |U|_max: 0.1202594108999530D+00 |V|_max: 0.1248426990485741D+00 S_min: 0.2997020485109384D+02 S_max: 0.3101391963338953D+02 - it : 43 |ssh|_max: 0.3733872363853277D+01 |U|_max: 0.1209428304251352D+00 |V|_max: 0.1238750010512143D+00 S_min: 0.2997023681279894D+02 S_max: 0.3101391953492931D+02 - it : 44 |ssh|_max: 0.3708689314338177D+01 |U|_max: 0.1216476445564216D+00 |V|_max: 0.1230482176287984D+00 S_min: 0.2997026888352095D+02 S_max: 0.3101391943647680D+02 - it : 45 |ssh|_max: 0.3675315817224827D+01 |U|_max: 0.1217187086074750D+00 |V|_max: 0.1223729447437161D+00 S_min: 0.2997030115697795D+02 S_max: 0.3101391933803208D+02 - it : 46 |ssh|_max: 
0.3638628468354306D+01 |U|_max: 0.1220536369642546D+00 |V|_max: 0.1216146579240754D+00 S_min: 0.2997033350191469D+02 S_max: 0.3101391923959522D+02 - it : 47 |ssh|_max: 0.3603078747818889D+01 |U|_max: 0.1228390657960220D+00 |V|_max: 0.1209725239865341D+00 S_min: 0.2997036595291029D+02 S_max: 0.3101391914116627D+02 - it : 48 |ssh|_max: 0.3578024562485435D+01 |U|_max: 0.1233180522853033D+00 |V|_max: 0.1204603678388503D+00 S_min: 0.2997039840296358D+02 S_max: 0.3101391904274524D+02 - it : 49 |ssh|_max: 0.3552088235953275D+01 |U|_max: 0.1237554573374173D+00 |V|_max: 0.1198699312416498D+00 S_min: 0.2997043094517514D+02 S_max: 0.3101391894433213D+02 - it : 50 |ssh|_max: 0.3524980372696726D+01 |U|_max: 0.1252151996325486D+00 |V|_max: 0.1191536339279734D+00 S_min: 0.2997046348723887D+02 S_max: 0.3101391884592693D+02 - it : 51 |ssh|_max: 0.3496694919192748D+01 |U|_max: 0.1244822857127000D+00 |V|_max: 0.1183524274010442D+00 S_min: 0.2997049610934201D+02 S_max: 0.3101391874752956D+02 - it : 52 |ssh|_max: 0.3467275233288353D+01 |U|_max: 0.1243785561579632D+00 |V|_max: 0.1174596101044341D+00 S_min: 0.2997052873156215D+02 S_max: 0.3101391864913996D+02 - it : 53 |ssh|_max: 0.3437074898196145D+01 |U|_max: 0.1249843254911183D+00 |V|_max: 0.1165990177600824D+00 S_min: 0.2997056142115479D+02 S_max: 0.3101391855075804D+02 - it : 54 |ssh|_max: 0.3412808776859360D+01 |U|_max: 0.1262160905869304D+00 |V|_max: 0.1160025742906133D+00 S_min: 0.2997059411101461D+02 S_max: 0.3101391845238375D+02 - it : 55 |ssh|_max: 0.3391450350315669D+01 |U|_max: 0.1255493311068343D+00 |V|_max: 0.1153094788274209D+00 S_min: 0.2997062685752950D+02 S_max: 0.3101391835401706D+02 - it : 56 |ssh|_max: 0.3368770895797094D+01 |U|_max: 0.1257336544090955D+00 |V|_max: 0.1145337415699852D+00 S_min: 0.2997065960424352D+02 S_max: 0.3101391825565806D+02 - it : 57 |ssh|_max: 0.3353200034887146D+01 |U|_max: 0.1269656754817013D+00 |V|_max: 0.1136697423684477D+00 S_min: 0.2997069239915498D+02 S_max: 0.3101391815730700D+02 - it 
: 58 |ssh|_max: 0.3347600402412007D+01 |U|_max: 0.1272877934478984D+00 |V|_max: 0.1127229210566140D+00 S_min: 0.2997072519408028D+02 S_max: 0.3101391805896441D+02 - it : 59 |ssh|_max: 0.3437288728770171D+01 |U|_max: 0.1262930293680335D+00 |V|_max: 0.1116946626352186D+00 S_min: 0.2997071218253398D+02 S_max: 0.3101391796063123D+02 - it : 60 |ssh|_max: 0.3436080618231017D+01 |U|_max: 0.1273247312425994D+00 |V|_max: 0.1110159073289365D+00 S_min: 0.2997069923992022D+02 S_max: 0.3101391786230895D+02 - it : 61 |ssh|_max: 0.3498032084302758D+01 |U|_max: 0.1274936440026021D+00 |V|_max: 0.1102702437296978D+00 S_min: 0.2997073066598240D+02 S_max: 0.3101391776399960D+02 - it : 62 |ssh|_max: 0.3548875977055387D+01 |U|_max: 0.1287231634754152D+00 |V|_max: 0.1094421295742103D+00 S_min: 0.2997076213406375D+02 S_max: 0.3101391766570563D+02 - it : 63 |ssh|_max: 0.3611010538472270D+01 |U|_max: 0.1289986514796674D+00 |V|_max: 0.1085344229984168D+00 S_min: 0.2997079424665560D+02 S_max: 0.3101391756742955D+02 - it : 64 |ssh|_max: 0.3616484493253657D+01 |U|_max: 0.1293211256377026D+00 |V|_max: 0.1075492677366715D+00 S_min: 0.2997082645847949D+02 S_max: 0.3101391746917325D+02 - it : 65 |ssh|_max: 0.3700460333102999D+01 |U|_max: 0.1295006357484061D+00 |V|_max: 0.1064875051227157D+00 S_min: 0.2997085933317098D+02 S_max: 0.3101391737093726D+02 - it : 66 |ssh|_max: 0.3697987297630913D+01 |U|_max: 0.1291667138787790D+00 |V|_max: 0.1057037291354291D+00 S_min: 0.2997089223691964D+02 S_max: 0.3101391727272005D+02 - it : 67 |ssh|_max: 0.3717067730483875D+01 |U|_max: 0.1291720826499722D+00 |V|_max: 0.1049385987028414D+00 S_min: 0.2997092514369806D+02 S_max: 0.3101391717451805D+02 - it : 68 |ssh|_max: 0.3725017796413917D+01 |U|_max: 0.1287456717129999D+00 |V|_max: 0.1040924503724060D+00 S_min: 0.2997095805078223D+02 S_max: 0.3101391707632598D+02 - it : 69 |ssh|_max: 0.3783102705892638D+01 |U|_max: 0.1284629705168528D+00 |V|_max: 0.1031564825708730D+00 S_min: 0.2997099095103568D+02 S_max: 
0.3101391697813832D+02 - it : 70 |ssh|_max: 0.3818433367959464D+01 |U|_max: 0.1282813749451166D+00 |V|_max: 0.1021781423168341D+00 S_min: 0.2997102385175010D+02 S_max: 0.3101391687995094D+02 - it : 71 |ssh|_max: 0.3851131306793331D+01 |U|_max: 0.1277861814682851D+00 |V|_max: 0.1009683893629114D+00 S_min: 0.2997105673789912D+02 S_max: 0.3101391678176254D+02 - it : 72 |ssh|_max: 0.3878317161616489D+01 |U|_max: 0.1270678368284661D+00 |V|_max: 0.9974557638529602D-01 S_min: 0.2997108962446472D+02 S_max: 0.3101391668357471D+02 - it : 73 |ssh|_max: 0.3899703715688805D+01 |U|_max: 0.1263371285216474D+00 |V|_max: 0.9756453661860603D-01 S_min: 0.2997112288329826D+02 S_max: 0.3101391658539023D+02 - it : 74 |ssh|_max: 0.3915016760780269D+01 |U|_max: 0.1269321248943728D+00 |V|_max: 0.9409623040708052D-01 S_min: 0.2997115614495198D+02 S_max: 0.3101391648721021D+02 - it : 75 |ssh|_max: 0.3942061473499172D+01 |U|_max: 0.1261161951122663D+00 |V|_max: 0.8894907704785138D-01 S_min: 0.2997118999289991D+02 S_max: 0.3101391638903134D+02 - it : 76 |ssh|_max: 0.3960769464011461D+01 |U|_max: 0.1264125511157165D+00 |V|_max: 0.8198090241845961D-01 S_min: 0.2997122384368348D+02 S_max: 0.3101391629084492D+02 - it : 77 |ssh|_max: 0.3972823012982603D+01 |U|_max: 0.1262003190368890D+00 |V|_max: 0.7274702218054171D-01 S_min: 0.2997125832933568D+02 S_max: 0.3101391619263892D+02 - it : 78 |ssh|_max: 0.4182767258105401D+01 |U|_max: 0.1254429168184836D+00 |V|_max: 0.6174855656923273D-01 S_min: 0.2997129280599455D+02 S_max: 0.3101391609440241D+02 - it : 79 |ssh|_max: 0.4464320492211273D+01 |U|_max: 0.1250107534504160D+00 |V|_max: 0.4938998002230323D-01 S_min: 0.2997132725935006D+02 S_max: 0.3101391599613026D+02 - it : 80 |ssh|_max: 0.4691659593199160D+01 |U|_max: 0.1249465581843477D+00 |V|_max: 0.4406556323374541D-01 S_min: 0.2997136172406285D+02 S_max: 0.3101391589782565D+02 - it : 81 |ssh|_max: 0.4847534239764846D+01 |U|_max: 0.1246216569432278D+00 |V|_max: 0.4490728485955739D-01 S_min: 
0.2997139616869343D+02 S_max: 0.3101391579949843D+02 - it : 82 |ssh|_max: 0.4921368678320993D+01 |U|_max: 0.1250401706237382D+00 |V|_max: 0.4570676250134878D-01 S_min: 0.2997143063996485D+02 S_max: 0.3101391570116008D+02 - it : 83 |ssh|_max: 0.4909323017975378D+01 |U|_max: 0.1247182478822446D+00 |V|_max: 0.4646968203897572D-01 S_min: 0.2997146508443484D+02 S_max: 0.3101391560281827D+02 - it : 84 |ssh|_max: 0.4812117528475492D+01 |U|_max: 0.1246568340051059D+00 |V|_max: 0.5348530102148100D-01 S_min: 0.2997149953533280D+02 S_max: 0.3101391550447450D+02 - it : 85 |ssh|_max: 0.4634376427481938D+01 |U|_max: 0.1248504619935375D+00 |V|_max: 0.6513834069350445D-01 S_min: 0.2997149559614360D+02 S_max: 0.3101391540612583D+02 - it : 86 |ssh|_max: 0.4382675694681749D+01 |U|_max: 0.1252693521629411D+00 |V|_max: 0.7858657185884789D-01 S_min: 0.2997149162749577D+02 S_max: 0.3101391530777037D+02 - it : 87 |ssh|_max: 0.4208799088831883D+01 |U|_max: 0.1254101195033613D+00 |V|_max: 0.9208886037021863D-01 S_min: 0.2997152422294450D+02 S_max: 0.3101391520941244D+02 - it : 88 |ssh|_max: 0.4220457453582305D+01 |U|_max: 0.1254274875887708D+00 |V|_max: 0.1049689960927698D+00 S_min: 0.2997155681853749D+02 S_max: 0.3101391511106351D+02 - it : 89 |ssh|_max: 0.4224806935877820D+01 |U|_max: 0.1255654852969129D+00 |V|_max: 0.1157302555662952D+00 S_min: 0.2997153227260946D+02 S_max: 0.3101391501273761D+02 - it : 90 |ssh|_max: 0.4237697503344373D+01 |U|_max: 0.1260641626704890D+00 |V|_max: 0.1238266661856865D+00 S_min: 0.2997150772419434D+02 S_max: 0.3101391491444352D+02 - it : 91 |ssh|_max: 0.4269285515231942D+01 |U|_max: 0.1262749984156360D+00 |V|_max: 0.1290184649847683D+00 S_min: 0.2997153801826848D+02 S_max: 0.3101391480701454D+02 - it : 92 |ssh|_max: 0.4282296969944682D+01 |U|_max: 0.1268305620620326D+00 |V|_max: 0.1320767028793483D+00 S_min: 0.2997156828269546D+02 S_max: 0.3101391469044970D+02 - it : 93 |ssh|_max: 0.4293648162388765D+01 |U|_max: 0.1271524042492304D+00 |V|_max: 
0.1327537509901237D+00 S_min: 0.2997159899144967D+02 S_max: 0.3101391456587404D+02 - it : 94 |ssh|_max: 0.4308927978726551D+01 |U|_max: 0.1268164306612873D+00 |V|_max: 0.1334577207469428D+00 S_min: 0.2997162964590055D+02 S_max: 0.3101391443314500D+02 - it : 95 |ssh|_max: 0.4315702895557181D+01 |U|_max: 0.1264933205160509D+00 |V|_max: 0.1340208119283705D+00 S_min: 0.2997166075040118D+02 S_max: 0.3101391429169621D+02 - it : 96 |ssh|_max: 0.4326461128621080D+01 |U|_max: 0.1263418695560262D+00 |V|_max: 0.1341896431904529D+00 S_min: 0.2997169183629738D+02 S_max: 0.3101391414146741D+02 - it : 97 |ssh|_max: 0.4339387118318213D+01 |U|_max: 0.1263028910790446D+00 |V|_max: 0.1340172539470082D+00 S_min: 0.2997172340478246D+02 S_max: 0.3101391398341884D+02 - it : 98 |ssh|_max: 0.4348288854361329D+01 |U|_max: 0.1263825890978068D+00 |V|_max: 0.1332936318973822D+00 S_min: 0.2997175497128216D+02 S_max: 0.3101391381939025D+02 - it : 99 |ssh|_max: 0.4350958876474419D+01 |U|_max: 0.1262222066500761D+00 |V|_max: 0.1323665655498839D+00 S_min: 0.2997178702549860D+02 S_max: 0.3101391365143150D+02 - it : 100 |ssh|_max: 0.4359024235826554D+01 |U|_max: 0.1261633139791182D+00 |V|_max: 0.1310216302821914D+00 S_min: 0.2997181908528601D+02 S_max: 0.3101391348098758D+02 diff --git a/examples/nemo/scripts/KGOs/run.stat.bench.gfortran.small.10steps b/examples/nemo/scripts/KGOs/run.stat.bench.gfortran.small.10steps new file mode 100644 index 0000000000..b0f8f90060 --- /dev/null +++ b/examples/nemo/scripts/KGOs/run.stat.bench.gfortran.small.10steps @@ -0,0 +1,10 @@ + it : 1 |ssh|_max: 0.2336851764570087D+01 |U|_max: 0.7053248015579857D-02 |V|_max: 0.2308346115756259D-02 S_min: 0.2996908779321225D+02 S_max: 0.3101392941293399D+02 + it : 2 |ssh|_max: 0.3739162010287973D+01 |U|_max: 0.1029843199698906D-01 |V|_max: 0.9493800242775713D-02 S_min: 0.2996911000748410D+02 S_max: 0.3101392863147436D+02 + it : 3 |ssh|_max: 0.4224443521974239D+01 |U|_max: 0.1349433227265986D-01 |V|_max: 0.2284885234301404D-01 
S_min: 0.2996913553478157D+02 S_max: 0.3101392784904396D+02 + it : 4 |ssh|_max: 0.4659313564999622D+01 |U|_max: 0.1490637483762341D-01 |V|_max: 0.4048444554220138D-01 S_min: 0.2996916129319160D+02 S_max: 0.3101392717586671D+02 + it : 5 |ssh|_max: 0.4949503007019767D+01 |U|_max: 0.1145357177490677D-01 |V|_max: 0.5364770396337813D-01 S_min: 0.2996918706295251D+02 S_max: 0.3101392651315916D+02 + it : 6 |ssh|_max: 0.5140472974504293D+01 |U|_max: 0.1064859943349158D-01 |V|_max: 0.6818865538921454D-01 S_min: 0.2996921262561763D+02 S_max: 0.3101392594056643D+02 + it : 7 |ssh|_max: 0.5229361171698655D+01 |U|_max: 0.7814316351505392D-02 |V|_max: 0.8358086738711774D-01 S_min: 0.2996923864577587D+02 S_max: 0.3101392538498657D+02 + it : 8 |ssh|_max: 0.5220719217849857D+01 |U|_max: 0.1141515836389672D-01 |V|_max: 0.9761604183737865D-01 S_min: 0.2996926417117689D+02 S_max: 0.3101392490495337D+02 + it : 9 |ssh|_max: 0.5145297949564862D+01 |U|_max: 0.1416399592482473D-01 |V|_max: 0.1152759253497909D+00 S_min: 0.2996929035879930D+02 S_max: 0.3101392444612748D+02 + it : 10 |ssh|_max: 0.4979557010366737D+01 |U|_max: 0.1986785874282448D-01 |V|_max: 0.1303543987480547D+00 S_min: 0.2996931641421842D+02 S_max: 0.3101392405137852D+02 diff --git a/examples/nemo/scripts/KGOs/run.stat.bench.nvhpc.small.100steps b/examples/nemo/scripts/KGOs/run.stat.bench.nvhpc.small.100steps deleted file mode 100644 index b4e1e11f80..0000000000 --- a/examples/nemo/scripts/KGOs/run.stat.bench.nvhpc.small.100steps +++ /dev/null @@ -1,100 +0,0 @@ - it : 1 |ssh|_max: 0.2336851764570087D+01 |U|_max: 0.7052149477800684D-02 |V|_max: 0.2308260467200878D-02 S_min: 0.2996908781150693D+02 S_max: 0.3101392942716721D+02 - it : 2 |ssh|_max: 0.3739164083094879D+01 |U|_max: 0.1029616821992987D-01 |V|_max: 0.9486960009862213D-02 S_min: 0.2996910922616945D+02 S_max: 0.3101392859195436D+02 - it : 3 |ssh|_max: 0.4179101131274851D+01 |U|_max: 0.1301524988138877D-01 |V|_max: 0.2226585559898513D-01 S_min: 0.2996913484029493D+02 
S_max: 0.3101392781894970D+02 - it : 4 |ssh|_max: 0.4569875511150748D+01 |U|_max: 0.1401053780649233D-01 |V|_max: 0.3894932715115157D-01 S_min: 0.2996916048603851D+02 S_max: 0.3101392710567817D+02 - it : 5 |ssh|_max: 0.4796169575324495D+01 |U|_max: 0.1103849688785496D-01 |V|_max: 0.5247761995640531D-01 S_min: 0.2996918638309708D+02 S_max: 0.3101392644923621D+02 - it : 6 |ssh|_max: 0.4986687389489813D+01 |U|_max: 0.1061909566753376D-01 |V|_max: 0.6682435160312988D-01 S_min: 0.2996921212409725D+02 S_max: 0.3101392584662024D+02 - it : 7 |ssh|_max: 0.5137377532600873D+01 |U|_max: 0.7776206738812510D-02 |V|_max: 0.8230434351326728D-01 S_min: 0.2996923806059344D+02 S_max: 0.3101392529517780D+02 - it : 8 |ssh|_max: 0.5245041711882096D+01 |U|_max: 0.1114333312848249D-01 |V|_max: 0.9661364041990601D-01 S_min: 0.2996926384223953D+02 S_max: 0.3101392479253744D+02 - it : 9 |ssh|_max: 0.5162398664673712D+01 |U|_max: 0.1465335344080445D-01 |V|_max: 0.1139851405821512D+00 S_min: 0.2996929000744533D+02 S_max: 0.3101392433638144D+02 - it : 10 |ssh|_max: 0.5005955481222640D+01 |U|_max: 0.2023455937023348D-01 |V|_max: 0.1294762239325558D+00 S_min: 0.2996931617422945D+02 S_max: 0.3101392392447119D+02 - it : 11 |ssh|_max: 0.4855135547362951D+01 |U|_max: 0.2414868247482435D-01 |V|_max: 0.1381240863344968D+00 S_min: 0.2996934315861018D+02 S_max: 0.3101392355475714D+02 - it : 12 |ssh|_max: 0.4692755224833276D+01 |U|_max: 0.3065956316453255D-01 |V|_max: 0.1451002472720154D+00 S_min: 0.2996937014494064D+02 S_max: 0.3101392322533556D+02 - it : 13 |ssh|_max: 0.4536098100542226D+01 |U|_max: 0.3442772287761053D-01 |V|_max: 0.1495389977889847D+00 S_min: 0.2996938036344907D+02 S_max: 0.3101392293436327D+02 - it : 14 |ssh|_max: 0.4431563612482459D+01 |U|_max: 0.4081132586561843D-01 |V|_max: 0.1519115296349496D+00 S_min: 0.2996939059705999D+02 S_max: 0.3101392268007751D+02 - it : 15 |ssh|_max: 0.4427364591241171D+01 |U|_max: 0.4497061368312513D-01 |V|_max: 0.1527299906039690D+00 S_min: 
0.2996941616404622D+02 S_max: 0.3101392246083735D+02 - it : 16 |ssh|_max: 0.4374572635856428D+01 |U|_max: 0.5019633619410922D-01 |V|_max: 0.1522475888777603D+00 S_min: 0.2996944173095707D+02 S_max: 0.3101392227509159D+02 - it : 17 |ssh|_max: 0.4341821484891653D+01 |U|_max: 0.5531555071177060D-01 |V|_max: 0.1517913760503617D+00 S_min: 0.2996946794573724D+02 S_max: 0.3101392212133834D+02 - it : 18 |ssh|_max: 0.4325752650489856D+01 |U|_max: 0.5984280097672790D-01 |V|_max: 0.1506951155150552D+00 S_min: 0.2996949416824956D+02 S_max: 0.3101392199813922D+02 - it : 19 |ssh|_max: 0.4301354130036598D+01 |U|_max: 0.6412034806293720D-01 |V|_max: 0.1492011360136931D+00 S_min: 0.2996952103824535D+02 S_max: 0.3101392189957654D+02 - it : 20 |ssh|_max: 0.4266486330344253D+01 |U|_max: 0.6846696076325871D-01 |V|_max: 0.1487061949929901D+00 S_min: 0.2996954790895233D+02 S_max: 0.3101392180101297D+02 - it : 21 |ssh|_max: 0.4228899683777529D+01 |U|_max: 0.7286106786429154D-01 |V|_max: 0.1482733050868236D+00 S_min: 0.2996957538213341D+02 S_max: 0.3101392170244918D+02 - it : 22 |ssh|_max: 0.4216167750037363D+01 |U|_max: 0.7942948840870191D-01 |V|_max: 0.1471684529331192D+00 S_min: 0.2996960281724479D+02 S_max: 0.3101392160388574D+02 - it : 23 |ssh|_max: 0.4194760562125468D+01 |U|_max: 0.8259229294731039D-01 |V|_max: 0.1458617371205740D+00 S_min: 0.2996963081714462D+02 S_max: 0.3101392150532317D+02 - it : 24 |ssh|_max: 0.4168713559877566D+01 |U|_max: 0.8868057756819381D-01 |V|_max: 0.1444473218199544D+00 S_min: 0.2996965875627752D+02 S_max: 0.3101392140676202D+02 - it : 25 |ssh|_max: 0.4134949051228753D+01 |U|_max: 0.9077057185689461D-01 |V|_max: 0.1439947172949589D+00 S_min: 0.2996968725371422D+02 S_max: 0.3101392130820281D+02 - it : 26 |ssh|_max: 0.4102570874666168D+01 |U|_max: 0.9385269294321964D-01 |V|_max: 0.1432368550039930D+00 S_min: 0.2996971569976718D+02 S_max: 0.3101392120964601D+02 - it : 27 |ssh|_max: 0.4087523300284104D+01 |U|_max: 0.9719584642617853D-01 |V|_max: 
0.1422171247741089D+00 S_min: 0.2996974469969664D+02 S_max: 0.3101392111109210D+02 - it : 28 |ssh|_max: 0.4066711100043158D+01 |U|_max: 0.9981230002174134D-01 |V|_max: 0.1409251258434301D+00 S_min: 0.2996977366131379D+02 S_max: 0.3101392101254153D+02 - it : 29 |ssh|_max: 0.4041002208108393D+01 |U|_max: 0.1010000359257200D+00 |V|_max: 0.1393809648399830D+00 S_min: 0.2996980318329324D+02 S_max: 0.3101392091399473D+02 - it : 30 |ssh|_max: 0.4009285982005935D+01 |U|_max: 0.1032359453138919D+00 |V|_max: 0.1386229372479101D+00 S_min: 0.2996983266255876D+02 S_max: 0.3101392081545213D+02 - it : 31 |ssh|_max: 0.3982415222845769D+01 |U|_max: 0.1064909952801874D+00 |V|_max: 0.1376784018230775D+00 S_min: 0.2996986268498294D+02 S_max: 0.3101392071691412D+02 - it : 32 |ssh|_max: 0.3969982464352029D+01 |U|_max: 0.1090049443796477D+00 |V|_max: 0.1364874284016503D+00 S_min: 0.2996989265164330D+02 S_max: 0.3101392061838106D+02 - it : 33 |ssh|_max: 0.3951693237882826D+01 |U|_max: 0.1091781336122693D+00 |V|_max: 0.1350595740918004D+00 S_min: 0.2996992314600502D+02 S_max: 0.3101392051985331D+02 - it : 34 |ssh|_max: 0.3929024132866158D+01 |U|_max: 0.1108394851485590D+00 |V|_max: 0.1334223685706514D+00 S_min: 0.2996995357902093D+02 S_max: 0.3101392042133121D+02 - it : 35 |ssh|_max: 0.3902051104800102D+01 |U|_max: 0.1123215057056570D+00 |V|_max: 0.1324678559471949D+00 S_min: 0.2996998460938623D+02 S_max: 0.3101392032281504D+02 - it : 36 |ssh|_max: 0.3881823555866305D+01 |U|_max: 0.1135896712524384D+00 |V|_max: 0.1314603955532161D+00 S_min: 0.2997001560011670D+02 S_max: 0.3101392022430510D+02 - it : 37 |ssh|_max: 0.3867100883932392D+01 |U|_max: 0.1146836715279505D+00 |V|_max: 0.1303987239277994D+00 S_min: 0.2997004703037639D+02 S_max: 0.3101392012580166D+02 - it : 38 |ssh|_max: 0.3847949152866380D+01 |U|_max: 0.1158160520230990D+00 |V|_max: 0.1292381833357932D+00 S_min: 0.2997007844703268D+02 S_max: 0.3101392002730496D+02 - it : 39 |ssh|_max: 0.3825127641499835D+01 |U|_max: 
0.1170177368084939D+00 |V|_max: 0.1279194616032133D+00 S_min: 0.2997010997053799D+02 S_max: 0.3101391992881521D+02 - it : 40 |ssh|_max: 0.3799528294350297D+01 |U|_max: 0.1186024141910895D+00 |V|_max: 0.1265811278329400D+00 S_min: 0.2997014149200217D+02 S_max: 0.3101391983033260D+02 - it : 41 |ssh|_max: 0.3772032775515615D+01 |U|_max: 0.1194298985393341D+00 |V|_max: 0.1257718604303186D+00 S_min: 0.2997017312118060D+02 S_max: 0.3101391973185731D+02 - it : 42 |ssh|_max: 0.3751883989163516D+01 |U|_max: 0.1202594109005010D+00 |V|_max: 0.1248426990486548D+00 S_min: 0.2997020485109384D+02 S_max: 0.3101391963338951D+02 - it : 43 |ssh|_max: 0.3733872363855164D+01 |U|_max: 0.1209428304254681D+00 |V|_max: 0.1238750010512520D+00 S_min: 0.2997023681279892D+02 S_max: 0.3101391953492929D+02 - it : 44 |ssh|_max: 0.3708689314339792D+01 |U|_max: 0.1216476445565052D+00 |V|_max: 0.1230482176288930D+00 S_min: 0.2997026888352094D+02 S_max: 0.3101391943647678D+02 - it : 45 |ssh|_max: 0.3675315817226136D+01 |U|_max: 0.1217187086075321D+00 |V|_max: 0.1223729447437563D+00 S_min: 0.2997030115697795D+02 S_max: 0.3101391933803206D+02 - it : 46 |ssh|_max: 0.3638628468355767D+01 |U|_max: 0.1220536369644271D+00 |V|_max: 0.1216146579241133D+00 S_min: 0.2997033350191468D+02 S_max: 0.3101391923959520D+02 - it : 47 |ssh|_max: 0.3603078747820178D+01 |U|_max: 0.1228390657961987D+00 |V|_max: 0.1209725239866043D+00 S_min: 0.2997036595291029D+02 S_max: 0.3101391914116624D+02 - it : 48 |ssh|_max: 0.3578024562487180D+01 |U|_max: 0.1233180522855824D+00 |V|_max: 0.1204603678389096D+00 S_min: 0.2997039840296357D+02 S_max: 0.3101391904274521D+02 - it : 49 |ssh|_max: 0.3552088235955346D+01 |U|_max: 0.1237554573377439D+00 |V|_max: 0.1198699312417205D+00 S_min: 0.2997043094517513D+02 S_max: 0.3101391894433211D+02 - it : 50 |ssh|_max: 0.3524980372698633D+01 |U|_max: 0.1252151996327404D+00 |V|_max: 0.1191536339280307D+00 S_min: 0.2997046348723886D+02 S_max: 0.3101391884592691D+02 - it : 51 |ssh|_max: 
0.3496694919194285D+01 |U|_max: 0.1244822857129232D+00 |V|_max: 0.1183524274010882D+00 S_min: 0.2997049610934200D+02 S_max: 0.3101391874752954D+02 - it : 52 |ssh|_max: 0.3467275233289597D+01 |U|_max: 0.1243785561582379D+00 |V|_max: 0.1174596101044670D+00 S_min: 0.2997052873156214D+02 S_max: 0.3101391864913995D+02 - it : 53 |ssh|_max: 0.3437074898197378D+01 |U|_max: 0.1249843255015833D+00 |V|_max: 0.1165990177601439D+00 S_min: 0.2997056142115478D+02 S_max: 0.3101391855075804D+02 - it : 54 |ssh|_max: 0.3412808776861752D+01 |U|_max: 0.1262160905984583D+00 |V|_max: 0.1160025742906975D+00 S_min: 0.2997059411101460D+02 S_max: 0.3101391845238375D+02 - it : 55 |ssh|_max: 0.3391450350317827D+01 |U|_max: 0.1255493311192749D+00 |V|_max: 0.1153094788274777D+00 S_min: 0.2997062685752948D+02 S_max: 0.3101391835401706D+02 - it : 56 |ssh|_max: 0.3368770895799043D+01 |U|_max: 0.1257336544093186D+00 |V|_max: 0.1145337415700857D+00 S_min: 0.2997065960424350D+02 S_max: 0.3101391825565806D+02 - it : 57 |ssh|_max: 0.3353200034895614D+01 |U|_max: 0.1269656754842931D+00 |V|_max: 0.1136697423684689D+00 S_min: 0.2997069239915496D+02 S_max: 0.3101391815730699D+02 - it : 58 |ssh|_max: 0.3347600402425957D+01 |U|_max: 0.1272877934505800D+00 |V|_max: 0.1127229210566598D+00 S_min: 0.2997072519408026D+02 S_max: 0.3101391805896440D+02 - it : 59 |ssh|_max: 0.3437288728778376D+01 |U|_max: 0.1262930293707257D+00 |V|_max: 0.1116946626352421D+00 S_min: 0.2997071218253397D+02 S_max: 0.3101391796063123D+02 - it : 60 |ssh|_max: 0.3436080618240430D+01 |U|_max: 0.1273247312434009D+00 |V|_max: 0.1110159073289832D+00 S_min: 0.2997069923992020D+02 S_max: 0.3101391786230894D+02 - it : 61 |ssh|_max: 0.3498032084311259D+01 |U|_max: 0.1274936440033007D+00 |V|_max: 0.1102702437297952D+00 S_min: 0.2997073066598238D+02 S_max: 0.3101391776399959D+02 - it : 62 |ssh|_max: 0.3548875977064023D+01 |U|_max: 0.1287231634760127D+00 |V|_max: 0.1094421295742595D+00 S_min: 0.2997076213406373D+02 S_max: 0.3101391766570562D+02 - it 
: 63 |ssh|_max: 0.3611010538480137D+01 |U|_max: 0.1289986514802405D+00 |V|_max: 0.1085344229984256D+00 S_min: 0.2997079424665559D+02 S_max: 0.3101391756742954D+02 - it : 64 |ssh|_max: 0.3616484493265280D+01 |U|_max: 0.1293211256380585D+00 |V|_max: 0.1075492677367493D+00 S_min: 0.2997082645847948D+02 S_max: 0.3101391746917324D+02 - it : 65 |ssh|_max: 0.3700460333109640D+01 |U|_max: 0.1295006357486421D+00 |V|_max: 0.1064875051227378D+00 S_min: 0.2997085933317097D+02 S_max: 0.3101391737093724D+02 - it : 66 |ssh|_max: 0.3697987297638709D+01 |U|_max: 0.1291667138790256D+00 |V|_max: 0.1057037291354423D+00 S_min: 0.2997089223691963D+02 S_max: 0.3101391727272004D+02 - it : 67 |ssh|_max: 0.3717067730491975D+01 |U|_max: 0.1291720826502144D+00 |V|_max: 0.1049385987029615D+00 S_min: 0.2997092514369805D+02 S_max: 0.3101391717451803D+02 - it : 68 |ssh|_max: 0.3725017796426960D+01 |U|_max: 0.1287456717134016D+00 |V|_max: 0.1040924503724242D+00 S_min: 0.2997095805078222D+02 S_max: 0.3101391707632596D+02 - it : 69 |ssh|_max: 0.3783102705905647D+01 |U|_max: 0.1284629705169764D+00 |V|_max: 0.1031564825709115D+00 S_min: 0.2997099095103567D+02 S_max: 0.3101391697813829D+02 - it : 70 |ssh|_max: 0.3818433367964147D+01 |U|_max: 0.1282813749452238D+00 |V|_max: 0.1021781423168463D+00 S_min: 0.2997102385175009D+02 S_max: 0.3101391687995091D+02 - it : 71 |ssh|_max: 0.3851131306807627D+01 |U|_max: 0.1277861814675451D+00 |V|_max: 0.1009683893629269D+00 S_min: 0.2997105673789911D+02 S_max: 0.3101391678176252D+02 - it : 72 |ssh|_max: 0.3878317161632458D+01 |U|_max: 0.1270678368270693D+00 |V|_max: 0.9974557638532533D-01 S_min: 0.2997108962446471D+02 S_max: 0.3101391668357468D+02 - it : 73 |ssh|_max: 0.3899703715704921D+01 |U|_max: 0.1263371284791457D+00 |V|_max: 0.9756453661864138D-01 S_min: 0.2997112288329826D+02 S_max: 0.3101391658539021D+02 - it : 74 |ssh|_max: 0.3915016760797248D+01 |U|_max: 0.1269321248953413D+00 |V|_max: 0.9409623040711927D-01 S_min: 0.2997115614495198D+02 S_max: 
0.3101391648721019D+02 - it : 75 |ssh|_max: 0.3942061473515380D+01 |U|_max: 0.1261161951133135D+00 |V|_max: 0.8894907704792182D-01 S_min: 0.2997118999289990D+02 S_max: 0.3101391638903132D+02 - it : 76 |ssh|_max: 0.3960769464030260D+01 |U|_max: 0.1264125511167559D+00 |V|_max: 0.8198090241846254D-01 S_min: 0.2997122384368346D+02 S_max: 0.3101391629084490D+02 - it : 77 |ssh|_max: 0.3972823013001658D+01 |U|_max: 0.1262003190374280D+00 |V|_max: 0.7274702218056431D-01 S_min: 0.2997125832933567D+02 S_max: 0.3101391619263891D+02 - it : 78 |ssh|_max: 0.4182767258105897D+01 |U|_max: 0.1254429168191073D+00 |V|_max: 0.6174855656926707D-01 S_min: 0.2997129280599453D+02 S_max: 0.3101391609440239D+02 - it : 79 |ssh|_max: 0.4464320492211743D+01 |U|_max: 0.1250107534508194D+00 |V|_max: 0.4938998002235008D-01 S_min: 0.2997132725935005D+02 S_max: 0.3101391599613024D+02 - it : 80 |ssh|_max: 0.4691659593199857D+01 |U|_max: 0.1249465581847335D+00 |V|_max: 0.4406556323340609D-01 S_min: 0.2997136172406284D+02 S_max: 0.3101391589782563D+02 - it : 81 |ssh|_max: 0.4847534239765801D+01 |U|_max: 0.1246216569433034D+00 |V|_max: 0.4490728485923341D-01 S_min: 0.2997139616869341D+02 S_max: 0.3101391579949841D+02 - it : 82 |ssh|_max: 0.4921368678323269D+01 |U|_max: 0.1250401706238890D+00 |V|_max: 0.4570676250097606D-01 S_min: 0.2997143063996483D+02 S_max: 0.3101391570116005D+02 - it : 83 |ssh|_max: 0.4909323017977909D+01 |U|_max: 0.1247182478824475D+00 |V|_max: 0.4646968203864665D-01 S_min: 0.2997146508443482D+02 S_max: 0.3101391560281824D+02 - it : 84 |ssh|_max: 0.4812117528478752D+01 |U|_max: 0.1246568340055924D+00 |V|_max: 0.5348530102140409D-01 S_min: 0.2997149953533279D+02 S_max: 0.3101391550447448D+02 - it : 85 |ssh|_max: 0.4634376427486580D+01 |U|_max: 0.1248504619938853D+00 |V|_max: 0.6513834069334297D-01 S_min: 0.2997149559614358D+02 S_max: 0.3101391540612580D+02 - it : 86 |ssh|_max: 0.4382675694687798D+01 |U|_max: 0.1252693521630729D+00 |V|_max: 0.7858657185868224D-01 S_min: 
0.2997149162749575D+02 S_max: 0.3101391530777033D+02 - it : 87 |ssh|_max: 0.4208799088839814D+01 |U|_max: 0.1254101195034855D+00 |V|_max: 0.9208886037003144D-01 S_min: 0.2997152422294449D+02 S_max: 0.3101391520941240D+02 - it : 88 |ssh|_max: 0.4220457453590830D+01 |U|_max: 0.1254274875890097D+00 |V|_max: 0.1049689960926161D+00 S_min: 0.2997155681853747D+02 S_max: 0.3101391511106346D+02 - it : 89 |ssh|_max: 0.4224806935885991D+01 |U|_max: 0.1255654852971839D+00 |V|_max: 0.1157302555661710D+00 S_min: 0.2997153227260944D+02 S_max: 0.3101391501273756D+02 - it : 90 |ssh|_max: 0.4237697503352208D+01 |U|_max: 0.1260641626709973D+00 |V|_max: 0.1238266661855383D+00 S_min: 0.2997150772419432D+02 S_max: 0.3101391491444346D+02 - it : 91 |ssh|_max: 0.4269285515239898D+01 |U|_max: 0.1262749984161399D+00 |V|_max: 0.1290184649846225D+00 S_min: 0.2997153801826847D+02 S_max: 0.3101391480701448D+02 - it : 92 |ssh|_max: 0.4282296969952965D+01 |U|_max: 0.1268305620623284D+00 |V|_max: 0.1320767028793223D+00 S_min: 0.2997156828269544D+02 S_max: 0.3101391469044965D+02 - it : 93 |ssh|_max: 0.4293648162397032D+01 |U|_max: 0.1271524042496063D+00 |V|_max: 0.1327537509899229D+00 S_min: 0.2997159899144967D+02 S_max: 0.3101391456587399D+02 - it : 94 |ssh|_max: 0.4308927978736097D+01 |U|_max: 0.1268164306616869D+00 |V|_max: 0.1334577207466611D+00 S_min: 0.2997162964590055D+02 S_max: 0.3101391443314495D+02 - it : 95 |ssh|_max: 0.4315702895567394D+01 |U|_max: 0.1264933205162474D+00 |V|_max: 0.1340208119280830D+00 S_min: 0.2997166075040117D+02 S_max: 0.3101391429169616D+02 - it : 96 |ssh|_max: 0.4326461128630202D+01 |U|_max: 0.1263418695562006D+00 |V|_max: 0.1341896431901131D+00 S_min: 0.2997169183629738D+02 S_max: 0.3101391414146736D+02 - it : 97 |ssh|_max: 0.4339387118327937D+01 |U|_max: 0.1263028910794723D+00 |V|_max: 0.1340172539467550D+00 S_min: 0.2997172340478246D+02 S_max: 0.3101391398341879D+02 - it : 98 |ssh|_max: 0.4348288854371773D+01 |U|_max: 0.1263825890982318D+00 |V|_max: 
0.1332936318972628D+00 S_min: 0.2997175497128216D+02 S_max: 0.3101391381939020D+02 - it : 99 |ssh|_max: 0.4350958876484044D+01 |U|_max: 0.1262222066505068D+00 |V|_max: 0.1323665655500854D+00 S_min: 0.2997178702549860D+02 S_max: 0.3101391365143146D+02 - it : 100 |ssh|_max: 0.4359024235836054D+01 |U|_max: 0.1261633139795349D+00 |V|_max: 0.1310216302824607D+00 S_min: 0.2997181908528602D+02 S_max: 0.3101391348098755D+02 diff --git a/examples/nemo/scripts/KGOs/run.stat.bench.nvhpc.small.10steps b/examples/nemo/scripts/KGOs/run.stat.bench.nvhpc.small.10steps new file mode 100644 index 0000000000..2c88a954b4 --- /dev/null +++ b/examples/nemo/scripts/KGOs/run.stat.bench.nvhpc.small.10steps @@ -0,0 +1,10 @@ + it : 1 |ssh|_max: 0.2336851764570087D+01 |U|_max: 0.7053248015579857D-02 |V|_max: 0.2308346115756259D-02 S_min: 0.2996908779321225D+02 S_max: 0.3101392941293399D+02 + it : 2 |ssh|_max: 0.3739162010287973D+01 |U|_max: 0.1029843199698907D-01 |V|_max: 0.9493800242775704D-02 S_min: 0.2996911000748410D+02 S_max: 0.3101392863147436D+02 + it : 3 |ssh|_max: 0.4224443521974239D+01 |U|_max: 0.1349433227265986D-01 |V|_max: 0.2284885234301404D-01 S_min: 0.2996913553478157D+02 S_max: 0.3101392784904396D+02 + it : 4 |ssh|_max: 0.4659313564999622D+01 |U|_max: 0.1490637483762343D-01 |V|_max: 0.4048444554220137D-01 S_min: 0.2996916129319160D+02 S_max: 0.3101392717586671D+02 + it : 5 |ssh|_max: 0.4949503007019763D+01 |U|_max: 0.1145357177490683D-01 |V|_max: 0.5364770396338010D-01 S_min: 0.2996918706295251D+02 S_max: 0.3101392651315916D+02 + it : 6 |ssh|_max: 0.5140472974504293D+01 |U|_max: 0.1064859943349158D-01 |V|_max: 0.6818865538921461D-01 S_min: 0.2996921262561763D+02 S_max: 0.3101392594056643D+02 + it : 7 |ssh|_max: 0.5229361171698658D+01 |U|_max: 0.7814316351505422D-02 |V|_max: 0.8358086738711774D-01 S_min: 0.2996923864577587D+02 S_max: 0.3101392538498657D+02 + it : 8 |ssh|_max: 0.5220719217849735D+01 |U|_max: 0.1141515836390011D-01 |V|_max: 0.9761604183737931D-01 S_min: 
0.2996926417117689D+02 S_max: 0.3101392490495337D+02 + it : 9 |ssh|_max: 0.5145297949564718D+01 |U|_max: 0.1416399592481431D-01 |V|_max: 0.1152759253497911D+00 S_min: 0.2996929035879930D+02 S_max: 0.3101392444612748D+02 + it : 10 |ssh|_max: 0.4979557010366683D+01 |U|_max: 0.1986785874282657D-01 |V|_max: 0.1303543987480519D+00 S_min: 0.2996931641421842D+02 S_max: 0.3101392405137852D+02 diff --git a/examples/nemo/scripts/KGOs/run.stat.bench.oneapi.small.100steps b/examples/nemo/scripts/KGOs/run.stat.bench.oneapi.small.100steps deleted file mode 100644 index 34dfea5c76..0000000000 --- a/examples/nemo/scripts/KGOs/run.stat.bench.oneapi.small.100steps +++ /dev/null @@ -1,100 +0,0 @@ - it : 1 |ssh|_max: 0.2336851764570087D+01 |U|_max: 0.7052149477800684D-02 |V|_max: 0.2308260467200877D-02 S_min: 0.2996908781150693D+02 S_max: 0.3101392942716721D+02 - it : 2 |ssh|_max: 0.3739164083094878D+01 |U|_max: 0.1029616821992987D-01 |V|_max: 0.9486960009862211D-02 S_min: 0.2996910922616945D+02 S_max: 0.3101392859195436D+02 - it : 3 |ssh|_max: 0.4179101131274851D+01 |U|_max: 0.1301524988138879D-01 |V|_max: 0.2226585559898513D-01 S_min: 0.2996913484029493D+02 S_max: 0.3101392781894970D+02 - it : 4 |ssh|_max: 0.4569875511150748D+01 |U|_max: 0.1401053780649201D-01 |V|_max: 0.3894932715115156D-01 S_min: 0.2996916048603851D+02 S_max: 0.3101392710567817D+02 - it : 5 |ssh|_max: 0.4796169575324639D+01 |U|_max: 0.1103849688785435D-01 |V|_max: 0.5247761995640531D-01 S_min: 0.2996918638309708D+02 S_max: 0.3101392644923621D+02 - it : 6 |ssh|_max: 0.4986687389489863D+01 |U|_max: 0.1061909566753376D-01 |V|_max: 0.6682435160313002D-01 S_min: 0.2996921212409725D+02 S_max: 0.3101392584662024D+02 - it : 7 |ssh|_max: 0.5137377532600958D+01 |U|_max: 0.7776206738812892D-02 |V|_max: 0.8230434351326746D-01 S_min: 0.2996923806059345D+02 S_max: 0.3101392529517780D+02 - it : 8 |ssh|_max: 0.5245041711882124D+01 |U|_max: 0.1114333312848225D-01 |V|_max: 0.9661364041991180D-01 S_min: 0.2996926384223953D+02 S_max: 
0.3101392479253744D+02 - it : 9 |ssh|_max: 0.5162398664673749D+01 |U|_max: 0.1465335344080455D-01 |V|_max: 0.1139851405821525D+00 S_min: 0.2996929000744533D+02 S_max: 0.3101392433638144D+02 - it : 10 |ssh|_max: 0.5005955481222619D+01 |U|_max: 0.2023455937023107D-01 |V|_max: 0.1294762239325535D+00 S_min: 0.2996931617422945D+02 S_max: 0.3101392392447119D+02 - it : 11 |ssh|_max: 0.4855135547363155D+01 |U|_max: 0.2414868247481750D-01 |V|_max: 0.1381240863345169D+00 S_min: 0.2996934315861018D+02 S_max: 0.3101392355475714D+02 - it : 12 |ssh|_max: 0.4692755224833184D+01 |U|_max: 0.3065956316452254D-01 |V|_max: 0.1451002472720360D+00 S_min: 0.2996937014494064D+02 S_max: 0.3101392322533556D+02 - it : 13 |ssh|_max: 0.4536098100542291D+01 |U|_max: 0.3442772287760921D-01 |V|_max: 0.1495389977889906D+00 S_min: 0.2996938036344907D+02 S_max: 0.3101392293436327D+02 - it : 14 |ssh|_max: 0.4431563612482432D+01 |U|_max: 0.4081132586570636D-01 |V|_max: 0.1519115296349336D+00 S_min: 0.2996939059705999D+02 S_max: 0.3101392268007751D+02 - it : 15 |ssh|_max: 0.4427364591240694D+01 |U|_max: 0.4497061368218146D-01 |V|_max: 0.1527299906039677D+00 S_min: 0.2996941616404622D+02 S_max: 0.3101392246083734D+02 - it : 16 |ssh|_max: 0.4374572635856301D+01 |U|_max: 0.5019633620636200D-01 |V|_max: 0.1522475888777536D+00 S_min: 0.2996944173095707D+02 S_max: 0.3101392227509158D+02 - it : 17 |ssh|_max: 0.4341821484889683D+01 |U|_max: 0.5531555071540357D-01 |V|_max: 0.1517913760503816D+00 S_min: 0.2996946794573724D+02 S_max: 0.3101392212133833D+02 - it : 18 |ssh|_max: 0.4325752650484654D+01 |U|_max: 0.5984280097656534D-01 |V|_max: 0.1506951155150623D+00 S_min: 0.2996949416824957D+02 S_max: 0.3101392199813921D+02 - it : 19 |ssh|_max: 0.4301354130031953D+01 |U|_max: 0.6412034806212455D-01 |V|_max: 0.1492011360137230D+00 S_min: 0.2996952103824535D+02 S_max: 0.3101392189957653D+02 - it : 20 |ssh|_max: 0.4266486330341614D+01 |U|_max: 0.6846696076334163D-01 |V|_max: 0.1487061949930155D+00 S_min: 
0.2996954790895234D+02 S_max: 0.3101392180101296D+02 - it : 21 |ssh|_max: 0.4228899683771286D+01 |U|_max: 0.7286106786441840D-01 |V|_max: 0.1482733050868589D+00 S_min: 0.2996957538213342D+02 S_max: 0.3101392170244917D+02 - it : 22 |ssh|_max: 0.4216167750032211D+01 |U|_max: 0.7942948840865724D-01 |V|_max: 0.1471684529331264D+00 S_min: 0.2996960281724480D+02 S_max: 0.3101392160388572D+02 - it : 23 |ssh|_max: 0.4194760562121468D+01 |U|_max: 0.8259229294736969D-01 |V|_max: 0.1458617371205686D+00 S_min: 0.2996963081714462D+02 S_max: 0.3101392150532316D+02 - it : 24 |ssh|_max: 0.4168713559873026D+01 |U|_max: 0.8868057756831037D-01 |V|_max: 0.1444473218199480D+00 S_min: 0.2996965875627752D+02 S_max: 0.3101392140676201D+02 - it : 25 |ssh|_max: 0.4134949051225482D+01 |U|_max: 0.9077057185702207D-01 |V|_max: 0.1439947172949192D+00 S_min: 0.2996968725371423D+02 S_max: 0.3101392130820279D+02 - it : 26 |ssh|_max: 0.4102570874661344D+01 |U|_max: 0.9385269294336257D-01 |V|_max: 0.1432368550039935D+00 S_min: 0.2996971569976719D+02 S_max: 0.3101392120964600D+02 - it : 27 |ssh|_max: 0.4087523300279763D+01 |U|_max: 0.9719584642625356D-01 |V|_max: 0.1422171247740602D+00 S_min: 0.2996974469969664D+02 S_max: 0.3101392111109208D+02 - it : 28 |ssh|_max: 0.4066711100039126D+01 |U|_max: 0.9981230002172750D-01 |V|_max: 0.1409251258433966D+00 S_min: 0.2996977366131381D+02 S_max: 0.3101392101254152D+02 - it : 29 |ssh|_max: 0.4041002208105124D+01 |U|_max: 0.1010000359256210D+00 |V|_max: 0.1393809648399793D+00 S_min: 0.2996980318329325D+02 S_max: 0.3101392091399473D+02 - it : 30 |ssh|_max: 0.4009285982003260D+01 |U|_max: 0.1032359453137507D+00 |V|_max: 0.1386229372478241D+00 S_min: 0.2996983266255877D+02 S_max: 0.3101392081545213D+02 - it : 31 |ssh|_max: 0.3982415222842367D+01 |U|_max: 0.1064909952800815D+00 |V|_max: 0.1376784018230019D+00 S_min: 0.2996986268498295D+02 S_max: 0.3101392071691412D+02 - it : 32 |ssh|_max: 0.3969982464349032D+01 |U|_max: 0.1090049443796141D+00 |V|_max: 
0.1364874284016058D+00 S_min: 0.2996989265164331D+02 S_max: 0.3101392061838107D+02 - it : 33 |ssh|_max: 0.3951693237880205D+01 |U|_max: 0.1091781336122528D+00 |V|_max: 0.1350595740917290D+00 S_min: 0.2996992314600503D+02 S_max: 0.3101392051985331D+02 - it : 34 |ssh|_max: 0.3929024132864395D+01 |U|_max: 0.1108394851484848D+00 |V|_max: 0.1334223685706007D+00 S_min: 0.2996995357902093D+02 S_max: 0.3101392042133121D+02 - it : 35 |ssh|_max: 0.3902051104798368D+01 |U|_max: 0.1123215057054647D+00 |V|_max: 0.1324678559470817D+00 S_min: 0.2996998460938623D+02 S_max: 0.3101392032281504D+02 - it : 36 |ssh|_max: 0.3881823555864022D+01 |U|_max: 0.1135896712523749D+00 |V|_max: 0.1314603955531347D+00 S_min: 0.2997001560011672D+02 S_max: 0.3101392022430511D+02 - it : 37 |ssh|_max: 0.3867100883930298D+01 |U|_max: 0.1146836715285295D+00 |V|_max: 0.1303987239276847D+00 S_min: 0.2997004703037641D+02 S_max: 0.3101392012580167D+02 - it : 38 |ssh|_max: 0.3847949152864445D+01 |U|_max: 0.1158160520230024D+00 |V|_max: 0.1292381833357216D+00 S_min: 0.2997007844703269D+02 S_max: 0.3101392002730497D+02 - it : 39 |ssh|_max: 0.3825127641498424D+01 |U|_max: 0.1170177368083699D+00 |V|_max: 0.1279194616031618D+00 S_min: 0.2997010997053799D+02 S_max: 0.3101391992881522D+02 - it : 40 |ssh|_max: 0.3799528294348854D+01 |U|_max: 0.1186024141917715D+00 |V|_max: 0.1265811278328482D+00 S_min: 0.2997014149200217D+02 S_max: 0.3101391983033262D+02 - it : 41 |ssh|_max: 0.3772032775513986D+01 |U|_max: 0.1194298985388969D+00 |V|_max: 0.1257718604302326D+00 S_min: 0.2997017312118061D+02 S_max: 0.3101391973185734D+02 - it : 42 |ssh|_max: 0.3751883989161505D+01 |U|_max: 0.1202594109001221D+00 |V|_max: 0.1248426990485798D+00 S_min: 0.2997020485109385D+02 S_max: 0.3101391963338953D+02 - it : 43 |ssh|_max: 0.3733872363853362D+01 |U|_max: 0.1209428304251624D+00 |V|_max: 0.1238750010512002D+00 S_min: 0.2997023681279893D+02 S_max: 0.3101391953492931D+02 - it : 44 |ssh|_max: 0.3708689314337767D+01 |U|_max: 
0.1216476445561787D+00 |V|_max: 0.1230482176287931D+00 S_min: 0.2997026888352094D+02 S_max: 0.3101391943647680D+02 - it : 45 |ssh|_max: 0.3675315817224632D+01 |U|_max: 0.1217187086071681D+00 |V|_max: 0.1223729447437032D+00 S_min: 0.2997030115697795D+02 S_max: 0.3101391933803208D+02 - it : 46 |ssh|_max: 0.3638628468354382D+01 |U|_max: 0.1220536369642724D+00 |V|_max: 0.1216146579240835D+00 S_min: 0.2997033350191469D+02 S_max: 0.3101391923959522D+02 - it : 47 |ssh|_max: 0.3603078747819178D+01 |U|_max: 0.1228390657960318D+00 |V|_max: 0.1209725239865438D+00 S_min: 0.2997036595291029D+02 S_max: 0.3101391914116627D+02 - it : 48 |ssh|_max: 0.3578024562485622D+01 |U|_max: 0.1233180522854220D+00 |V|_max: 0.1204603678388465D+00 S_min: 0.2997039840296357D+02 S_max: 0.3101391904274524D+02 - it : 49 |ssh|_max: 0.3552088235953243D+01 |U|_max: 0.1237554573371921D+00 |V|_max: 0.1198699312416473D+00 S_min: 0.2997043094517513D+02 S_max: 0.3101391894433213D+02 - it : 50 |ssh|_max: 0.3524980372696811D+01 |U|_max: 0.1252151996322733D+00 |V|_max: 0.1191536339279773D+00 S_min: 0.2997046348723886D+02 S_max: 0.3101391884592693D+02 - it : 51 |ssh|_max: 0.3496694919192611D+01 |U|_max: 0.1244822857124727D+00 |V|_max: 0.1183524274010599D+00 S_min: 0.2997049610934200D+02 S_max: 0.3101391874752956D+02 - it : 52 |ssh|_max: 0.3467275233288531D+01 |U|_max: 0.1243785561578379D+00 |V|_max: 0.1174596101043964D+00 S_min: 0.2997052873156214D+02 S_max: 0.3101391864913996D+02 - it : 53 |ssh|_max: 0.3437074898195704D+01 |U|_max: 0.1249843255013401D+00 |V|_max: 0.1165990177600484D+00 S_min: 0.2997056142115478D+02 S_max: 0.3101391855075804D+02 - it : 54 |ssh|_max: 0.3412808776859598D+01 |U|_max: 0.1262160905982095D+00 |V|_max: 0.1160025742906464D+00 S_min: 0.2997059411101460D+02 S_max: 0.3101391845238375D+02 - it : 55 |ssh|_max: 0.3391450350315754D+01 |U|_max: 0.1255493311190441D+00 |V|_max: 0.1153094788274204D+00 S_min: 0.2997062685752948D+02 S_max: 0.3101391835401706D+02 - it : 56 |ssh|_max: 
0.3368770895796970D+01 |U|_max: 0.1257336544090192D+00 |V|_max: 0.1145337415699901D+00 S_min: 0.2997065960424351D+02 S_max: 0.3101391825565806D+02 - it : 57 |ssh|_max: 0.3353200034885668D+01 |U|_max: 0.1269656754817496D+00 |V|_max: 0.1136697423684110D+00 S_min: 0.2997069239915496D+02 S_max: 0.3101391815730700D+02 - it : 58 |ssh|_max: 0.3347600402409617D+01 |U|_max: 0.1272877934479490D+00 |V|_max: 0.1127229210566474D+00 S_min: 0.2997072519408027D+02 S_max: 0.3101391805896441D+02 - it : 59 |ssh|_max: 0.3437288728774063D+01 |U|_max: 0.1262930293681089D+00 |V|_max: 0.1116946626351905D+00 S_min: 0.2997071218253397D+02 S_max: 0.3101391796063123D+02 - it : 60 |ssh|_max: 0.3436080618232489D+01 |U|_max: 0.1273247312426725D+00 |V|_max: 0.1110159073289249D+00 S_min: 0.2997069923992021D+02 S_max: 0.3101391786230895D+02 - it : 61 |ssh|_max: 0.3498032084304711D+01 |U|_max: 0.1274936440026631D+00 |V|_max: 0.1102702437296927D+00 S_min: 0.2997073066598238D+02 S_max: 0.3101391776399961D+02 - it : 62 |ssh|_max: 0.3548875977058656D+01 |U|_max: 0.1287231634754545D+00 |V|_max: 0.1094421295742185D+00 S_min: 0.2997076213406373D+02 S_max: 0.3101391766570564D+02 - it : 63 |ssh|_max: 0.3611010538473953D+01 |U|_max: 0.1289986514796432D+00 |V|_max: 0.1085344229984183D+00 S_min: 0.2997079424665559D+02 S_max: 0.3101391756742956D+02 - it : 64 |ssh|_max: 0.3616484493257231D+01 |U|_max: 0.1293211256376708D+00 |V|_max: 0.1075492677366635D+00 S_min: 0.2997082645847947D+02 S_max: 0.3101391746917326D+02 - it : 65 |ssh|_max: 0.3700460333099680D+01 |U|_max: 0.1295006357482681D+00 |V|_max: 0.1064875051227292D+00 S_min: 0.2997085933317096D+02 S_max: 0.3101391737093727D+02 - it : 66 |ssh|_max: 0.3697987297630196D+01 |U|_max: 0.1291667138786394D+00 |V|_max: 0.1057037291354404D+00 S_min: 0.2997089223691962D+02 S_max: 0.3101391727272008D+02 - it : 67 |ssh|_max: 0.3717067730483907D+01 |U|_max: 0.1291720826499367D+00 |V|_max: 0.1049385987028540D+00 S_min: 0.2997092514369804D+02 S_max: 0.3101391717451807D+02 - it 
: 68 |ssh|_max: 0.3725017796414757D+01 |U|_max: 0.1287456717131272D+00 |V|_max: 0.1040924503723860D+00 S_min: 0.2997095805078221D+02 S_max: 0.3101391707632600D+02 - it : 69 |ssh|_max: 0.3783102705894147D+01 |U|_max: 0.1284629705169741D+00 |V|_max: 0.1031564825708750D+00 S_min: 0.2997099095103566D+02 S_max: 0.3101391697813833D+02 - it : 70 |ssh|_max: 0.3818433367956780D+01 |U|_max: 0.1282813749452129D+00 |V|_max: 0.1021781423168145D+00 S_min: 0.2997102385175008D+02 S_max: 0.3101391687995095D+02 - it : 71 |ssh|_max: 0.3851131306796907D+01 |U|_max: 0.1277861814683754D+00 |V|_max: 0.1009683893628891D+00 S_min: 0.2997105673789910D+02 S_max: 0.3101391678176255D+02 - it : 72 |ssh|_max: 0.3878317161620401D+01 |U|_max: 0.1270678368286666D+00 |V|_max: 0.9974557638525483D-01 S_min: 0.2997108962446470D+02 S_max: 0.3101391668357472D+02 - it : 73 |ssh|_max: 0.3899703715693884D+01 |U|_max: 0.1263371285217334D+00 |V|_max: 0.9756453661867369D-01 S_min: 0.2997112288329824D+02 S_max: 0.3101391658539024D+02 - it : 74 |ssh|_max: 0.3915016760786203D+01 |U|_max: 0.1269321248951530D+00 |V|_max: 0.9409623040711934D-01 S_min: 0.2997115614495196D+02 S_max: 0.3101391648721023D+02 - it : 75 |ssh|_max: 0.3942061473503193D+01 |U|_max: 0.1261161951131452D+00 |V|_max: 0.8894907704786824D-01 S_min: 0.2997118999289989D+02 S_max: 0.3101391638903135D+02 - it : 76 |ssh|_max: 0.3960769464016469D+01 |U|_max: 0.1264125511156541D+00 |V|_max: 0.8198090241846065D-01 S_min: 0.2997122384368346D+02 S_max: 0.3101391629084494D+02 - it : 77 |ssh|_max: 0.3972823012988623D+01 |U|_max: 0.1262003190368183D+00 |V|_max: 0.7274702218052119D-01 S_min: 0.2997125832933565D+02 S_max: 0.3101391619263894D+02 - it : 78 |ssh|_max: 0.4182767258105465D+01 |U|_max: 0.1254429168184168D+00 |V|_max: 0.6174855656919787D-01 S_min: 0.2997129280599452D+02 S_max: 0.3101391609440242D+02 - it : 79 |ssh|_max: 0.4464320492211434D+01 |U|_max: 0.1250107534501377D+00 |V|_max: 0.4938998002225516D-01 S_min: 0.2997132725935004D+02 S_max: 
0.3101391599613028D+02 - it : 80 |ssh|_max: 0.4691659593198860D+01 |U|_max: 0.1249465581839708D+00 |V|_max: 0.4406556323372962D-01 S_min: 0.2997136172406283D+02 S_max: 0.3101391589782568D+02 - it : 81 |ssh|_max: 0.4847534239764419D+01 |U|_max: 0.1246216569432732D+00 |V|_max: 0.4490728485956302D-01 S_min: 0.2997139616869340D+02 S_max: 0.3101391579949845D+02 - it : 82 |ssh|_max: 0.4921368678321179D+01 |U|_max: 0.1250401706238481D+00 |V|_max: 0.4570676250138464D-01 S_min: 0.2997143063996483D+02 S_max: 0.3101391570116010D+02 - it : 83 |ssh|_max: 0.4909323017975257D+01 |U|_max: 0.1247182478823917D+00 |V|_max: 0.4646968203901422D-01 S_min: 0.2997146508443481D+02 S_max: 0.3101391560281829D+02 - it : 84 |ssh|_max: 0.4812117528474969D+01 |U|_max: 0.1246568340052443D+00 |V|_max: 0.5348530102156426D-01 S_min: 0.2997149953533278D+02 S_max: 0.3101391550447453D+02 - it : 85 |ssh|_max: 0.4634376427481367D+01 |U|_max: 0.1248504619934433D+00 |V|_max: 0.6513834069351385D-01 S_min: 0.2997149559614357D+02 S_max: 0.3101391540612586D+02 - it : 86 |ssh|_max: 0.4382675694680996D+01 |U|_max: 0.1252693521627229D+00 |V|_max: 0.7858657185886725D-01 S_min: 0.2997149162749574D+02 S_max: 0.3101391530777039D+02 - it : 87 |ssh|_max: 0.4208799088827834D+01 |U|_max: 0.1254101195031483D+00 |V|_max: 0.9208886037021496D-01 S_min: 0.2997152422294448D+02 S_max: 0.3101391520941246D+02 - it : 88 |ssh|_max: 0.4220457453579241D+01 |U|_max: 0.1254274875888451D+00 |V|_max: 0.1049689960927978D+00 S_min: 0.2997155681853747D+02 S_max: 0.3101391511106353D+02 - it : 89 |ssh|_max: 0.4224806935874703D+01 |U|_max: 0.1255654852970041D+00 |V|_max: 0.1157302555663579D+00 S_min: 0.2997153227260944D+02 S_max: 0.3101391501273763D+02 - it : 90 |ssh|_max: 0.4237697503341710D+01 |U|_max: 0.1260641626704384D+00 |V|_max: 0.1238266661857331D+00 S_min: 0.2997150772419433D+02 S_max: 0.3101391491444353D+02 - it : 91 |ssh|_max: 0.4269285515228033D+01 |U|_max: 0.1262749984156161D+00 |V|_max: 0.1290184649847892D+00 S_min: 
0.2997153801826847D+02 S_max: 0.3101391480701455D+02 - it : 92 |ssh|_max: 0.4282296969940004D+01 |U|_max: 0.1268305620621147D+00 |V|_max: 0.1320767028791575D+00 S_min: 0.2997156828269545D+02 S_max: 0.3101391469044972D+02 - it : 93 |ssh|_max: 0.4293648162385427D+01 |U|_max: 0.1271524042493944D+00 |V|_max: 0.1327537509901421D+00 S_min: 0.2997159899144966D+02 S_max: 0.3101391456587405D+02 - it : 94 |ssh|_max: 0.4308927978724404D+01 |U|_max: 0.1268164306614173D+00 |V|_max: 0.1334577207468123D+00 S_min: 0.2997162964590054D+02 S_max: 0.3101391443314503D+02 - it : 95 |ssh|_max: 0.4315702895554523D+01 |U|_max: 0.1264933205161273D+00 |V|_max: 0.1340208119281884D+00 S_min: 0.2997166075040117D+02 S_max: 0.3101391429169623D+02 - it : 96 |ssh|_max: 0.4326461128616466D+01 |U|_max: 0.1263418695560965D+00 |V|_max: 0.1341896431902789D+00 S_min: 0.2997169183629737D+02 S_max: 0.3101391414146742D+02 - it : 97 |ssh|_max: 0.4339387118314458D+01 |U|_max: 0.1263028910791693D+00 |V|_max: 0.1340172539470355D+00 S_min: 0.2997172340478246D+02 S_max: 0.3101391398341886D+02 - it : 98 |ssh|_max: 0.4348288854357979D+01 |U|_max: 0.1263825890979448D+00 |V|_max: 0.1332936318975038D+00 S_min: 0.2997175497128216D+02 S_max: 0.3101391381939027D+02 - it : 99 |ssh|_max: 0.4350958876472678D+01 |U|_max: 0.1262222066502535D+00 |V|_max: 0.1323665655500089D+00 S_min: 0.2997178702549861D+02 S_max: 0.3101391365143152D+02 - it : 100 |ssh|_max: 0.4359024235823600D+01 |U|_max: 0.1261633139792707D+00 |V|_max: 0.1310216302823160D+00 S_min: 0.2997181908528601D+02 S_max: 0.3101391348098761D+02 diff --git a/examples/nemo/scripts/KGOs/run.stat.bench.oneapi.small.10steps b/examples/nemo/scripts/KGOs/run.stat.bench.oneapi.small.10steps new file mode 100644 index 0000000000..b0f8f90060 --- /dev/null +++ b/examples/nemo/scripts/KGOs/run.stat.bench.oneapi.small.10steps @@ -0,0 +1,10 @@ + it : 1 |ssh|_max: 0.2336851764570087D+01 |U|_max: 0.7053248015579857D-02 |V|_max: 0.2308346115756259D-02 S_min: 0.2996908779321225D+02 
S_max: 0.3101392941293399D+02 + it : 2 |ssh|_max: 0.3739162010287973D+01 |U|_max: 0.1029843199698906D-01 |V|_max: 0.9493800242775713D-02 S_min: 0.2996911000748410D+02 S_max: 0.3101392863147436D+02 + it : 3 |ssh|_max: 0.4224443521974239D+01 |U|_max: 0.1349433227265986D-01 |V|_max: 0.2284885234301404D-01 S_min: 0.2996913553478157D+02 S_max: 0.3101392784904396D+02 + it : 4 |ssh|_max: 0.4659313564999622D+01 |U|_max: 0.1490637483762341D-01 |V|_max: 0.4048444554220138D-01 S_min: 0.2996916129319160D+02 S_max: 0.3101392717586671D+02 + it : 5 |ssh|_max: 0.4949503007019767D+01 |U|_max: 0.1145357177490677D-01 |V|_max: 0.5364770396337813D-01 S_min: 0.2996918706295251D+02 S_max: 0.3101392651315916D+02 + it : 6 |ssh|_max: 0.5140472974504293D+01 |U|_max: 0.1064859943349158D-01 |V|_max: 0.6818865538921454D-01 S_min: 0.2996921262561763D+02 S_max: 0.3101392594056643D+02 + it : 7 |ssh|_max: 0.5229361171698655D+01 |U|_max: 0.7814316351505392D-02 |V|_max: 0.8358086738711774D-01 S_min: 0.2996923864577587D+02 S_max: 0.3101392538498657D+02 + it : 8 |ssh|_max: 0.5220719217849857D+01 |U|_max: 0.1141515836389672D-01 |V|_max: 0.9761604183737865D-01 S_min: 0.2996926417117689D+02 S_max: 0.3101392490495337D+02 + it : 9 |ssh|_max: 0.5145297949564862D+01 |U|_max: 0.1416399592482473D-01 |V|_max: 0.1152759253497909D+00 S_min: 0.2996929035879930D+02 S_max: 0.3101392444612748D+02 + it : 10 |ssh|_max: 0.4979557010366737D+01 |U|_max: 0.1986785874282448D-01 |V|_max: 0.1303543987480547D+00 S_min: 0.2996931641421842D+02 S_max: 0.3101392405137852D+02 diff --git a/examples/nemo/scripts/KGOs/run.stat.orca1.nvhpc.10steps b/examples/nemo/scripts/KGOs/run.stat.orca1.nvhpc.10steps new file mode 100644 index 0000000000..f354e97b28 --- /dev/null +++ b/examples/nemo/scripts/KGOs/run.stat.orca1.nvhpc.10steps @@ -0,0 +1,10 @@ + it : 1 |ssh|_max: 0.2916211875218087D+01 |U|_max: 0.9981163300069769D+00 |V|_max: 0.1307725327848167D+01 S_min: 0.4662006718575353D+01 S_max: 0.4113683127178150D+02 + it : 2 |ssh|_max: 
0.2993687776643656D+01 |U|_max: 0.2222546780479131D+01 |V|_max: 0.2356596334062002D+01 S_min: 0.4676370204123349D+01 S_max: 0.4113676679086335D+02 + it : 3 |ssh|_max: 0.2980914460894113D+01 |U|_max: 0.3209483567667491D+01 |V|_max: 0.2754499501397742D+01 S_min: 0.4689947532502438D+01 S_max: 0.4113668748287516D+02 + it : 4 |ssh|_max: 0.2931506876273280D+01 |U|_max: 0.4144909991457986D+01 |V|_max: 0.2975812579591785D+01 S_min: 0.4699047798840508D+01 S_max: 0.4113662256133599D+02 + it : 5 |ssh|_max: 0.2722453056197598D+01 |U|_max: 0.4767435607510221D+01 |V|_max: 0.3687030466427942D+01 S_min: 0.4706627021626249D+01 S_max: 0.4113654818608597D+02 + it : 6 |ssh|_max: 0.2646796570389801D+01 |U|_max: 0.5207612464372120D+01 |V|_max: 0.3717499845110545D+01 S_min: 0.4713059832415359D+01 S_max: 0.4113648388708570D+02 + it : 7 |ssh|_max: 0.2567591158968874D+01 |U|_max: 0.5134450014160691D+01 |V|_max: 0.3696139950895351D+01 S_min: 0.4718670148787913D+01 S_max: 0.4113641268478863D+02 + it : 8 |ssh|_max: 0.2405462273410292D+01 |U|_max: 0.5097657179968111D+01 |V|_max: 0.3337026635792947D+01 S_min: 0.4723834038866863D+01 S_max: 0.4113635084308528D+02 + it : 9 |ssh|_max: 0.2786981134450641D+01 |U|_max: 0.4730664948689733D+01 |V|_max: 0.3157690707425519D+01 S_min: 0.4727376010231868D+01 S_max: 0.4113628337011737D+02 + it : 10 |ssh|_max: 0.2788273218892431D+01 |U|_max: 0.4406279755214914D+01 |V|_max: 0.3292925965365973D+01 S_min: 0.4730630269756457D+01 S_max: 0.4113622442765698D+02 diff --git a/examples/nemo/scripts/omp_cpu_trans.py b/examples/nemo/scripts/omp_cpu_trans.py index 7f4ba2648d..664c5b1e79 100755 --- a/examples/nemo/scripts/omp_cpu_trans.py +++ b/examples/nemo/scripts/omp_cpu_trans.py @@ -41,7 +41,7 @@ from utils import ( insert_explicit_loop_parallelism, normalise_loops, add_profiling, enhance_tree_information, PASSTHROUGH_ISSUES, PARALLELISATION_ISSUES, - NEMO_MODULES_TO_IMPORT) + NEMO_MODULES_TO_IMPORT, PRIVATISATION_ISSUES) from psyclone.psyir.nodes import Routine from 
psyclone.transformations import OMPLoopTrans @@ -53,12 +53,16 @@ RESOLVE_IMPORTS = NEMO_MODULES_TO_IMPORT # A environment variable can inform if this is targeting NEMOv5, in which case -# array privatisation is enabled. -NEMOV5 = os.environ.get('NEMOV5', False) +# array privatisation is disabled. +NEMOV4 = os.environ.get('NEMOV4', False) # List of all files that psyclone will skip processing FILES_TO_SKIP = PASSTHROUGH_ISSUES +if PROFILING_ENABLED: + # Fails with profiling enabled. issue #2723 + FILES_TO_SKIP.append("mppini.f90") + def trans(psyir): ''' Add OpenMP Parallel and Do directives to all loops, including the @@ -68,6 +72,13 @@ def trans(psyir): :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer` ''' + + # If the environment has ONLY_FILE defined, only process that one file and + # nothing else. This is useful for file-by-file exhaustive tests. + only_do_file = os.environ.get('ONLY_FILE', False) + if only_do_file and psyir.name != only_do_file: + return + omp_parallel_trans = None omp_loop_trans = OMPLoopTrans(omp_schedule="static") omp_loop_trans.omp_directive = "paralleldo" @@ -94,5 +105,6 @@ def trans(psyir): region_directive_trans=omp_parallel_trans, loop_directive_trans=omp_loop_trans, collapse=False, - privatise_arrays=NEMOV5 and psyir.name != "ldftra.f90", + privatise_arrays=(not NEMOV4 and + psyir.name not in PRIVATISATION_ISSUES) ) diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py index b34b876d22..f53879f642 100755 --- a/examples/nemo/scripts/omp_gpu_trans.py +++ b/examples/nemo/scripts/omp_gpu_trans.py @@ -37,14 +37,16 @@ ''' PSyclone transformation script showing the introduction of OpenMP for GPU directives into Nemo code.
''' +import os from utils import ( insert_explicit_loop_parallelism, normalise_loops, add_profiling, - enhance_tree_information, NOT_PERFORMANT, NEMO_MODULES_TO_IMPORT) -from psyclone.psyGen import TransInfo + enhance_tree_information, PASSTHROUGH_ISSUES, PARALLELISATION_ISSUES, + NEMO_MODULES_TO_IMPORT, PRIVATISATION_ISSUES) from psyclone.psyir.nodes import ( Loop, Routine, Directive, Assignment, OMPAtomicDirective) from psyclone.psyir.transformations import OMPTargetTrans -from psyclone.transformations import OMPDeclareTargetTrans, TransformationError +from psyclone.transformations import ( + OMPLoopTrans, OMPDeclareTargetTrans, TransformationError) PROFILING_ENABLED = False @@ -53,7 +55,60 @@ RESOLVE_IMPORTS = NEMO_MODULES_TO_IMPORT # List of all files that psyclone will skip processing -FILES_TO_SKIP = NOT_PERFORMANT +FILES_TO_SKIP = PASSTHROUGH_ISSUES + [ + "sbcblk.f90", # Compiler error: Vector expression used where scalar + # expression required + "sbcflx.f90", # NEMOv4 sbc_dyc causes NVFORTRAN-S-0083-Vector expression + # used where scalar expression required + "fldread.f90", # Wrong runtime results + "zdfddm.f90", # Wrong results + "zdfiwm.f90", # Wrong results + "zdfswm.f90", # fort2 terminated by signal 11 +] + +SKIP_FOR_PERFORMANCE = [ + # Check if these work with NEMOv4 + "iom.f90", + "iom_nf90.f90", + "iom_def.f90", + "timing.f90", + "prtctl.f90", + "trazdf.f90", + "dynzdf.f90", +] + +DONT_HOIST = [ + # Incorrect hoisting + "lbcnfd.f90", +] + +OFFLOADING_ISSUES = [ + "trcrad.f90", # Illegal address during kernel execution, unless the + # dimensions are small + "traatf_qco.f90", # Runtime: Failed to find device function (BENCH) + "lbclnk.f90", # Improve performance until #2751 + "traqsr.f90", + "ldftra.f90", # Wrong runtime results + "geo2ocean.f90", # Uses MATH function calls (EXCLUDE FOR TESTING #2856) + "zdftke.f90", # Uses MATH function calls (EXCLUDE FOR TESTING #2856) +] + +# A environment variable can inform if this is targeting NEMOv4, in which 
case +# array privatisation is disabled and some more files excluded +NEMOV4 = os.environ.get('NEMOV4', False) + +NEMOV4_EXCLUSIONS = [ + "domvvl.f90", + "domzgr.f90", + "dtatsd.f90", + "dynnxt.f90", + "sbcisf.f90", + "sshwzv.f90", + "step.f90", + "zdfmxl.f90", + "traadv_fct.f90", + "traadv.f90", +] def trans(psyir): @@ -65,22 +120,48 @@ def trans(psyir): :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer` ''' + # If the environment has ONLY_FILE defined, only process that one file and + # known-good files that need a "declare target" inside. This is useful for + # file-by-file exhaustive tests. + only_do_file = os.environ.get('ONLY_FILE', False) + if only_do_file and psyir.name not in (only_do_file, + "lib_fortran.f90", + "solfrac_mod.f90"): + return + omp_target_trans = OMPTargetTrans() - omp_loop_trans = TransInfo().get_trans_name('OMPLoopTrans') - omp_loop_trans.omp_directive = "loop" + omp_gpu_loop_trans = OMPLoopTrans(omp_schedule="none") + omp_gpu_loop_trans.omp_directive = "teamsloop" + omp_cpu_loop_trans = OMPLoopTrans(omp_schedule="static") + omp_cpu_loop_trans.omp_directive = "paralleldo" - # TODO #2317: Has structure accesses that can not be offloaded and has - # a problematic range to loop expansion of (1:1) + # Many of the obs_ files have problems to be offloaded to the GPU if psyir.name.startswith("obs_"): - print("Skipping file", psyir.name) + return + + if psyir.name in SKIP_FOR_PERFORMANCE: + return + + if NEMOV4 and psyir.name in NEMOV4_EXCLUSIONS: + return + + # ICE routines do not perform well on GPU, so we skip them + if psyir.name.startswith("ice"): return for subroutine in psyir.walk(Routine): - if PROFILING_ENABLED: - add_profiling(subroutine.children) + # Skip things from the initialisation + if (subroutine.name.endswith('_alloc') or + subroutine.name.endswith('_init') or + subroutine.name.startswith('Agrif') or + subroutine.name.startswith('dia_') or + subroutine.name == 'dom_msk' or + subroutine.name == 'dom_zgr' or +
subroutine.name == 'dom_ngb'): + continue - print(f"Transforming subroutine: {subroutine.name}") + print(f"Adding OpenMP offloading to subroutine: {subroutine.name}") enhance_tree_information(subroutine) @@ -90,17 +171,27 @@ def trans(psyir): convert_array_notation=True, loopify_array_intrinsics=True, convert_range_loops=True, - hoist_expressions=True + hoist_expressions=(psyir.name not in DONT_HOIST) ) - # Thes are functions that are called from inside parallel regions, + # These are functions that are called from inside parallel regions, # annotate them with 'omp declare target' - if subroutine.name.lower().startswith("sign_"): - OMPDeclareTargetTrans().apply(subroutine) - print(f"Marked {subroutine.name} as GPU-enabled") + if ( + subroutine.name.lower().startswith("sign_") or + subroutine.name.lower() == "solfrac" or + (psyir.name == "sbc_phy.f90" and not subroutine.walk(Loop)) + ): + try: + OMPDeclareTargetTrans().apply(subroutine) + print(f"Marked {subroutine.name} as GPU-enabled") + except TransformationError as err: + print(err) # We continue parallelising inside the routine, but this could # change if the parallelisation directives added below are not # nestable, in that case we could add a 'continue' here + elif PROFILING_ENABLED: + # We annotate the rest with profiling hooks if requested + add_profiling(subroutine.children) # For now this is a special case for stpctl.f90 because it forces # loops to parallelise without many safety checks @@ -113,7 +204,7 @@ def trans(psyir): if loop.ancestor(Directive): continue try: - omp_loop_trans.apply(loop, options={"force": True}) + omp_gpu_loop_trans.apply(loop, options={"force": True}) except TransformationError: continue omp_target_trans.apply(loop.parent.parent) @@ -127,10 +218,19 @@ def trans(psyir): parent.addchild(atomic) continue - insert_explicit_loop_parallelism( - subroutine, - region_directive_trans=omp_target_trans, - loop_directive_trans=omp_loop_trans, - # Collapse is necessary to give GPUs enough 
parallel items - collapse=True - ) + if psyir.name not in PARALLELISATION_ISSUES + OFFLOADING_ISSUES: + insert_explicit_loop_parallelism( + subroutine, + region_directive_trans=omp_target_trans, + loop_directive_trans=omp_gpu_loop_trans, + collapse=True, + privatise_arrays=(psyir.name not in PRIVATISATION_ISSUES) + ) + elif psyir.name not in PARALLELISATION_ISSUES: + # These have issues offloading, but we can still do OpenMP threading + insert_explicit_loop_parallelism( + subroutine, + loop_directive_trans=omp_cpu_loop_trans, + privatise_arrays=(not NEMOV4 and + psyir.name not in PRIVATISATION_ISSUES) + ) diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index a9e87e29f4..5066365974 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -37,7 +37,7 @@ from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyir.nodes import ( - Assignment, Loop, Directive, Reference, CodeBlock, + Assignment, Loop, Directive, Reference, CodeBlock, ArrayReference, Call, Return, IfBlock, Routine, IntrinsicCall) from psyclone.psyir.symbols import ( DataSymbol, INTEGER_TYPE, ScalarType, RoutineSymbol) @@ -64,25 +64,86 @@ ] # If routine names contain these substrings then we do not profile them -PROFILING_IGNORE = ["_init", "_rst", "alloc", "agrif", "flo_dom", - "macho", "mpp_", "nemo_gcm", +PROFILING_IGNORE = ["flo_dom", "macho", "mpp_", "nemo_gcm", # These are small functions that the addition of profiling # prevents from being in-lined (and then breaks any attempt # to create OpenACC regions with calls to them) "interp1", "interp2", "interp3", "integ_spline", "sbc_dcy", - "sum", "sign_", "ddpdd", "psyclone_cmp_int", + "sum", "sign_", "ddpdd", "solfrac", "psyclone_cmp_int", "psyclone_cmp_char", "psyclone_cmp_logical"] # Currently fparser has no way of distinguishing array accesses from # function calls if the symbol is imported from some other module.
-# We therefore work-around this by keeping a list of known NEMO functions. -NEMO_FUNCTIONS = ["alpha_charn", "cd_neutral_10m", "cpl_freq", "cp_air", - "eos_pt_from_ct", "gamma_moist", "l_vap", "q_air_rh", - "sbc_dcy", "solfrac", "psi_h", "psi_m", "psi_m_coare", - "psi_h_coare", "psi_m_ecmwf", "psi_h_ecmwf", "q_sat", - "rho_air", "visc_air", "sbc_dcy", "glob_sum", - "glob_sum_full", "ptr_sj", "ptr_sjk", "interp1", "interp2", - "interp3", "integ_spline", "nf90_put_var"] +# We therefore work-around this by keeping a list of known NEMO functions +# from v4 and v5. +NEMO_FUNCTIONS = [ + # Internal functions can be obtained with: + # $ grep -rhi "end function" src/ | awk '{print $3}' | uniq | sort + 'abl_alloc', 'add_xxx', 'Agrif_CFixed', 'agrif_external_switch_index', + 'Agrif_Fixed', 'agrif_oce_alloc', 'Agrif_Root', 'alfa_charn', 'alngam', + 'alpha_sw_sclr', 'alpha_sw_vctr', 'arr_hls', 'arr_lbnd', 'arr_lbnd_2d_dp', + 'arr_lbnd_2d_i', 'arr_lbnd_2d_sp', 'arr_lbnd_3d_dp', 'arr_lbnd_3d_i', + 'arr_lbnd_3d_sp', 'arr_lbnd_4d_dp', 'arr_lbnd_4d_i', 'arr_lbnd_4d_sp', + 'arr_lbnd_5d_dp', 'arr_lbnd_5d_i', 'arr_lbnd_5d_sp', 'atg', + 'bdy_oce_alloc', 'bdy_segs_surf', 'Cd_from_z0', 'CdN10_f_LU12', + 'CdN10_f_LU13', 'cd_n10_ncar', 'cd_neutral_10m', 'CdN_f_LG15', + 'CdN_f_LG15_light', 'CdN_f_LU12_eq36', 'ce_n10_ncar', 'charn_coare3p0', + 'charn_coare3p6', 'charn_coare3p6_wave', 'check_hdom', 'ch_n10_ncar', + 'cp_air', 'cp_air_sclr', 'cp_air_vctr', 'cpl_freq', 'crs_dom_alloc', + 'crs_dom_alloc2', 'dayjul', 'def_newlink', 'delta_skin_layer', + 'depth', 'dep_to_p', 'de_sat_dt_ice_sclr', 'de_sat_dt_ice_vctr', + 'dia_ar5_alloc', 'diadct_alloc', 'dia_hth_alloc', 'dia_ptr_alloc', + 'dia_wri_alloc', 'dom_oce_alloc', 'dom_vvl_alloc', 'dq_sat_dt_ice_sclr', + 'dq_sat_dt_ice_vctr', 'dyn_dmp_alloc', 'dyn_ldf_iso_alloc', + 'dyn_spg_ts_alloc', 'eos_pt_from_ct', 'e_sat_ice_sclr', 'e_sat_ice_vctr', + 'e_sat_sclr', 'e_sat_vctr', 'exa_mpl_alloc', 'f_h_louis_sclr', + 'f_h_louis_vctr', 'find_link', +
'fintegral', 'fld_filename', + 'flo_dom_alloc', 'flo_dstnce', 'flo_oce_alloc', 'flo_rst_alloc', + 'flo_wri_alloc', 'f_m_louis_sclr', 'f_m_louis_vctr', 'frac_solar_abs', + 'fspott', 'FUNCTION_GLOBMINMAX', 'FUNCTION_GLOBSUM', 'gamain', + 'gamma_moist', 'gamma_moist_sclr', 'gamma_moist_vctr', 'get_unit', + 'grt_cir_dis', 'grt_cir_dis_saa', 'icb_alloc', 'icb_utl_bilin', + 'icb_utl_bilin_2d_h', 'icb_utl_bilin_3d_h', 'icb_utl_bilin_e', + 'icb_utl_bilin_h', 'icb_utl_bilin_x', 'icb_utl_count', 'icb_utl_heat', + 'icb_utl_mass', 'icb_utl_yearday', 'ice1D_alloc', 'ice_alloc', + 'ice_dia_alloc', 'ice_dyn_rdgrft_alloc', 'ice_perm_eff', + 'ice_thd_pnd_alloc', 'ice_update_alloc', 'ice_var_sshdyn', 'in_hdom', + 'integ_spline', 'interp', 'interp1', 'interp2', 'interp3', + 'iom_axis', 'iom_getszuld', 'iom_nf90_varid', 'iom_sdate', 'iom_use', + 'iom_varid', 'iom_xios_setid', 'iscpl_alloc', 'is_tile', 'kiss', + 'ksec_week', 'lib_mpp_alloc', 'linquad', 'L_vap', 'L_vap_sclr', + 'L_vap_vctr', 'm', 'maxdist', 'mynode', 'nblinks', 'nodal_factort', + 'oce_alloc', 'oce_SWE_alloc', 'One_on_L', 'p2z_exp_alloc', + 'p2z_lim_alloc', 'p2z_prod_alloc', 'p4z_che_alloc', 'p4z_diaz_alloc', + 'p4z_flx_alloc', 'p4z_lim_alloc', 'p4z_meso_alloc', 'p4z_opt_alloc', + 'p4z_prod_alloc', 'p4z_rem_alloc', 'p4z_sed_alloc', 'p4z_sink_alloc', + 'p5z_lim_alloc', 'p5z_meso_alloc', 'p5z_prod_alloc', + 'PHI', 'potemp', 'pres_temp_sclr', 'pres_temp_vctr', 'prt_ctl_sum_2d', + 'prt_ctl_sum_3d', 'prt_ctl_write_sum', 'psi_h', 'psi_h_andreas', + 'psi_h_coare', 'psi_h_ecmwf', 'psi_h_ice', 'psi_h_mfs', 'psi_h_ncar', + 'psi_m', 'psi_m_andreas', 'psi_m_coare', 'psi_m_ecmwf', 'psi_m_ice', + 'psi_m_mfs', 'psi_m_ncar', 'p_to_dep', 'ptr_ci_2d', 'ptr_sj_2d', + 'ptr_sj_3d', 'ptr_sjk', 'q_air_rh', 'qlw_net_sclr', 'qlw_net_vctr', + 'q_sat', 'q_sat_sclr', 'q_sat_vctr', 'qsr_ext_lev', 'rho_air', + 'rho_air_sclr', 'rho_air_vctr', 'Ri_bulk', 'Ri_bulk_sclr', 'Ri_bulk_vctr', + 'rough_leng_m', 'rough_leng_tq', 's', 'sbc_blk_alloc', 
'sbc_blk_ice_alloc', + 'sbc_cpl_alloc', 'sbc_dcy', 'sbc_dcy_alloc', 'sbc_ice_alloc', + 'sbc_ice_cice_alloc', 'sbc_oce_alloc', 'sbc_rnf_alloc', + 'sbc_ssr_alloc', 'sed_adv_alloc', 'sed_alloc', 'sed_oce_alloc', + 'sms_c14_alloc', 'sms_pisces_alloc', 'snw_ent', 'solfrac', + 'sto_par_flt_fac', 'sum2d', 'sw_adtg', 'sw_ptmp', 'theta', + 'theta_exner_sclr', 'theta_exner_vctr', 't_imp', 'tra_bbl_alloc', + 'tra_dmp_alloc', 'trc_alloc', 'trc_dmp_alloc', 'trc_dmp_sed_alloc', + 'trc_oce_alloc', 'trc_oce_ext_lev', 'trc_opt_alloc', 'trc_sms_cfc_alloc', + 'trc_sms_my_trc_alloc', 'trc_sub_alloc', 'trd_ken_alloc', 'trd_mxl_alloc', + 'trdmxl_oce_alloc', 'trd_mxl_trc_alloc', 'trd_pen_alloc', 'trd_tra_alloc', + 'trd_trc_oce_alloc', 'trd_vor_alloc', 'twrk_id', 'UN10_from_CD', + 'UN10_from_ustar', 'u_star_andreas', 'virt_temp_sclr', 'virt_temp_vctr', + 'visc_air', 'visc_air_sclr', 'visc_air_vctr', 'w1', 'w2', 'z0_from_Cd', + 'z0tq_LKB', 'zdf_gls_alloc', 'zdf_iwm_alloc', 'zdf_mfc_alloc', + 'zdf_mxl_alloc', 'zdf_oce_alloc', 'zdf_osm_alloc', 'zdf_phy_alloc', + 'zdf_tke_alloc', 'zdf_tmx_alloc', +] # Currently fparser has no way of distinguishing array accesses from statement # functions, the following subroutines contains known statement functions @@ -96,6 +157,11 @@ PARALLELISATION_ISSUES = [ "ldfc1d_c2d.f90", "tramle.f90", + "dynspg_ts.f90", # Uses MATH function calls (EXCLUDE FOR TESTING #2856) +] + +PRIVATISATION_ISSUES = [ + "ldftra.f90", # Wrong runtime results ] @@ -136,19 +202,30 @@ def enhance_tree_information(schedule): # Manually set the datatype of some integer scalars that are # important for performance _it_should_be(reference.symbol, ScalarType, INTEGER_TYPE) - elif reference.symbol.name in NEMO_FUNCTIONS: - if reference.symbol.is_import or reference.symbol.is_unresolved: - # The parser gets these wrong, they are Calls not ArrayRefs - if not isinstance(reference.symbol, RoutineSymbol): - # We need to specialise the generic Symbol to a Routine - 
reference.symbol.specialise(RoutineSymbol) - if not (isinstance(reference.parent, Call) and - reference.parent.routine is reference): - # We also need to replace the Reference node with a Call - call = Call.create(reference.symbol) - for child in reference.children[:]: - call.addchild(child.detach()) - reference.replace_with(call) + elif ( + # If its an ArrayReference ... + isinstance(reference, ArrayReference) and + # ... with the following name ... + (reference.symbol.name in NEMO_FUNCTIONS or + reference.symbol.name.startswith('local_') or + reference.symbol.name.startswith('glob_') or + reference.symbol.name.startswith('SIGN_') or + reference.symbol.name.startswith('netcdf_') or + reference.symbol.name.startswith('nf90_')) and + # ... and the symbol is unresolved + (reference.symbol.is_import or reference.symbol.is_unresolved) + ): + # The parser gets these wrong, they are Calls not ArrayRefs + if not isinstance(reference.symbol, RoutineSymbol): + # We need to specialise the generic Symbol to a Routine + reference.symbol.specialise(RoutineSymbol) + if not (isinstance(reference.parent, Call) and + reference.parent.routine is reference): + # We also need to replace the Reference node with a Call + call = Call.create(reference.symbol) + for child in reference.children[:]: + call.addchild(child.detach()) + reference.replace_with(call) def inline_calls(schedule): diff --git a/src/psyclone/psyir/nodes/__init__.py b/src/psyclone/psyir/nodes/__init__.py index ddfd927e91..615f2fa2b9 100644 --- a/src/psyclone/psyir/nodes/__init__.py +++ b/src/psyclone/psyir/nodes/__init__.py @@ -92,7 +92,7 @@ OMPStandaloneDirective, OMPRegionDirective, OMPTargetDirective, OMPLoopDirective, OMPDeclareTargetDirective, OMPTeamsDistributeParallelDoDirective, OMPAtomicDirective, - OMPSimdDirective) + OMPSimdDirective, OMPTeamsLoopDirective) from psyclone.psyir.nodes.clause import Clause, OperandClause from psyclone.psyir.nodes.omp_clauses import ( OMPGrainsizeClause, OMPNogroupClause, 
OMPNowaitClause, OMPNumTasksClause, @@ -184,6 +184,7 @@ 'OMPDeclareTargetDirective', 'OMPSimdDirective', 'OMPTeamsDistributeParallelDoDirective', + 'OMPTeamsLoopDirective', # OMP Clause Nodes 'OMPGrainsizeClause', 'OMPNogroupClause', diff --git a/src/psyclone/psyir/nodes/intrinsic_call.py b/src/psyclone/psyir/nodes/intrinsic_call.py index 3895cac10a..bf925c829b 100644 --- a/src/psyclone/psyir/nodes/intrinsic_call.py +++ b/src/psyclone/psyir/nodes/intrinsic_call.py @@ -791,17 +791,23 @@ def is_available_on_device(self): IntrinsicCall.Intrinsic.DPROD, IntrinsicCall.Intrinsic.EXP, IntrinsicCall.Intrinsic.IAND, IntrinsicCall.Intrinsic.IEOR, IntrinsicCall.Intrinsic.INT, IntrinsicCall.Intrinsic.IOR, - IntrinsicCall.Intrinsic.LOG, IntrinsicCall.Intrinsic.LOG10, + IntrinsicCall.Intrinsic.LOG, IntrinsicCall.Intrinsic.MAX, IntrinsicCall.Intrinsic.MIN, IntrinsicCall.Intrinsic.MOD, IntrinsicCall.Intrinsic.NINT, IntrinsicCall.Intrinsic.NOT, IntrinsicCall.Intrinsic.REAL, IntrinsicCall.Intrinsic.SIGN, IntrinsicCall.Intrinsic.SIN, IntrinsicCall.Intrinsic.SINH, IntrinsicCall.Intrinsic.SQRT, IntrinsicCall.Intrinsic.TAN, IntrinsicCall.Intrinsic.TANH, + IntrinsicCall.Intrinsic.UBOUND, IntrinsicCall.Intrinsic.MERGE, + # The ones below can be offloaded but provide numerical differences + # even with the -gpu=uniform_math flag, ideally it should be + # configurable if these are allowed or not. 
+ # IntrinsicCall.Intrinsic.LOG10, # The one below are not documented on nvidia compiler IntrinsicCall.Intrinsic.PRODUCT, IntrinsicCall.Intrinsic.SIZE, IntrinsicCall.Intrinsic.SUM, IntrinsicCall.Intrinsic.LBOUND, - IntrinsicCall.Intrinsic.UBOUND) + IntrinsicCall.Intrinsic.MAXVAL, IntrinsicCall.Intrinsic.MINVAL, + IntrinsicCall.Intrinsic.TINY, IntrinsicCall.Intrinsic.HUGE) @classmethod def create(cls, intrinsic, arguments=()): diff --git a/src/psyclone/psyir/nodes/omp_directives.py b/src/psyclone/psyir/nodes/omp_directives.py index af0ae7a541..170850edf3 100644 --- a/src/psyclone/psyir/nodes/omp_directives.py +++ b/src/psyclone/psyir/nodes/omp_directives.py @@ -2118,6 +2118,8 @@ def _validate_collapse_value(self): f"'{self}' has a collapse={self._collapse} and the " f"nested body at depth {depth} cannot be " f"collapsed.") + if len(cursor.loop_body.children) == 0: + break cursor = cursor.loop_body.children[0] def _validate_single_loop(self): @@ -2416,6 +2418,11 @@ class OMPTeamsDistributeParallelDoDirective(OMPParallelDoDirective): _directive_string = "teams distribute parallel do" +class OMPTeamsLoopDirective(OMPParallelDoDirective): + ''' Class representing the OMP teams loop directive. ''' + _directive_string = "teams loop" + + class OMPTargetDirective(OMPRegionDirective): ''' Class for the !$OMP TARGET directive that offloads the code contained in its region into an accelerator device. 
''' diff --git a/src/psyclone/psyir/transformations/omp_loop_trans.py b/src/psyclone/psyir/transformations/omp_loop_trans.py index fa1c56d6b7..8ae32e3f75 100644 --- a/src/psyclone/psyir/transformations/omp_loop_trans.py +++ b/src/psyclone/psyir/transformations/omp_loop_trans.py @@ -38,7 +38,8 @@ from psyclone.configuration import Config from psyclone.psyir.nodes import ( Routine, OMPDoDirective, OMPLoopDirective, OMPParallelDoDirective, - OMPTeamsDistributeParallelDoDirective, OMPScheduleClause) + OMPTeamsDistributeParallelDoDirective, OMPTeamsLoopDirective, + OMPScheduleClause) from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE from psyclone.psyir.transformations.parallel_loop_trans import \ ParallelLoopTrans @@ -48,6 +49,7 @@ "do": OMPDoDirective, "paralleldo": OMPParallelDoDirective, "teamsdistributeparalleldo": OMPTeamsDistributeParallelDoDirective, + "teamsloop": OMPTeamsLoopDirective, "loop": OMPLoopDirective } #: List containing the valid names for OMP directives. diff --git a/src/psyclone/tests/psyir/nodes/intrinsic_call_test.py b/src/psyclone/tests/psyir/nodes/intrinsic_call_test.py index 4d1639902a..71b45adad2 100644 --- a/src/psyclone/tests/psyir/nodes/intrinsic_call_test.py +++ b/src/psyclone/tests/psyir/nodes/intrinsic_call_test.py @@ -118,7 +118,7 @@ def test_intrinsiccall_is_inquiry(): (IntrinsicCall.Intrinsic.ABS, True), (IntrinsicCall.Intrinsic.MIN, True), (IntrinsicCall.Intrinsic.MAX, True), - (IntrinsicCall.Intrinsic.MAXVAL, False), + (IntrinsicCall.Intrinsic.MAXVAL, True), (IntrinsicCall.Intrinsic.ALLOCATE, False), (IntrinsicCall.Intrinsic.MATMUL, False), (IntrinsicCall.Intrinsic.ACOS, True), diff --git a/src/psyclone/tests/psyir/transformations/transformations_test.py b/src/psyclone/tests/psyir/transformations/transformations_test.py index 3fa4c33f89..ec4d66a42b 100644 --- a/src/psyclone/tests/psyir/transformations/transformations_test.py +++ b/src/psyclone/tests/psyir/transformations/transformations_test.py @@ -367,7 +367,7 @@ def 
test_omplooptrans_properties(): omplooptrans.omp_directive = "invalid" assert ("The OMPLoopTrans.omp_directive property must be a str with " "the value of ['do', 'paralleldo', 'teamsdistributeparalleldo', " - "'loop'] but found a 'str' with value 'invalid'." + "'teamsloop', 'loop'] but found a 'str' with value 'invalid'." in str(err.value)) with pytest.raises(TypeError) as err: