-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathMakefile.pgi
176 lines (165 loc) · 7.4 KB
/
Makefile.pgi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# MAKEFILE for compiling MuSTEM 5.3 on the command line with PGI on Linux or Windows
#
# ---Dorothea Muecke-Herzberg/SuperSTEM/20190705
#
# TOK for Windows10 with Turing architecture NVidia GPU (RTX 2080Ti) with PGI Community compiler v19.4 and version 5.3 Source folder of MuSTEM on github
# and Ubuntu 18.04 with Turing architecture NVidia GPU (RTX 2080Ti) with PGI Community compiler v19.4 and version 5.3 Source folder of MuSTEM on github
#
# Prerequisites:
# Windows: install MS Windows SDK, Visual Studio Community 2017, CUDA 10.1, PGI 19.4 Community, FFTW3 pre-compiled libraries (and create import libraries)
# Linux: install CUDA dependencies, CUDA 10.1, PGI 19.4 Community, "source pgi.env", compile FFTW3 libraries
# "pgi.env"
# ---------------------------------------
# export PGI=/opt/pgi;
# export LM_LICENSE_FILE="$LM_LICENSE_FILE":/opt/pgi/license.dat;
# export PATH=/opt/pgi/linux86-64/19.4/bin:$PATH;
# export PATH=/opt/openmpi_4.0.1_pgi/bin:$PATH
#
# Note: Quick hack in line 145 in mustem.f90 from "OPEN (6, CARRIAGECONTROL = "FORTRAN")" to "Open(6)" was necessary to get it to compile
#
# Note: Don't forget to run "make clean" between builds
###############################################################################################
#CHANGE HERE AS NECESSARY:
#Each setting below can also be overridden on the command line,
#e.g. "make PROC=gpu PREC=double".
#Target processor (gpu/cpu)
PROC=cpu
#Floating point precision (double/single)
PREC=single
#Operating system (lin/win)
OS=lin
#Stop immediately on a misspelt setting. The conditionals further down only
#compare against one literal value each, so without this check a typo such as
#PROC=GPU would silently produce a CPU build, and an unknown OS would combine
#the Windows FFTW3 flags with the Linux object/executable settings.
ifneq ($(PROC),gpu)
ifneq ($(PROC),cpu)
$(error PROC must be "gpu" or "cpu" but is "$(PROC)")
endif
endif
ifneq ($(PREC),double)
ifneq ($(PREC),single)
$(error PREC must be "double" or "single" but is "$(PREC)")
endif
endif
ifneq ($(OS),lin)
ifneq ($(OS),win)
$(error OS must be "lin" or "win" but is "$(OS)")
endif
endif
#FFTW3 location (Linux default; replaced below when OS=win)
FFTW3DIR=/opt/fftw3.3.8_pgi
ifeq ($(OS),win)
FFTW3DIR="C:\Program Files\PGI\win64\2019\fftw3.3.5"
#cuFFT location (only for Windows)
CUFFTDIR="C:\Program Files\PGI\win64\2019\cuda\10.1\lib\x64"
endif
#Uncomment to append -dryrun to every compile and link command:
#DBG=-dryrun
################################################################################################
#THERE SHOULD BE NO NEED TO CHANGE ANYTHING BELOW:
#Correctly installed compiler environment should find this by itself:
#CUDA_PATH="C:\Program Files\PGI\win64\2019\cuda\10.1"
#LINKER_FLAGS= -pgf90libs -lpgf90rtl
#CPU build: compile and link against the FFTW3 libraries found in FFTW3DIR.
#GPU_FLAGS starts out empty; it is only filled in further down when PROC=gpu.
GPU_FLAGS=
ifneq ($(OS),lin)
#Windows: link to the import libraries of the FFTW DLLs
#=> the library path must be added to the environment variables to run the
#   executable successfully, or the fftw3 DLLs must be put into the PGI REDIST folder
#Alternative form (not used):
#FFTW3_FLAGS=-I$(FFTW3DIR) -L$(FFTW3DIR) -lfftw3-3 -lfftw3f-3
FFTW3_FLAGS=-I$(FFTW3DIR) -Wl,/libpath:$(FFTW3DIR) -defaultlib:libfftw3-3 -defaultlib:libfftw3f-3
else
#Linux: double and single precision FFTW3 plus their threaded variants;
#the -Wl,-rpath entry stores the library directory in the executable
FFTW3_FLAGS=-L$(FFTW3DIR)/lib -I$(FFTW3DIR)/include \
            -lfftw3 -lfftw3f -lfftw3_threads -lfftw3f_threads \
            -Wl,-rpath=$(FFTW3DIR)/lib
endif
#GPU: enable Cuda, use cuFFT that comes with Cuda/PGI
#CUDA_CC is the compute capability handed to both -Mcuda and -ta.
#The default cc75 targets the Turing architecture; override it for other GPUs
#on the command line, e.g. "make PROC=gpu CUDA_CC=cc70".
CUDA_CC?=cc75
#EXE1 becomes the processor part of the executable name (cpu unless PROC=gpu)
EXE1=cpu
ifeq ($(PROC),gpu)
GPU_FLAGS=-Mcuda=$(CUDA_CC) -ta=tesla:$(CUDA_CC) -DGPU
ifeq ($(OS),lin)
#Linux: #-lcufft must be called together with -Mcuda! -Mcudalib preferred, as then the compiler automatically chooses the correct library version
#FFTW3_FLAGS=-lcufft $(GPU_FLAGS)
#-Mcudalib MUST be called together with -Mcuda (so that the linker knows what version libraries to add!)
#Cuda is only supplied as shared libraries *.so, so do not use -Bstatic!
FFTW3_FLAGS=-Mcudalib=cufft
else
#Windows:
#MUST be called together with -Mcuda!
#FFTW3_FLAGS="C:\Program Files\PGI\win64\2019\cuda\10.1\lib\x64\cufft.lib"
FFTW3_FLAGS=-Wl,/libpath:$(CUFFTDIR) cufft.lib
endif
EXE1=gpu
endif
#Precision: PRECISION is passed to the compiler as a -D preprocessor define,
#EXE2 becomes the precision part of the executable name.
#Any PREC value other than "single" selects double precision.
ifeq ($(PREC),single)
PRECISION=single_precision
EXE2=sngl
else
PRECISION=double_precision
EXE2=dble
endif
#OS dependent settings: object file suffix (OBJ), executable suffix (EXE3),
#preprocessor define (OS_FLAG) and, on Windows only, static linking (STATIC).
#Any OS value other than "win" gets the Linux settings; STATIC is not set there.
ifeq ($(OS),win)
OBJ=obj
EXE3=exe
OS_FLAG=-DWIN
STATIC=-Bstatic
else
OBJ=o
EXE3=out
OS_FLAG=-DLIN
endif
#Name of the final executable, e.g. mustem_cpu_sngl.out
BINARY=mustem_$(EXE1)_$(EXE2).$(EXE3)
###########################################################
#dynamic linking is default for PGI on linux, and static linking is default for PGI on windows!
#for windows -Bstatic has to be used for compiling and linking!
###
#-c: Halts the compilation process after the assembling phase and writes the object code to a file
#-g: Instructs the compiler to include symbolic debugging information in the object module; sets the optimization level to zero unless a -O option is present on the command line
#-Bstatic: link to static libraries (in Windows use for both, compiling and linking)
#-Mpreprocess: Perform cpp-like preprocessing on assembly language and Fortran input source files
#-Mbackslash: Determines how the backslash character is treated in quoted strings (Fortran only)
#-Mconcur: Enable auto-concurrentization of loops. Multiple processors or cores will be used to execute parallelizable loops
#-Mextend: Instructs the compiler to accept 132-column source code; otherwise it accepts 72-column code (Fortran only)
#-Mcuda: Enables CUDA Fortran (and adds the cuda runtime libraries to the link), use option cc75 for turing support
#-Mcudalib=: e.g. -Mcudalib=cufft; the compiler will add the version of the library matching the cuda version given with -Mcuda
#-ta: Enable OpenACC and specify the type of accelerator to which to target accelerator regions (tesla,host,multicore), suboption cc75 for turing support
#-O3: Level three specifies aggressive global optimization. This level performs all level-one and level-two optimizations and enables more aggressive hoisting and scalar replacement optimizations that may or may not be profitable
#-Wl,-rpath=: Stores the path of the libraries in the executable.
#-#: show invocations of compiler, assembler and linker during Makefile run
###########################################################
#Flags for every compile-only pgf90 call (see the option list above).
#The backslash-newlines collapse to single spaces, so this is one flat flag list:
#  compile/debug/optimisation, source handling, parallelisation,
#  FFT library flags, precision define, GPU flags, OS define, optional DBG.
PGF_FLAGS=$(STATIC) -c -g -O3 \
          -Mpreprocess -Mbackslash -Mconcur -Mextend -Mfree -Mrecursive \
          -mp \
          $(FFTW3_FLAGS) -D$(PRECISION) $(GPU_FLAGS) $(OS_FLAG) $(DBG)
#Link step: builds "intermediate" first, then links every object file found in
#the current directory into $(BINARY). Because *.$(OBJ) picks up whatever
#objects are present, stale objects from an earlier configuration get linked
#too - run "make clean" between builds.
#"executable" is a command name, not a file this rule creates, so it is
#declared phony; a stray file called "executable" can then never suppress it.
.PHONY: executable
executable: intermediate
	pgf90 -o $(BINARY) *.$(OBJ) $(STATIC) -mp $(FFTW3_FLAGS) $(GPU_FLAGS) $(OS_FLAG) $(DBG)
#Compile the Fortran sources one file at a time with $(PGF_FLAGS) (compile only).
#PROC selects the list: the GPU list additionally compiles the sources in
#GPU_routines/, the CPU list uses the top-level sources only.
#Each file is a separate recipe line, so make stops at the first failing file.
#The order is presumably the module dependency order (a file comes after the
#modules it uses) - keep that in mind when adding files.
#NOTE(review): the CPU list compiles mod_CUFFT_wrapper.f90 before
#m_precision.f90, whereas the GPU list compiles it much later - confirm that
#it does not use any of the modules compiled after it in the CPU list.
#"modules" has no prerequisites and creates no file of that name, so it is
#declared phony; otherwise a file or directory called "modules" would make
#this rule look up to date and nothing would be compiled.
.PHONY: modules
modules:
ifeq ($(PROC),gpu)
#GPU
	pgf90 $(PGF_FLAGS) quadpack.f90
	pgf90 $(PGF_FLAGS) m_precision.f90
	pgf90 $(PGF_FLAGS) m_string.f90
	pgf90 $(PGF_FLAGS) m_numerical_tools.f90
	pgf90 $(PGF_FLAGS) mod_global_variables.f90
	pgf90 $(PGF_FLAGS) m_crystallography.f90
	pgf90 $(PGF_FLAGS) m_electron.f90
	pgf90 $(PGF_FLAGS) m_user_input.f90
	pgf90 $(PGF_FLAGS) GPU_routines/mod_cufft.f90
	pgf90 $(PGF_FLAGS) mod_CUFFT_wrapper.f90
	pgf90 $(PGF_FLAGS) mod_output.f90
	pgf90 $(PGF_FLAGS) m_multislice.f90
	pgf90 $(PGF_FLAGS) m_lens.f90
	pgf90 $(PGF_FLAGS) m_tilt.f90
	pgf90 $(PGF_FLAGS) m_absorption.f90
	pgf90 $(PGF_FLAGS) GPU_routines/mod_cuda_array_library.f90
	pgf90 $(PGF_FLAGS) GPU_routines/mod_cuda_potential.f90
	pgf90 $(PGF_FLAGS) m_potential.f90
	pgf90 $(PGF_FLAGS) MS_utilities.f90
	pgf90 $(PGF_FLAGS) GPU_routines/mod_cuda_setup.f90
	pgf90 $(PGF_FLAGS) GPU_routines/mod_cuda_ms.f90
	pgf90 $(PGF_FLAGS) s_absorptive_stem.f90
	pgf90 $(PGF_FLAGS) s_qep_tem.f90
	pgf90 $(PGF_FLAGS) s_qep_stem.f90
	pgf90 $(PGF_FLAGS) s_absorptive_tem.f90
	pgf90 $(PGF_FLAGS) muSTEM.f90
else
#CPU
	pgf90 $(PGF_FLAGS) quadpack.f90
	pgf90 $(PGF_FLAGS) mod_CUFFT_wrapper.f90
	pgf90 $(PGF_FLAGS) m_precision.f90
	pgf90 $(PGF_FLAGS) m_string.f90
	pgf90 $(PGF_FLAGS) m_numerical_tools.f90
	pgf90 $(PGF_FLAGS) mod_global_variables.f90
	pgf90 $(PGF_FLAGS) m_crystallography.f90
	pgf90 $(PGF_FLAGS) m_electron.f90
	pgf90 $(PGF_FLAGS) m_user_input.f90
	pgf90 $(PGF_FLAGS) mod_output.f90
	pgf90 $(PGF_FLAGS) m_multislice.f90
	pgf90 $(PGF_FLAGS) m_lens.f90
	pgf90 $(PGF_FLAGS) m_tilt.f90
	pgf90 $(PGF_FLAGS) m_absorption.f90
	pgf90 $(PGF_FLAGS) m_potential.f90
	pgf90 $(PGF_FLAGS) MS_utilities.f90
	pgf90 $(PGF_FLAGS) s_absorptive_stem.f90
	pgf90 $(PGF_FLAGS) s_qep_tem.f90
	pgf90 $(PGF_FLAGS) s_qep_stem.f90
	pgf90 $(PGF_FLAGS) s_absorptive_tem.f90
	pgf90 $(PGF_FLAGS) muSTEM.f90
endif
#Second compile pass: after "modules" has run, compile every *.f90 file of the
#current directory in a single pgf90 call. PGF_FLAGS contains -c, so this only
#produces object files; linking is done by the "executable" rule.
#"intermediate" is a command name, not a file this rule creates, so it is
#declared phony; a stray file called "intermediate" can then never suppress it.
.PHONY: intermediate
intermediate: *.f90 modules
	pgf90 $(PGF_FLAGS) *.f90
#Remove build products from the current directory: object files, Fortran
#module files, temporary files, *.out files, files with the executable suffix
#of the selected OS, and *.dwf files.
#OBJ and EXE3 depend on the OS setting, so clean with the same OS value that
#was used for the build.
#Declared phony so that a file called "clean" cannot disable "make clean".
.PHONY: clean
clean:
	rm -f *.$(OBJ) *.mod *.tmp *.TMP *.out *.$(EXE3) *.dwf