@@ -93,6 +93,7 @@ list(REMOVE_DUPLICATES LIBOMPTARGET_DEVICE_ARCHITECTURES)
93
93
set (include_files
94
94
${include_directory} /Allocator.h
95
95
${include_directory} /Configuration.h
96
+ ${include_directory} /Platform.h
96
97
${include_directory} /Debug.h
97
98
${include_directory} /Interface.h
98
99
${include_directory} /LibC.h
@@ -111,6 +112,7 @@ set(src_files
111
112
${source_directory} /Debug.cpp
112
113
${source_directory} /Kernel.cpp
113
114
${source_directory} /LibC.cpp
115
+ ${source_directory} /LibM.cpp
114
116
${source_directory} /Mapping.cpp
115
117
${source_directory} /Misc.cpp
116
118
${source_directory} /Parallelism.cpp
@@ -133,7 +135,7 @@ set(src_files
133
135
# propagation. That said, we will run the vectorizer again after the runtime
134
136
# has been linked into the user program.
135
137
# Optimization flags handed to clang for every runtime source that is compiled
# to bitcode (they are spliced into bc_flags).
set(clang_opt_flags
  -O3
  -mllvm -openmp-opt-disable
  -DSHARED_SCRATCHPAD_SIZE=512
  -mllvm -vectorize-slp=false
)

# Optimization flags for the linked bitcode library.
# NOTE(review): presumably consumed by the opt step that follows linking;
# the consumer is outside this view.
set(link_opt_flags
  -O3
  -openmp-opt-disable
  -vectorize-slp=false
)

# Internalize pass configuration; the exports file names the public API
# symbols.
set(link_export_flag
  -passes=internalize
  -internalize-public-api-file=${source_directory}/exports
)
138
140
139
141
# Prepend -I to each list element
@@ -143,9 +145,12 @@ list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
143
145
# Set flags for LLVM Bitcode compilation.
144
146
set (bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
145
147
${clang_opt_flags} --offload-device-only
146
- -nocudalib -nogpulib -nostdinc
148
+ -nogpuinc -nogpulib
147
149
-fopenmp -fopenmp-cuda-mode
148
150
-Wno-unknown-cuda-version
151
+ -I${CMAKE_BINARY_DIR}/openmp/runtime/src # Need omp.h for LibM.
152
+ -I${CMAKE_BINARY_DIR}/projects/openmp/runtime/src # Need omp.h for LibM.
153
+ -I${CMAKE_BINARY_DIR}/runtime/src
149
154
-DOMPTARGET_DEVICE_RUNTIME
150
155
-I${include_directory}
151
156
-I${devicertl_base_directory}/../include
@@ -158,82 +163,6 @@ else()
158
163
list (APPEND bc_flags -DOMPTARGET_DEBUG=0 )
159
164
endif ()
160
165
161
- function (addAMDSpecificBcLibs touch_target gfxname bc_files local_depend_files )
162
- # For amdgpu, the libomptarget bc is "all inclusive".
163
- # During user compilation, the libomptarget bc is essentially the only
164
- # non-user library linked. It is linked once in GPU link phase
165
- # following llvm-link options: --internalize --only-needed
166
- if (NOT amd_device_libs_found )
167
- find_package (AMDDeviceLibs REQUIRED CONFIG
168
- HINTS
169
- ${CMAKE_BINARY_DIR} /../../tools/rocm-device-libs
170
- ${CMAKE_INSTALL_PREFIX}
171
- PATHS
172
- /opt/rocm )
173
- if (AMDDeviceLibs_DIR )
174
- set (amd_device_libs_found ON )
175
- libomptarget_say ("DeviceRTLs ${gfxname} : Getting ROCm device libs from ${AMDDeviceLibs_DIR} " )
176
- else ()
177
- libomptarget_say ("DeviceRTLs ${gfxname} : Not building AMDGCN device RTL: Could not find AMDDeviceLibs package" )
178
- return ()
179
- endif ()
180
- endif ()
181
- get_target_property (ockl_bc_file ockl LOCATION )
182
- get_target_property (ocml_bc_file ocml LOCATION )
183
- set (amdgpu_wfsz_is32 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 )
184
- string (FIND "${amdgpu_wfsz_is32} " "${gfxname} " is_32bit )
185
- if (NOT is_32bit EQUAL -1 )
186
- get_target_property (oclc_wf_bc_file oclc_wavefrontsize64_off LOCATION )
187
- else ()
188
- get_target_property (oclc_wf_bc_file oclc_wavefrontsize64_on LOCATION )
189
- endif ()
190
- string (LENGTH "${gfxname} " gfxlen )
191
- if (gfxlen EQUAL 6 )
192
- string (SUBSTRING ${gfxname} 3 3 gfxnum )
193
- else ()
194
- string (SUBSTRING ${gfxname} 3 4 gfxnum )
195
- endif ()
196
- get_target_property (oclc_isa_bc_file oclc_isa_version_${gfxnum} LOCATION )
197
-
198
- # Add custom target so targets from other directories
199
- # can be added as dependencies to ensure libm
200
- # and libhostexec bc files have been built.
201
- add_custom_target (${touch_target} ALL )
202
- add_dependencies (${touch_target}
203
- libm-target-${gfxname}
204
- libhostexec-${gfxname}.bc
205
- )
206
-
207
- # TODO: Add back -amdgpu to the names below (maybe?).
208
- list (APPEND bc_files
209
- ${CMAKE_BINARY_DIR} /libm-${gfxname}.bc
210
- # ${CMAKE_BINARY_DIR}/openmp/libomptarget/hostexec/libhostexec-${gfxname}.bc
211
- )
212
- if (OPENMP_STANDALONE_BUILD )
213
- list (APPEND bc_files
214
- ${CMAKE_BINARY_DIR} /libomptarget/hostexec/libhostexec-${gfxname}.bc
215
- )
216
- else ()
217
- list (APPEND bc_files
218
- ${CMAKE_BINARY_DIR} /openmp/libomptarget/hostexec/libhostexec-${gfxname}.bc
219
- )
220
- endif ()
221
- if (EXISTS ${CMAKE_BINARY_DIR} /../../tools/ROCMDEVLIBS )
222
- add_dependencies (${touch_target}
223
- ockl ocml oclc_wavefrontsize64_on oclc_wavefrontsize64_off oclc_isa_version_${gfxnum} )
224
- endif ()
225
-
226
- # Add amdgcn-specific bc files to link command
227
- list (APPEND bc_files ${ocml_bc_file} ${ockl_bc_file} ${oclc_wf_bc_file} ${oclc_isa_bc_file} )
228
-
229
- # Add touch-target-$(target_cpu) to local_depend_files so all $bc_files will exist or be created.
230
- list (APPEND local_depend_files ${touch_target} )
231
-
232
- # Update these values in the caller:
233
- set (bc_files ${bc_files} PARENT_SCOPE )
234
- set (local_depend_files ${local_depend_files} PARENT_SCOPE )
235
- endfunction ()
236
-
237
166
# first create an object target
238
167
add_library (omptarget.devicertl.all_objs OBJECT IMPORTED )
239
168
function (compileDeviceRTLLibrary target_cpu target_name target_triple )
@@ -257,6 +186,20 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
257
186
COMMENT "Building LLVM bitcode ${outfile} "
258
187
VERBATIM
259
188
)
189
+
190
# Post-process the bitcode built from State.cpp with the prep tool.
# The expansion is quoted so that if() compares the file name itself and never
# re-dereferences it as a variable name (and an empty value stays well-formed).
if("${outfile}" MATCHES "State.cpp")
  # Run the prep tool on the library to replace the internal attribute with
  # linkonce_odr for dm_alloc only.
  set(outfile_prep "${outfile}-${target_cpu}-prep.bc")
  add_custom_target(${outfile_prep}
    COMMAND ${PREP_TOOL} -dm ${outfile}
            -o ${outfile_prep}
    DEPENDS ${outfile}
    COMMENT "Running ${PREP_TOOL} for ${outfile_prep}"
    # Match the bitcode compile command above: platform-independent escaping.
    VERBATIM
  )
  add_dependencies(${outfile_prep} ${outfile})
  # From here on the prepped bitcode stands in for the original output.
  set(outfile ${outfile_prep})
endif()
202
+
260
203
if ("${CLANG_TOOL} " STREQUAL "$<TARGET_FILE:clang>" )
261
204
# Add a file-level dependency to ensure that clang is up-to-date.
262
205
# By default, add_custom_command only builds clang if the
@@ -268,21 +211,34 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
268
211
list (APPEND bc_files ${outfile} )
269
212
endforeach ()
270
213
271
- set (bclib_name "libomptarget-${target_name} -${target_cpu} .bc" )
272
-
273
- set (local_depend_files ${bc_files} )
274
- if ( ${target_name} STREQUAL "amdgpu" )
275
- addAMDSpecificBcLibs ("touch-target-${target_cpu} " ${target_cpu} "${bc_files} " "${local_depend_files} " )
214
# AMDGPU only: the 'hostexec' bitcode built elsewhere is fed straight into the
# link step below.
if("${target_name}" STREQUAL "amdgpu")
  # The hostexec build directory sits under openmp/ unless this is a
  # standalone OpenMP build.
  if(OPENMP_STANDALONE_BUILD)
    set(hostexec_bc_dir ${CMAKE_BINARY_DIR}/libomptarget/hostexec)
  else()
    set(hostexec_bc_dir ${CMAKE_BINARY_DIR}/openmp/libomptarget/hostexec)
  endif()
  list(APPEND extra_bc_files ${hostexec_bc_dir}/libhostexec-${target_cpu}.bc)

  # Helper target that orders this link after the hostexec bitcode target.
  add_custom_target(libhostexec-${target_cpu}
    DEPENDS ${extra_bc_files}
  )
  add_dependencies(libhostexec-${target_cpu} libhostexec-${target_cpu}.bc)
  set(extra_depends libhostexec-${target_cpu})
endif()

set(bclib_name "libomptarget-${target_name}-${target_cpu}.bc")

# Combine the per-source bitcode files (plus any extra bitcode collected
# above) into one library.
add_custom_target(linked_${bclib_name}
  COMMAND ${LINK_TOOL}
          -o ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
          ${extra_bc_files}
          ${bc_files}
  DEPENDS ${bc_files} ${extra_depends}
  COMMENT "Linking LLVM bitcode ${bclib_name}"
)
add_dependencies(linked_${bclib_name} ${bc_files} ${extra_depends})

# When the link tool is the in-tree llvm-link, make sure it is built first.
if("${LINK_TOOL}" STREQUAL "$<TARGET_FILE:llvm-link>")
  add_dependencies(linked_${bclib_name} llvm-link)
endif()
@@ -411,8 +367,24 @@ add_custom_target(omptarget.devicertl.nvptx)
411
367
add_custom_target (omptarget.devicertl.amdgpu )
412
368
foreach (gpu_arch ${LIBOMPTARGET_DEVICE_ARCHITECTURES} )
413
369
if ("${gpu_arch} " IN_LIST all_amdgpu_architectures )
414
- set (clang_options -DLIBOMPTARGET_BC_TARGET -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=${gpu_arch} -DLIBOMPTARGET_BC_TARGET -D__AMDGCN__ -nogpulib )
415
- compileDeviceRTLLibrary (${gpu_arch} amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none ${clang_options} )
370
# Locate the ROCm device libraries; configuration stops here if the package
# cannot be found (REQUIRED).
find_package(AMDDeviceLibs REQUIRED CONFIG
  HINTS ${CMAKE_INSTALL_PREFIX}
        ${CMAKE_BINARY_DIR}/../../tools/rocm-device-libs
  PATHS /opt/rocm
)

# Resolve the on-disk bitcode files behind the imported ocml/ockl targets.
get_target_property(ocml_path ocml IMPORTED_LOCATION)
get_target_property(ockl_path ockl IMPORTED_LOCATION)

# get_target_property() yields "<var>-NOTFOUND" when the property is unset.
# Fail at configure time instead of handing a bogus path to clang.
if(NOT ocml_path OR NOT ockl_path)
  message(FATAL_ERROR
    "DeviceRTL ${gpu_arch}: IMPORTED_LOCATION is not set for the ocml/ockl "
    "targets provided by AMDDeviceLibs")
endif()

# Extra clang options for the AMDGPU device runtime: the ROCm device
# libraries are linked in as builtin bitcode.
set(amd_options
  -Xclang -mcode-object-version=none
  -Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}
  -Xclang -mlink-builtin-bitcode -Xclang ${ockl_path}
  -Wno-linker-warnings # Silence the empty host compilation.
)

compileDeviceRTLLibrary(${gpu_arch} amdgpu amdgcn-amd-amdhsa ${amd_options})
416
388
elseif ("${gpu_arch} " IN_LIST all_nvptx_architectures )
417
389
compileDeviceRTLLibrary (${gpu_arch} nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx61 )
418
390
else ()
0 commit comments