@@ -52,19 +52,149 @@ function(_get_common_compile_options output_var flags)
52
52
endif ()
53
53
endif ()
54
54
if (LIBC_TARGET_ARCHITECTURE_IS_GPU )
55
- list (APPEND compile_options "-fopenmp" )
56
- list (APPEND compile_options "-fopenmp-cuda-mode" )
57
- foreach (gpu_arch ${LIBC_GPU_ARCHITECTURES} )
58
- list (APPEND compile_options "--offload-arch=${gpu_arch} " )
59
- endforeach ()
60
55
list (APPEND compile_options "-nogpulib" )
61
- list (APPEND compile_options "-nogpuinc" )
62
56
list (APPEND compile_options "-fvisibility=hidden" )
63
- list (APPEND compile_options "-foffload-lto" )
64
57
endif ()
65
58
set (${output_var} ${compile_options} PARENT_SCOPE )
66
59
endfunction ()
67
60
61
# Builds the entrypoint target for the GPU.
#
# For every architecture in `all_gpu_architectures` the source is compiled to
# LLVM-IR in a per-architecture object library named
# `<target_name>.<gpu_arch>`. Those objects are bundled by
# LIBC_CLANG_OFFLOAD_PACKAGER into `<target_name>.gpubin`, which is embedded
# into an otherwise empty host stub object built as `<target_name>`. When
# LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU or LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX
# is set, a `<target_name>.__internal__` object library is additionally built
# for LIBC_GPU_TARGET_ARCHITECTURE and recorded in OBJECT_FILE_RAW.
#
# Usage:
#     _build_gpu_entrypoint_objects(
#       <target_name>
#       SRCS <the single .cpp file implementing the entrypoint>
#       HDRS <list of .h files>
#       DEPENDS <list of dependencies>
#       COMPILE_OPTIONS <optional list of special compile options for this target>
#       FLAGS <optional list of flags>
#       NAME <optional entrypoint name>
#       CXX_STANDARD <optional C++ standard>
#     )
#
# NAME, CXX_STANDARD and FLAGS fall back to the ADD_ENTRYPOINT_OBJ_* variables
# visible from the calling scope when they are not passed explicitly. The DEPS
# property is always taken from the caller's `fq_deps_list`.
function(_build_gpu_entrypoint_objects fq_target_name)
  cmake_parse_arguments(
    "ADD_GPU_ENTRYPOINT_OBJ"
    "" # No optional arguments
    "NAME;CXX_STANDARD" # Single value arguments
    "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments
    ${ARGN}
  )

  # Prefer the values handed to this function. Only when a keyword was omitted
  # do we fall back to what the calling scope leaks in, which is what this
  # function used to read unconditionally.
  foreach(keyword NAME CXX_STANDARD FLAGS)
    if("${ADD_GPU_ENTRYPOINT_OBJ_${keyword}}" STREQUAL "")
      set(ADD_GPU_ENTRYPOINT_OBJ_${keyword} ${ADD_ENTRYPOINT_OBJ_${keyword}})
    endif()
  endforeach()

  # The stub file name and the packager's `file=` argument are both derived
  # from a single source / single object, so reject anything else up front
  # instead of failing later with an unrelated error.
  list(LENGTH ADD_GPU_ENTRYPOINT_OBJ_SRCS num_srcs)
  if(NOT num_srcs EQUAL 1)
    message(FATAL_ERROR "GPU entrypoint '${fq_target_name}' must have exactly "
                        "one source file, got ${num_srcs}: "
                        "'${ADD_GPU_ENTRYPOINT_OBJ_SRCS}'")
  endif()

  # Defined before first use so that every target created below sees the same
  # include path regardless of what the calling scope happens to contain.
  set(include_dirs ${LIBC_BUILD_DIR}/include ${LIBC_SOURCE_DIR} ${LIBC_BUILD_DIR})

  # Start from empty lists; a function scope inherits the caller's variables
  # and list(APPEND) would otherwise extend whatever was already there.
  set(packager_images "")
  set(gpu_target_names "")
  set(gpu_object_files "")

  # The packaged version will be built for every target GPU architecture. We do
  # this so we can support multiple accelerators on the same machine.
  foreach(gpu_arch ${all_gpu_architectures})
    set(gpu_target_name ${fq_target_name}.${gpu_arch})
    set(compile_options ${ADD_GPU_ENTRYPOINT_OBJ_COMPILE_OPTIONS})
    # Derive the triple from the specified architecture.
    if("${gpu_arch}" IN_LIST all_amdgpu_architectures)
      set(gpu_target_triple "amdgcn-amd-amdhsa")
      list(APPEND compile_options "-mcpu=${gpu_arch}")
    elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
      set(gpu_target_triple "nvptx64-nvidia-cuda")
      list(APPEND compile_options "-march=${gpu_arch}")
    else()
      message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")
    endif()
    list(APPEND compile_options "--target=${gpu_target_triple}")
    list(APPEND compile_options "-emit-llvm")

    # Build the library for this target architecture. We always emit LLVM-IR for
    # packaged GPU binaries.
    add_library(${gpu_target_name}
      EXCLUDE_FROM_ALL
      OBJECT
      ${ADD_GPU_ENTRYPOINT_OBJ_SRCS}
      ${ADD_GPU_ENTRYPOINT_OBJ_HDRS}
    )

    target_compile_options(${gpu_target_name} PRIVATE ${compile_options})
    target_include_directories(${gpu_target_name} PRIVATE ${include_dirs})
    # add_dependencies() rejects a call that names no dependency at all.
    if(ADD_GPU_ENTRYPOINT_OBJ_DEPENDS)
      add_dependencies(${gpu_target_name} ${ADD_GPU_ENTRYPOINT_OBJ_DEPENDS})
    endif()
    target_compile_definitions(${gpu_target_name} PRIVATE LLVM_LIBC_PUBLIC_PACKAGING)

    # Append this target to a list of images to package into a single binary.
    set(input_file $<TARGET_OBJECTS:${gpu_target_name}>)
    list(APPEND packager_images
         --image=file=${input_file},arch=${gpu_arch},triple=${gpu_target_triple})
    list(APPEND gpu_target_names ${gpu_target_name})
    list(APPEND gpu_object_files ${input_file})
  endforeach()

  # After building the target for the desired GPUs we must package the output
  # into a fatbinary, see https://clang.llvm.org/docs/OffloadingDesign.html for
  # more information.
  set(packaged_target_name ${fq_target_name}.__gpu__)
  set(packaged_output_name ${CMAKE_CURRENT_BINARY_DIR}/${fq_target_name}.gpubin)

  # Naming the object-library targets only orders the build. The object files
  # themselves are listed as well so that the package is regenerated whenever
  # one of them is rebuilt.
  add_custom_command(OUTPUT ${packaged_output_name}
                     COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
                             ${packager_images} -o ${packaged_output_name}
                     DEPENDS ${gpu_target_names} ${gpu_object_files}
                     COMMENT "Packaging LLVM offloading binary"
                     VERBATIM)
  add_custom_target(${packaged_target_name} DEPENDS ${packaged_output_name})

  # We create an empty 'stub' file for the host to contain the embedded device
  # code. This will be packaged into 'libcgpu.a'.
  # TODO: In the future we will want to combine every architecture for a target
  #       into a single bitcode file and use that. For now we simply build for
  #       every single one and let the offloading linker handle it.
  get_filename_component(stub_filename ${ADD_GPU_ENTRYPOINT_OBJ_SRCS} NAME)
  set(stub_file "${CMAKE_CURRENT_BINARY_DIR}/${stub_filename}")
  file(WRITE ${stub_file} "// Empty file.\n")
  # The package is only referenced through a compiler flag, so tell the build
  # system explicitly that the stub object must be rebuilt when it changes.
  set_source_files_properties(${stub_file} PROPERTIES
                              OBJECT_DEPENDS ${packaged_output_name})
  add_library(
    ${fq_target_name}
    # We want an object library as the objects will eventually get packaged into
    # an archive (like libcgpu.a).
    EXCLUDE_FROM_ALL
    OBJECT
    "${stub_file}"
  )
  target_compile_options(${fq_target_name} BEFORE PRIVATE
                         ${ADD_GPU_ENTRYPOINT_OBJ_COMPILE_OPTIONS}
                         -DLLVM_LIBC_PUBLIC_PACKAGING
                         -nostdlib -Xclang -fembed-offload-object=${packaged_output_name})
  target_include_directories(${fq_target_name} PRIVATE ${include_dirs})
  add_dependencies(${fq_target_name}
                   ${ADD_GPU_ENTRYPOINT_OBJ_DEPENDS} ${packaged_target_name})

  # Values are quoted so that an empty one cannot swallow the next keyword.
  set_target_properties(
    ${fq_target_name}
    PROPERTIES
      ENTRYPOINT_NAME "${ADD_GPU_ENTRYPOINT_OBJ_NAME}"
      TARGET_TYPE "${ENTRYPOINT_OBJ_TARGET_TYPE}"
      OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
      CXX_STANDARD "${ADD_GPU_ENTRYPOINT_OBJ_CXX_STANDARD}"
      DEPS "${fq_deps_list}"
      FLAGS "${ADD_GPU_ENTRYPOINT_OBJ_FLAGS}"
  )

  # We only build the internal target for a single supported architecture.
  set(internal_target_name ${fq_target_name}.__internal__)
  if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU OR
     LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
    add_library(
      ${internal_target_name}
      EXCLUDE_FROM_ALL
      OBJECT
      ${ADD_GPU_ENTRYPOINT_OBJ_SRCS}
      ${ADD_GPU_ENTRYPOINT_OBJ_HDRS}
    )
    target_compile_options(${internal_target_name} BEFORE PRIVATE
                           ${ADD_GPU_ENTRYPOINT_OBJ_COMPILE_OPTIONS}
                           --target=${LIBC_GPU_TARGET_TRIPLE})
    if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
      target_compile_options(${internal_target_name} PRIVATE
                             -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE})
    elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
      target_compile_options(${internal_target_name} PRIVATE
                             -march=${LIBC_GPU_TARGET_ARCHITECTURE})
    endif()
    target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
    if(ADD_GPU_ENTRYPOINT_OBJ_DEPENDS)
      add_dependencies(${internal_target_name} ${ADD_GPU_ENTRYPOINT_OBJ_DEPENDS})
    endif()
    set_target_properties(
      ${internal_target_name}
      PROPERTIES
        CXX_STANDARD "${ADD_GPU_ENTRYPOINT_OBJ_CXX_STANDARD}"
        FLAGS "${ADD_GPU_ENTRYPOINT_OBJ_FLAGS}"
    )
    set_target_properties(
      ${fq_target_name}
      PROPERTIES OBJECT_FILE_RAW "$<TARGET_OBJECTS:${internal_target_name}>"
    )
  endif()
endfunction()
197
+
68
198
# Rule which is essentially a wrapper over add_library to compile a set of
69
199
# sources to object files.
70
200
# Usage:
@@ -127,7 +257,6 @@ function(create_object_library fq_target_name)
127
257
if (NOT ADD_OBJECT_CXX_STANDARD )
128
258
set (ADD_OBJECT_CXX_STANDARD ${CMAKE_CXX_STANDARD} )
129
259
endif ()
130
-
131
260
set_target_properties (
132
261
${fq_target_name}
133
262
PROPERTIES
@@ -350,53 +479,67 @@ function(create_entrypoint_object fq_target_name)
350
479
endif ()
351
480
endif ()
352
481
353
- add_library (
354
- ${internal_target_name}
355
- # TODO: We don't need an object library for internal consumption.
356
- # A future change should switch this to a normal static library.
357
- EXCLUDE_FROM_ALL
358
- OBJECT
359
- ${ADD_ENTRYPOINT_OBJ_SRCS}
360
- ${ADD_ENTRYPOINT_OBJ_HDRS}
361
- )
362
- target_compile_options (${internal_target_name} BEFORE PRIVATE ${common_compile_options} )
363
- target_include_directories (${internal_target_name} PRIVATE ${include_dirs} )
364
- add_dependencies (${internal_target_name} ${full_deps_list} )
365
- set_target_properties (
366
- ${internal_target_name}
367
- PROPERTIES
482
+ # GPU builds require special handling for the objects because we want to
483
+ # export several different targets at once, e.g. for both Nvidia and AMD.
484
+ if (LIBC_TARGET_ARCHITECTURE_IS_GPU )
485
+ _build_gpu_entrypoint_objects (
486
+ ${fq_target_name}
487
+ SRCS ${ADD_ENTRYPOINT_OBJ_SRCS}
488
+ HDRS ${ADD_ENTRYPOINT_OBJ_HDRS}
489
+ COMPILE_OPTIONS ${common_compile_options}
490
+ DEPENDS ${full_deps_list}
368
491
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
369
492
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS} "
370
- )
493
+ )
494
+ else ()
495
+ add_library (
496
+ ${internal_target_name}
497
+ # TODO: We don't need an object library for internal consumption.
498
+ # A future change should switch this to a normal static library.
499
+ EXCLUDE_FROM_ALL
500
+ OBJECT
501
+ ${ADD_ENTRYPOINT_OBJ_SRCS}
502
+ ${ADD_ENTRYPOINT_OBJ_HDRS}
503
+ )
504
+ target_compile_options (${internal_target_name} BEFORE PRIVATE ${common_compile_options} )
505
+ target_include_directories (${internal_target_name} PRIVATE ${include_dirs} )
506
+ add_dependencies (${internal_target_name} ${full_deps_list} )
507
+ set_target_properties (
508
+ ${internal_target_name}
509
+ PROPERTIES
510
+ CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
511
+ FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS} "
512
+ )
371
513
372
- add_library (
373
- ${fq_target_name}
374
- # We want an object library as the objects will eventually get packaged into
375
- # an archive (like libc.a).
376
- EXCLUDE_FROM_ALL
377
- OBJECT
378
- ${ADD_ENTRYPOINT_OBJ_SRCS}
379
- ${ADD_ENTRYPOINT_OBJ_HDRS}
380
- )
381
- target_compile_options (${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLLVM_LIBC_PUBLIC_PACKAGING )
382
- target_include_directories (${fq_target_name} PRIVATE ${include_dirs} )
383
- add_dependencies (${fq_target_name} ${full_deps_list} )
514
+ add_library (
515
+ ${fq_target_name}
516
+ # We want an object library as the objects will eventually get packaged into
517
+ # an archive (like libc.a).
518
+ EXCLUDE_FROM_ALL
519
+ OBJECT
520
+ ${ADD_ENTRYPOINT_OBJ_SRCS}
521
+ ${ADD_ENTRYPOINT_OBJ_HDRS}
522
+ )
523
+ target_compile_options (${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLLVM_LIBC_PUBLIC_PACKAGING )
524
+ target_include_directories (${fq_target_name} PRIVATE ${include_dirs} )
525
+ add_dependencies (${fq_target_name} ${full_deps_list} )
384
526
385
- set_target_properties (
386
- ${fq_target_name}
387
- PROPERTIES
388
- ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
389
- TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
390
- OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name} >"
391
- # TODO: We don't need to list internal object files if the internal
392
- # target is a normal static library.
393
- OBJECT_FILE_RAW "$<TARGET_OBJECTS:${internal_target_name} >"
394
- CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
395
- DEPS "${fq_deps_list} "
396
- FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS} "
397
- )
527
+ set_target_properties (
528
+ ${fq_target_name}
529
+ PROPERTIES
530
+ ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
531
+ TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
532
+ OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name} >"
533
+ # TODO: We don't need to list internal object files if the internal
534
+ # target is a normal static library.
535
+ OBJECT_FILE_RAW "$<TARGET_OBJECTS:${internal_target_name} >"
536
+ CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
537
+ DEPS "${fq_deps_list} "
538
+ FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS} "
539
+ )
540
+ endif ()
398
541
399
- if (LLVM_LIBC_ENABLE_LINTING )
542
+ if (LLVM_LIBC_ENABLE_LINTING AND TARGET ${internal_target_name} )
400
543
if (NOT LLVM_LIBC_CLANG_TIDY )
401
544
message (FATAL_ERROR "Something is wrong! LLVM_LIBC_ENABLE_LINTING is "
402
545
"ON but LLVM_LIBC_CLANG_TIDY is not set." )
0 commit comments