@@ -89,14 +89,14 @@ static cl::opt<bool> OutputAssembly{"S",
89
89
cl::Hidden, cl::cat (PostLinkCat)};
90
90
91
91
enum IRSplitMode {
92
- SPLIT_PER_TU, // one module per translation unit
92
+ SPLIT_PER_TU, // one module per translation unit
93
93
SPLIT_PER_KERNEL, // one module per kernel
94
- SPLIT_AUTO // automatically select split mode
94
+ SPLIT_AUTO // automatically select split mode
95
95
};
96
96
97
97
static cl::opt<IRSplitMode> SplitMode (
98
98
" split" , cl::desc(" split input module" ), cl::Optional,
99
- cl::init(SPLIT_PER_TU ),
99
+ cl::init(SPLIT_AUTO ),
100
100
cl::values(
101
101
clEnumValN (SPLIT_PER_TU, " source" ,
102
102
" 1 output module per source (translation unit)" ),
@@ -292,16 +292,6 @@ enum KernelMapEntryScope {
292
292
};
293
293
294
294
static KernelMapEntryScope selectDeviceCodeSplitScopeAutomatically (Module &M) {
295
- // Here we can employ various heuristics to decide which way to split kernels
296
- // is the best in each particular situation.
297
- // At the moment, we assume that per-kernel split is the best way of splitting
298
- // device code and it can be always selected unless:
299
- // - there are functions marked with [[intel::device_indirectly_callable]]
300
- // attribute, because it instructs us to make this function available to the
301
- // whole program as it was compiled as a single module.
302
- // - there are indirect calls in the module, which means that we don't know
303
- // how to group functions so both caller and callee of indirect call are in
304
- // the same module.
305
295
if (IROutputOnly) {
306
296
// We allow enabling auto split mode even in presence of -ir-output-only
307
297
// flag, but in this case we are limited by it so we can't do any split at
@@ -310,11 +300,17 @@ static KernelMapEntryScope selectDeviceCodeSplitScopeAutomatically(Module &M) {
310
300
}
311
301
312
302
for (const auto &F : M.functions ()) {
303
+ // There are functions marked with [[intel::device_indirectly_callable]]
304
+ // attribute, because it instructs us to make this function available to the
305
+ // whole program as it was compiled as a single module.
313
306
if (F.hasFnAttribute (" referenced-indirectly" ))
314
307
return Scope_Global;
315
308
if (F.isDeclaration ())
316
309
continue ;
317
- for (const auto &BB: F) {
310
+ // There are indirect calls in the module, which means that we don't know
311
+ // how to group functions so both caller and callee of indirect call are in
312
+ // the same module.
313
+ for (const auto &BB : F) {
318
314
for (const auto &I : BB) {
319
315
if (auto *CI = dyn_cast<CallInst>(&I)) {
320
316
if (!CI->getCalledFunction ())
@@ -324,6 +320,8 @@ static KernelMapEntryScope selectDeviceCodeSplitScopeAutomatically(Module &M) {
324
320
}
325
321
}
326
322
323
+ // At the moment, we assume that per-source split is the best way of splitting
324
+ // device code and can always be used except for cases handled above.
327
325
return Scope_PerModule;
328
326
}
329
327
@@ -633,6 +631,8 @@ int main(int argc, char **argv) {
633
631
" kernels with the same values of the 'sycl-module-id' attribute will\n "
634
632
" be put into the same module. If -split=kernel option is specified,\n "
635
633
" one module per kernel will be emitted.\n "
634
+ " '-split=auto' mode automatically selects the best way of splitting\n "
635
+ " kernels into modules based on some heuristic.\n "
636
636
" - If -symbols options is also specified, then for each produced module\n "
637
637
" a text file containing names of all spir kernels in it is generated.\n "
638
638
" - Specialization constant intrinsic transformer. Replaces symbolic\n "
@@ -652,7 +652,9 @@ int main(int argc, char **argv) {
652
652
" $ sycl-post-link --ir-output-only --spec-const=default \\\n "
653
653
" -o example_p.bc example.bc\n "
654
654
" will produce single output file example_p.bc suitable for SPIRV\n "
655
- " translation.\n " );
655
+ " translation.\n "
656
+ " --ir-output-only option is not not compatible with split modes other\n "
657
+ " than 'auto'.\n " );
656
658
657
659
bool DoSplit = SplitMode.getNumOccurrences () > 0 ;
658
660
bool DoSpecConst = SpecConstLower.getNumOccurrences () > 0 ;
0 commit comments