Skip to content

Commit d64a1e3

Browse files
Fznamznonpremanandrao
authored andcommitted
[SYCL] Option to disable alloca address space for sret arguments (#17976)
Recent community change llvm/llvm-project#114062 enabled the use of alloca address space for sret arguments. This causes several issues for sycl, particularly for the SPIR target where this leads to invalid address-space-casts from the local address space. The new option -foffload-use-alloca-addrspace-for-srets is TRUE by default (and produces the current community behavior) and is set to FALSE in sycl device compilation modes (where the prior behavior is retained). The commit also reverts some test changes made to reflect the current community behavior. --------- Co-authored-by: Premanand M Rao <[email protected]>
1 parent ee1078c commit d64a1e3

File tree

21 files changed

+271
-114
lines changed

21 files changed

+271
-114
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,11 @@ CODEGENOPT(DisableSYCLEarlyOpts, 1, 0)
477477
/// which do not contain "user" code.
478478
CODEGENOPT(OptimizeSYCLFramework, 1, 0)
479479

480+
/// Whether to use alloca address space for `sret` arguments.
481+
/// TODO: This option can be removed once a fix goes in that can
482+
/// work with the community changes for using the alloca address space.
483+
CODEGENOPT(UseAllocaASForSrets, 1, 0)
484+
480485
/// Turn on fp64 partial emulation for kernels with only fp64 conversion
481486
/// operations and no fp64 computation operations (requires Intel GPU backend
482487
/// supporting fp64 partial emulation)

clang/include/clang/Driver/Options.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8753,6 +8753,13 @@ def fsycl_is_native_cpu : Flag<["-"], "fsycl-is-native-cpu">,
87538753
HelpText<"Perform device compilation for Native CPU.">,
87548754
Visibility<[CC1Option]>,
87558755
MarshallingInfoFlag<LangOpts<"SYCLIsNativeCPU">>;
8756+
// TODO: This option can be removed once a fix goes in that can
8757+
// work with the community changes for using the alloca address space.
8758+
defm offload_use_alloca_addrspace_for_srets : BoolFOption<"offload-use-alloca-addrspace-for-srets",
8759+
CodeGenOpts<"UseAllocaASForSrets">,
8760+
DefaultTrue,
8761+
PosFlag<SetTrue, [], [CC1Option], "Use alloca address space for sret arguments for offloading targets">,
8762+
NegFlag<SetFalse>>;
87568763

87578764
} // let Visibility = [CC1Option]
87588765

clang/lib/CodeGen/ABIInfoImpl.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,16 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
2121
// Records with non-trivial destructors/copy-constructors should not be
2222
// passed by value.
2323
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
24-
return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
24+
return getNaturalAlignIndirect(Ty,
25+
getCodeGenOpts().UseAllocaASForSrets
26+
? getDataLayout().getAllocaAddrSpace()
27+
: CGT.getTargetAddressSpace(Ty),
2528
RAA == CGCXXABI::RAA_DirectInMemory);
2629

27-
return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace());
30+
return getNaturalAlignIndirect(Ty,
31+
getCodeGenOpts().UseAllocaASForSrets
32+
? getDataLayout().getAllocaAddrSpace()
33+
: CGT.getTargetAddressSpace(Ty));
2834
}
2935

3036
// Treat an enum type as its underlying type.
@@ -37,7 +43,10 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
3743
Context.getTypeSize(Context.getTargetInfo().hasInt128Type()
3844
? Context.Int128Ty
3945
: Context.LongLongTy))
40-
return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace());
46+
return getNaturalAlignIndirect(Ty,
47+
getCodeGenOpts().UseAllocaASForSrets
48+
? getDataLayout().getAllocaAddrSpace()
49+
: CGT.getTargetAddressSpace(Ty));
4150

4251
return (isPromotableIntegerTypeForABI(Ty)
4352
? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
@@ -49,7 +58,10 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
4958
return ABIArgInfo::getIgnore();
5059

5160
if (isAggregateTypeForABI(RetTy))
52-
return getNaturalAlignIndirect(RetTy, getDataLayout().getAllocaAddrSpace());
61+
return getNaturalAlignIndirect(RetTy,
62+
getCodeGenOpts().UseAllocaASForSrets
63+
? getDataLayout().getAllocaAddrSpace()
64+
: CGT.getTargetAddressSpace(RetTy));
5365

5466
// Treat an enum type as its underlying type.
5567
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
@@ -61,7 +73,9 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
6173
? getContext().Int128Ty
6274
: getContext().LongLongTy))
6375
return getNaturalAlignIndirect(RetTy,
64-
getDataLayout().getAllocaAddrSpace());
76+
getCodeGenOpts().UseAllocaASForSrets
77+
? getDataLayout().getAllocaAddrSpace()
78+
: CGT.getTargetAddressSpace(RetTy));
6579

6680
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
6781
: ABIArgInfo::getDirect());
@@ -122,14 +136,16 @@ CGCXXABI::RecordArgABI CodeGen::getRecordArgABI(QualType T, CGCXXABI &CXXABI) {
122136
}
123137

124138
bool CodeGen::classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
125-
const ABIInfo &Info) {
139+
const ABIInfo &Info, CodeGenTypes &CGT) {
126140
QualType Ty = FI.getReturnType();
127141

128142
if (const auto *RT = Ty->getAs<RecordType>())
129143
if (!isa<CXXRecordDecl>(RT->getDecl()) &&
130144
!RT->getDecl()->canPassInRegisters()) {
131145
FI.getReturnInfo() = Info.getNaturalAlignIndirect(
132-
Ty, Info.getDataLayout().getAllocaAddrSpace());
146+
Ty, Info.getCodeGenOpts().UseAllocaASForSrets
147+
? Info.getDataLayout().getAllocaAddrSpace()
148+
: CGT.getTargetAddressSpace(Ty));
133149
return true;
134150
}
135151

clang/lib/CodeGen/ABIInfoImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI);
4646
CGCXXABI::RecordArgABI getRecordArgABI(QualType T, CGCXXABI &CXXABI);
4747

4848
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
49-
const ABIInfo &Info);
49+
const ABIInfo &Info, CodeGenTypes &CGT);
5050

5151
/// Pass transparent unions as if they were the type of the first element. Sema
5252
/// should ensure that all elements of the union have the same "machine type".

clang/lib/CodeGen/CGCall.cpp

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,8 +1680,12 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
16801680

16811681
// Add type for sret argument.
16821682
if (IRFunctionArgs.hasSRetArg()) {
1683-
ArgTypes[IRFunctionArgs.getSRetArgNo()] = llvm::PointerType::get(
1684-
getLLVMContext(), FI.getReturnInfo().getIndirectAddrSpace());
1683+
QualType Ret = FI.getReturnType();
1684+
unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets
1685+
? FI.getReturnInfo().getIndirectAddrSpace()
1686+
: CGM.getTypes().getTargetAddressSpace(Ret);
1687+
ArgTypes[IRFunctionArgs.getSRetArgNo()] =
1688+
llvm::PointerType::get(getLLVMContext(), AddressSpace);
16851689
}
16861690

16871691
// Add type for inalloca argument.
@@ -5258,6 +5262,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
52585262
// If the call returns a temporary with struct return, create a temporary
52595263
// alloca to hold the result, unless one is given to us.
52605264
Address SRetPtr = Address::invalid();
5265+
RawAddress SRetAlloca = RawAddress::invalid();
52615266
llvm::Value *UnusedReturnSizePtr = nullptr;
52625267
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
52635268
// For virtual function pointer thunks and musttail calls, we must always
@@ -5271,19 +5276,27 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
52715276
} else if (!ReturnValue.isNull()) {
52725277
SRetPtr = ReturnValue.getAddress();
52735278
} else {
5274-
SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp");
5279+
SRetPtr = CGM.getCodeGenOpts().UseAllocaASForSrets
5280+
? CreateMemTempWithoutCast(RetTy, "tmp")
5281+
: CreateMemTemp(RetTy, "tmp", &SRetAlloca);
52755282
if (HaveInsertPoint() && ReturnValue.isUnused()) {
52765283
llvm::TypeSize size =
52775284
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
5278-
UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer());
5285+
if (CGM.getCodeGenOpts().UseAllocaASForSrets)
5286+
UnusedReturnSizePtr =
5287+
EmitLifetimeStart(size, SRetPtr.getBasePointer());
5288+
else
5289+
UnusedReturnSizePtr =
5290+
EmitLifetimeStart(size, SRetAlloca.getPointer());
52795291
}
52805292
}
52815293
if (IRFunctionArgs.hasSRetArg()) {
52825294
// A mismatch between the allocated return value's AS and the target's
52835295
// chosen IndirectAS can happen e.g. when passing the this pointer through
52845296
// a chain involving stores to / loads from the DefaultAS; we address this
52855297
// here, symmetrically with the handling we have for normal pointer args.
5286-
if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) {
5298+
if (CGM.getCodeGenOpts().UseAllocaASForSrets &&
5299+
(SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace())) {
52875300
llvm::Value *V = SRetPtr.getBasePointer();
52885301
LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace());
52895302
LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace());
@@ -5881,9 +5894,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
58815894
// can't depend on being inside of an ExprWithCleanups, so we need to manually
58825895
// pop this cleanup later on. Being eager about this is OK, since this
58835896
// temporary is 'invisible' outside of the callee.
5884-
if (UnusedReturnSizePtr)
5885-
pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr,
5886-
UnusedReturnSizePtr);
5897+
if (UnusedReturnSizePtr) {
5898+
if (CGM.getCodeGenOpts().UseAllocaASForSrets)
5899+
pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr,
5900+
UnusedReturnSizePtr);
5901+
else
5902+
pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca,
5903+
UnusedReturnSizePtr);
5904+
}
58875905

58885906
llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
58895907

clang/lib/CodeGen/ItaniumCXXABI.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,10 +1353,14 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
13531353

13541354
// If C++ prohibits us from making a copy, return by address.
13551355
if (!RD->canPassInRegisters()) {
1356-
auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
1357-
FI.getReturnInfo() = ABIArgInfo::getIndirect(
1358-
Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(),
1359-
/*ByVal=*/false);
1356+
QualType Ret = FI.getReturnType();
1357+
auto Align = CGM.getContext().getTypeAlignInChars(Ret);
1358+
unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets
1359+
? CGM.getDataLayout().getAllocaAddrSpace()
1360+
: CGM.getTypes().getTargetAddressSpace(Ret);
1361+
FI.getReturnInfo() =
1362+
ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AddressSpace,
1363+
/*ByVal=*/false);
13601364
return true;
13611365
}
13621366
return false;

clang/lib/CodeGen/MicrosoftCXXABI.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,10 +1174,14 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
11741174
bool isIndirectReturn = !isTrivialForABI || FI.isInstanceMethod();
11751175

11761176
if (isIndirectReturn) {
1177-
CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
1178-
FI.getReturnInfo() = ABIArgInfo::getIndirect(
1179-
Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(),
1180-
/*ByVal=*/false);
1177+
QualType Ret = FI.getReturnType();
1178+
CharUnits Align = CGM.getContext().getTypeAlignInChars(Ret);
1179+
unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets
1180+
? CGM.getDataLayout().getAllocaAddrSpace()
1181+
: CGM.getTypes().getTargetAddressSpace(Ret);
1182+
FI.getReturnInfo() =
1183+
ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AddressSpace,
1184+
/*ByVal=*/false);
11811185

11821186
// MSVC always passes `this` before the `sret` parameter.
11831187
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class AArch64ABIInfo : public ABIInfo {
6060
SmallVectorImpl<llvm::Type *> &Flattened) const;
6161

6262
void computeInfo(CGFunctionInfo &FI) const override {
63-
if (!::classifyReturnType(getCXXABI(), FI, *this))
63+
if (!::classifyReturnType(getCXXABI(), FI, *this, CGT))
6464
FI.getReturnInfo() =
6565
classifyReturnType(FI.getReturnType(), FI.isVariadic());
6666

clang/lib/CodeGen/Targets/ARM.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ void WindowsARMTargetCodeGenInfo::setTargetAttributes(
231231
}
232232

233233
void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
234-
if (!::classifyReturnType(getCXXABI(), FI, *this))
234+
if (!::classifyReturnType(getCXXABI(), FI, *this, CGT))
235235
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(),
236236
FI.getCallingConvention());
237237

clang/lib/CodeGen/Targets/SPIR.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ ABIArgInfo CommonSPIRABIInfo::classifyKernelArgumentType(QualType Ty) const {
5151
}
5252
// Pass all aggregate types allowed by Sema by value.
5353
if (isAggregateTypeForABI(Ty))
54-
return getNaturalAlignIndirect(Ty);
54+
return getNaturalAlignIndirect(Ty,
55+
getCodeGenOpts().UseAllocaASForSrets
56+
? getDataLayout().getAllocaAddrSpace()
57+
: CGT.getTargetAddressSpace(Ty));
5558
}
5659

5760
return DefaultABIInfo::classifyArgumentType(Ty);
@@ -129,7 +132,11 @@ ABIArgInfo CommonSPIRABIInfo::classifyRegcallArgumentType(QualType Ty) const {
129132
// Records with non-trivial destructors/copy-constructors should not be
130133
// passed by value.
131134
if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
132-
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
135+
return getNaturalAlignIndirect(Ty,
136+
getCodeGenOpts().UseAllocaASForSrets
137+
? getDataLayout().getAllocaAddrSpace()
138+
: CGT.getTargetAddressSpace(Ty),
139+
RAA == CGCXXABI::RAA_DirectInMemory);
133140

134141
// Ignore empty structs/unions.
135142
if (isEmptyRecord(getContext(), Ty, true))
@@ -320,7 +327,10 @@ ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
320327
// Records with non-trivial destructors/copy-constructors should not be
321328
// passed by value.
322329
if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
323-
return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
330+
return getNaturalAlignIndirect(Ty,
331+
getCodeGenOpts().UseAllocaASForSrets
332+
? getDataLayout().getAllocaAddrSpace()
333+
: CGT.getTargetAddressSpace(Ty),
324334
RAA == CGCXXABI::RAA_DirectInMemory);
325335

326336
if (const RecordType *RT = Ty->getAs<RecordType>()) {

clang/lib/CodeGen/Targets/X86.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
970970
} else
971971
State.FreeRegs = DefaultNumRegisterParameters;
972972

973-
if (!::classifyReturnType(getCXXABI(), FI, *this)) {
973+
if (!::classifyReturnType(getCXXABI(), FI, *this, CGT)) {
974974
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
975975
} else if (FI.getReturnInfo().isIndirect()) {
976976
// The C++ ABI is not aware of register usage, so we have to check if the
@@ -2964,7 +2964,7 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
29642964
unsigned FreeSSERegs = IsRegCall ? 16 : 8;
29652965
unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0;
29662966

2967-
if (!::classifyReturnType(getCXXABI(), FI, *this)) {
2967+
if (!::classifyReturnType(getCXXABI(), FI, *this, CGT)) {
29682968
if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
29692969
!FI.getReturnType()->getTypePtr()->isUnionType()) {
29702970
FI.getReturnInfo() = classifyRegCallStructType(

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5618,7 +5618,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
56185618
// We want to compile sycl kernels.
56195619
CmdArgs.push_back("-fsycl-is-device");
56205620
CmdArgs.push_back("-fdeclare-spirv-builtins");
5621-
5621+
56225622
// Set O2 optimization level by default
56235623
if (!Args.getLastArg(options::OPT_O_Group))
56245624
CmdArgs.push_back("-O2");
@@ -5954,10 +5954,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
59545954
// are provided.
59555955
TC.addClangWarningOptions(CmdArgs);
59565956

5957-
// FIXME: Subclass ToolChain for SPIR/SPIR-V and move this to
5958-
// addClangWarningOptions.
5959-
if (Triple.isSPIROrSPIRV())
5957+
if (Triple.isSPIROrSPIRV()) {
5958+
// FIXME: Subclass ToolChain for SPIR/SPIR-V and move this to
5959+
// addClangWarningOptions.
59605960
CmdArgs.push_back("-Wspir-compat");
5961+
// Disable this option for SPIR targets.
5962+
// TODO: This needs to be re-enabled once we have a real fix.
5963+
CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets");
5964+
}
59615965

59625966
// Select the appropriate action.
59635967
RewriteKind rewriteKind = RK_None;
@@ -6263,6 +6267,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
62636267
Args.addOptOutFlag(CmdArgs, options::OPT_foptimize_sibling_calls,
62646268
options::OPT_fno_optimize_sibling_calls);
62656269

6270+
Args.addOptOutFlag(CmdArgs,
6271+
options::OPT_foffload_use_alloca_addrspace_for_srets,
6272+
options::OPT_fno_offload_use_alloca_addrspace_for_srets);
6273+
62666274
RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
62676275
CmdArgs, JA, NoOffloadFP32PrecDiv,
62686276
NoOffloadFP32PrecSqrt);

clang/test/Driver/sycl-device.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,15 @@
5858
// PHASES-PREPROC-DEPS: 0: input, {{.*}}, c++, (device-sycl)
5959
// PHASES-PROPROC-DEPS: 1: preprocessor, {0}, dependencies, (device-sycl)
6060
// PHASES-PREPROC-DEPS: 2: offload, "device-sycl (spir64-unknown-unknown)" {1}, none
61+
62+
/// Check that "-fno-offload-use-alloca-addrspace-for-srets" is not set by
63+
/// default on the command-line in a non-sycl compilation.
64+
// RUN: %clang -### %s 2>&1 \
65+
// RUN: | FileCheck -check-prefix=CHECK-ALLOCA-ADDRSPACE %s
66+
// CHECK-ALLOCA-ADDRSPACE-NOT: clang{{.*}} "-fno-offload-use-alloca-addrspace-for-srets"
67+
68+
/// Check that "-fno-offload-use-alloca-addrspace-for-srets" is set if it is
69+
/// not specified on the command-line by the user with -fsycl
70+
// RUN: %clang -### -fsycl %s 2>&1 \
71+
// RUN: | FileCheck -check-prefix=CHECK-NO-ALLOCA-ADDRSPACE %s
72+
// CHECK-NO-ALLOCA-ADDRSPACE: clang{{.*}} "-fno-offload-use-alloca-addrspace-for-srets"

0 commit comments

Comments
 (0)