Skip to content

Commit 8b55761

Browse files
cdai2wenju-hebader
authored
[OpenCL] Disable vector to scalar types coercion for OpenCL (#8160)
For the x86 target, vector types (both result and arguments) can be coerced to scalars of the same size, e.g.: define zeroext i1 @_Z18convert_ulong4_rteDv4_t(<4 x i16> %x) ; becomes define zeroext i1 @_Z18convert_ulong4_rteDv4_t(i64 %x.coerced) Such behavior is completely valid for x86, but the backend vectorizer cannot work with scalars instead of vectors. With this patch, argument and result types will be left unchanged in the CodeGen. A new option, -fopencl-force-vector-abi, is also added; it force-disables vector-to-scalar coercion when provided. --------- Co-authored-by: Wenju He <[email protected]> Co-authored-by: Alexey Bader <[email protected]>
1 parent 7a29f61 commit 8b55761

File tree

5 files changed

+81
-0
lines changed

5 files changed

+81
-0
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ LANGOPT(ObjCDisableDirectMethodsForTesting, 1, 0,
350350
"Disable recognition of objc_direct methods")
351351
LANGOPT(CFProtectionBranch , 1, 0, "Control-Flow Branch Protection enabled")
352352
LANGOPT(FakeAddressSpaceMap , 1, 0, "OpenCL fake address space map")
353+
LANGOPT(OpenCLForceVectorABI, 1, 0, "OpenCL vector to scalar coercion disabling")
353354
ENUM_LANGOPT(AddressSpaceMapMangling , AddrSpaceMapMangling, 2, ASMM_Target, "OpenCL address space map mangling mode")
354355
LANGOPT(IncludeDefaultHeader, 1, 0, "Include default header file for OpenCL")
355356
LANGOPT(DeclareOpenCLBuiltins, 1, 0, "Declare OpenCL builtin functions")

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6468,6 +6468,9 @@ defm const_strings : BoolOption<"f", "const-strings",
64686468
def fno_bitfield_type_align : Flag<["-"], "fno-bitfield-type-align">,
64696469
HelpText<"Ignore bit-field types when aligning structures">,
64706470
MarshallingInfoFlag<LangOpts<"NoBitFieldTypeAlign">>;
6471+
// -fopencl-force-vector-abi: a plain flag (no value) that is marshalled into
// LangOptions::OpenCLForceVectorABI.
def fopencl_force_vector_abi : Flag<["-"], "fopencl-force-vector-abi">,
6472+
HelpText<"Disable vector to scalar coercion for OpenCL">,
6473+
MarshallingInfoFlag<LangOpts<"OpenCLForceVectorABI">>;
64716474
def ffake_address_space_map : Flag<["-"], "ffake-address-space-map">,
64726475
HelpText<"Use a fake address space map; OpenCL testing purposes only">,
64736476
MarshallingInfoFlag<LangOpts<"FakeAddressSpaceMap">>;

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,41 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
100100
return Address::invalid();
101101
}
102102

103+
/// Classify a single return or argument type for the OpenCL "force vector ABI"
/// path, without coercing it to a different IR type.
///
/// \param Ty      The type to classify.
/// \param Context AST context used for alignment and integer-promotion queries.
/// \returns Ignore for void, Indirect (non-byval) for records, Extend for
///          promotable integers, and Direct for everything else. Vector types
///          take the Direct path, so they are left as vectors.
static ABIArgInfo classifyOpenCL(QualType Ty, ASTContext &Context) {
104+
if (Ty->isVoidType())
105+
return ABIArgInfo::getIgnore();
106+
107+
// Classify an enum as its underlying integer type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
108+
Ty = EnumTy->getDecl()->getIntegerType();
109+
110+
// Records are passed indirectly at their natural alignment, not byval.
if (const RecordType *RT = Ty->getAs<RecordType>())
111+
return ABIArgInfo::getIndirect(Context.getTypeAlignInChars(RT),
112+
/*ByVal=*/false);
113+
114+
if (Context.isPromotableIntegerType(Ty))
115+
return ABIArgInfo::getExtend(Ty);
116+
117+
// Everything else is passed directly with no coercion type.
return ABIArgInfo::getDirect();
118+
}
119+
120+
/// Apply the OpenCL classification to the return value and every argument of
/// \p FI, but only when compiling OpenCL with OpenCLForceVectorABI enabled.
///
/// \param FI      Function info whose return and argument ABI info is filled in.
/// \param Context AST context; also the source of the language options checked.
/// \returns true if \p FI was classified here, in which case the computeInfo
///          callers return early; false if \p FI was left untouched.
static bool doOpenCLClassification(CGFunctionInfo &FI, ASTContext &Context) {
121+
if (!Context.getLangOpts().OpenCL)
122+
return false;
123+
if (!Context.getLangOpts().OpenCLForceVectorABI)
124+
return false;
125+
126+
// Use OpenCL classify to prevent coercing.
127+
// Vector ABI must be enforced by enabling the corresponding option.
128+
// Otherwise, vector types will be coerced to a matching integer
129+
// type to conform with ABI, e.g.: <8 x i8> will be coerced to i64.
130+
FI.getReturnInfo() = classifyOpenCL(FI.getReturnType(), Context);
131+
132+
for (auto &Arg : FI.arguments())
133+
Arg.info = classifyOpenCL(Arg.type, Context);
134+
135+
return true;
136+
}
137+
103138
static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
104139
return CGF.ConvertTypeForMem(
105140
CGF.getContext().getBuiltinVaListType()->getPointeeType());
@@ -1984,6 +2019,10 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
19842019
}
19852020

19862021
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
2022+
ASTContext &Context = getContext();
2023+
if (doOpenCLClassification(FI, Context))
2024+
return;
2025+
19872026
CCState State(FI);
19882027
if (IsMCUABI)
19892028
State.FreeRegs = 3;
@@ -3970,6 +4009,9 @@ X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
39704009
}
39714010

39724011
void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
4012+
ASTContext &Context = getContext();
4013+
if (doOpenCLClassification(FI, Context))
4014+
return;
39734015

39744016
const unsigned CallingConv = FI.getCallingConvention();
39754017
// It is possible to force Win64 calling convention on any x86_64 target by
@@ -4427,6 +4469,10 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
44274469
}
44284470

44294471
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
4472+
ASTContext &Context = getContext();
4473+
if (doOpenCLClassification(FI, Context))
4474+
return;
4475+
44304476
const unsigned CC = FI.getCallingConvention();
44314477
bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
44324478
bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3928,6 +3928,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
39283928
}
39293929
}
39303930

3931+
Opts.OpenCLForceVectorABI = Args.hasArg(OPT_fopencl_force_vector_abi);
3932+
39313933
// Check if -fopenmp is specified and set default version to 5.0.
39323934
Opts.OpenMP = Args.hasArg(OPT_fopenmp) ? 50 : 0;
39333935
// Check if -fopenmp-simd is specified.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %clang_cc1 -x cl -triple i686-pc-win32-gnu -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
2+
// RUN: %clang_cc1 -x cl -triple x86_64-unknown-linux -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
3+
// RUN: %clang_cc1 -x cl -triple x86_64-pc-win32-gnu -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
4+
5+
// RUN: %clang_cc1 -x cl -triple i686-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32CL
6+
// RUN: %clang_cc1 -x cl -triple x86_64-unknown-linux %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64
7+
// RUN: %clang_cc1 -x cl -triple x86_64-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
8+
9+
// RUN: %clang_cc1 -x c -triple i686-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32
10+
// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64
11+
// RUN: %clang_cc1 -x c -triple x86_64-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER-C-WIN
12+
13+
typedef unsigned short ushort;
14+
typedef ushort ushort4 __attribute__((ext_vector_type(4)));
15+
16+
typedef unsigned long ulong;
17+
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
18+
19+
// Test subject: takes a ushort4 and returns a ulong4. The body is a
// placeholder; the CHECK lines below match only the emitted function
// signature (return and parameter IR types).
ulong4 __attribute__((const)) __attribute__((overloadable)) convert_ulong4_rte(ushort4 x)
20+
{
21+
return 1;
22+
}
23+
24+
// NOCOER: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(<4 x i16> noundef %{{.*}})
25+
// NOCOER-C-WIN: define {{.*}}<4 x i32> @_Z18convert_ulong4_rteDv4_t(<4 x i16> noundef %{{.*}})
26+
// COER32CL: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
27+
// COER32: define {{.*}}<4 x i32> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
28+
// FIXME: <4 x i16> should be coerced to i64 instead of double
29+
// COER64: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(double noundef %{{.*}})

0 commit comments

Comments
 (0)