Skip to content

Commit d226c17

Browse files
Merge remote-tracking branch 'upstream/sycl' into sycl-mlir
2 parents e945d42 + a1787de commit d226c17

File tree

74 files changed

+3483
-907
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+3483
-907
lines changed

.github/workflows/gh_pages.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ on:
1313

1414
jobs:
1515
build:
16-
runs-on: ubuntu-latest
16+
runs-on: ubuntu-20.04
1717
if: github.repository == 'intel/llvm'
1818
steps:
1919
- uses: actions/checkout@v3

.github/workflows/sycl_cleanup.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ on:
1010

1111
jobs:
1212
cleanup:
13-
runs-on: ubuntu-latest
13+
runs-on: ubuntu-20.04
1414
steps:
1515
- uses: actions/github-script@v6
1616
with:

.github/workflows/sycl_containers.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
base_image_ubuntu2004:
2626
if: github.repository == 'intel/llvm'
2727
name: Base Ubuntu 20.04 Docker image
28-
runs-on: ubuntu-latest
28+
runs-on: ubuntu-20.04
2929
steps:
3030
- name: Checkout
3131
uses: actions/checkout@v3
@@ -44,7 +44,7 @@ jobs:
4444
build_image_ubuntu2004:
4545
if: github.repository == 'intel/llvm'
4646
name: Build Ubuntu Docker image
47-
runs-on: ubuntu-latest
47+
runs-on: ubuntu-20.04
4848
steps:
4949
- name: Checkout
5050
uses: actions/checkout@v3
@@ -66,7 +66,7 @@ jobs:
6666
drivers_image_ubuntu2004:
6767
if: github.repository == 'intel/llvm'
6868
name: Intel Drivers Ubuntu 20.04 Docker image
69-
runs-on: ubuntu-latest
69+
runs-on: ubuntu-20.04
7070
needs: base_image_ubuntu2004
7171
steps:
7272
- name: Checkout
@@ -105,7 +105,7 @@ jobs:
105105
drivers_image_ubuntu2004_unstable:
106106
if: github.repository == 'intel/llvm'
107107
name: Intel Drivers (unstable) Ubuntu 20.04 Docker image
108-
runs-on: ubuntu-latest
108+
runs-on: ubuntu-20.04
109109
needs: base_image_ubuntu2004
110110
steps:
111111
- name: Checkout
@@ -136,7 +136,7 @@ jobs:
136136
base_image_ubuntu2204:
137137
if: github.repository == 'intel/llvm'
138138
name: Base Ubuntu 22.04 Docker image
139-
runs-on: ubuntu-latest
139+
runs-on: ubuntu-20.04
140140
steps:
141141
- name: Checkout
142142
uses: actions/checkout@v3
@@ -155,7 +155,7 @@ jobs:
155155
build_image_ubuntu2204:
156156
if: github.repository == 'intel/llvm'
157157
name: Build Ubuntu Docker image
158-
runs-on: ubuntu-latest
158+
runs-on: ubuntu-20.04
159159
steps:
160160
- name: Checkout
161161
uses: actions/checkout@v3
@@ -177,7 +177,7 @@ jobs:
177177
drivers_image_ubuntu2204:
178178
if: github.repository == 'intel/llvm'
179179
name: Intel Drivers Ubuntu 22.04 Docker image
180-
runs-on: ubuntu-latest
180+
runs-on: ubuntu-20.04
181181
needs: base_image_ubuntu2204
182182
steps:
183183
- name: Checkout
@@ -215,7 +215,7 @@ jobs:
215215
drivers_image_ubuntu2204_unstable:
216216
if: github.repository == 'intel/llvm'
217217
name: Intel Drivers (unstable) Ubuntu 22.04 Docker image
218-
runs-on: ubuntu-latest
218+
runs-on: ubuntu-20.04
219219
needs: base_image_ubuntu2204
220220
steps:
221221
- name: Checkout

.github/workflows/sycl_gen_test_matrix.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ on:
4646
jobs:
4747
test_matrix:
4848
name: Generate Test Matrix
49-
runs-on: ubuntu-latest
49+
runs-on: ubuntu-20.04
5050
outputs:
5151
lts_matrix: ${{ steps.work.outputs.lts_matrix }}
5252
cts_matrix: ${{ steps.work.outputs.cts_matrix }}

.github/workflows/sycl_stale_issues.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on:
66

77
jobs:
88
close-issues:
9-
runs-on: ubuntu-latest
9+
runs-on: ubuntu-20.04
1010
steps:
1111
- uses: actions/stale@v4
1212
with:

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ LANGOPT(ObjCDisableDirectMethodsForTesting, 1, 0,
350350
"Disable recognition of objc_direct methods")
351351
LANGOPT(CFProtectionBranch , 1, 0, "Control-Flow Branch Protection enabled")
352352
LANGOPT(FakeAddressSpaceMap , 1, 0, "OpenCL fake address space map")
353+
LANGOPT(OpenCLForceVectorABI, 1, 0, "OpenCL vector to scalar coercion disabling")
353354
ENUM_LANGOPT(AddressSpaceMapMangling , AddrSpaceMapMangling, 2, ASMM_Target, "OpenCL address space map mangling mode")
354355
LANGOPT(IncludeDefaultHeader, 1, 0, "Include default header file for OpenCL")
355356
LANGOPT(DeclareOpenCLBuiltins, 1, 0, "Declare OpenCL builtin functions")

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6474,6 +6474,9 @@ defm const_strings : BoolOption<"f", "const-strings",
64746474
def fno_bitfield_type_align : Flag<["-"], "fno-bitfield-type-align">,
64756475
HelpText<"Ignore bit-field types when aligning structures">,
64766476
MarshallingInfoFlag<LangOpts<"NoBitFieldTypeAlign">>;
6477+
def fopencl_force_vector_abi : Flag<["-"], "fopencl-force-vector-abi">,
6478+
HelpText<"Disable vector to scalar coercion for OpenCL">,
6479+
MarshallingInfoFlag<LangOpts<"OpenCLForceVectorABI">>;
64776480
def ffake_address_space_map : Flag<["-"], "ffake-address-space-map">,
64786481
HelpText<"Use a fake address space map; OpenCL testing purposes only">,
64796482
MarshallingInfoFlag<LangOpts<"FakeAddressSpaceMap">>;

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
169169
Builder.defineMacro("__PTX__");
170170
Builder.defineMacro("__NVPTX__");
171171
if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || Opts.SYCLIsDevice) {
172-
// Set __CUDA_ARCH__ for the GPU specified.
172+
// Set __CUDA_ARCH__ or __SYCL_CUDA_ARCH__ for the GPU specified.
173+
// The SYCL-specific macro is used to distinguish the SYCL and CUDA APIs.
173174
std::string CUDAArchCode = [this] {
174175
switch (GPU) {
175176
case CudaArch::GFX600:
@@ -260,7 +261,12 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
260261
}
261262
llvm_unreachable("unhandled CudaArch");
262263
}();
263-
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
264+
265+
if (Opts.SYCLIsDevice) {
266+
Builder.defineMacro("__SYCL_CUDA_ARCH__", CUDAArchCode);
267+
} else {
268+
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
269+
}
264270
}
265271
}
266272

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,41 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
100100
return Address::invalid();
101101
}
102102

103+
// Classifies one return or argument type for the OpenCL path that avoids
// coercing the value to a different IR type.
//
// Ty      - the type to classify; an enum is replaced by its integer type.
// Context - AST context used for alignment and integer-promotion queries.
//
// Returns Ignore for void, Indirect (not by-value) for record types, Extend
// for promotable integer types, and Direct for everything else.
static ABIArgInfo classifyOpenCL(QualType Ty, ASTContext &Context) {
104+
// Void carries no value, so nothing is passed or returned.
if (Ty->isVoidType())
105+
return ABIArgInfo::getIgnore();
106+
107+
// Classify an enum as the integer type that its declaration reports.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
108+
Ty = EnumTy->getDecl()->getIntegerType();
109+
110+
// Records are passed indirectly, using the record type's own alignment.
if (const RecordType *RT = Ty->getAs<RecordType>())
111+
return ABIArgInfo::getIndirect(Context.getTypeAlignInChars(RT),
112+
/*ByVal=*/false);
113+
114+
// Promotable integer types are marked for extension.
if (Context.isPromotableIntegerType(Ty))
115+
return ABIArgInfo::getExtend(Ty);
116+
117+
// Everything else is passed directly.
return ABIArgInfo::getDirect();
118+
}
119+
120+
// Applies classifyOpenCL to the return type and every argument of FI when
// both the OpenCL and OpenCLForceVectorABI language options are set.
//
// FI      - function info whose return and argument ABI info is overwritten.
// Context - AST context supplying the language options and type queries.
//
// Returns true when FI was classified here, so the caller can skip its own
// target-specific classification; returns false and leaves FI untouched when
// either option is off.
static bool doOpenCLClassification(CGFunctionInfo &FI, ASTContext &Context) {
121+
// Only OpenCL compilations are eligible.
if (!Context.getLangOpts().OpenCL)
122+
return false;
123+
// The behaviour is opt-in via the OpenCLForceVectorABI language option.
if (!Context.getLangOpts().OpenCLForceVectorABI)
124+
return false;
125+
126+
// Use OpenCL classify to prevent coercing.
127+
// Vector ABI must be enforced by enabling the corresponding option.
128+
// Otherwise, vector types will be coerced to a matching integer
129+
// type to conform with ABI, e.g.: <8 x i8> will be coerced to i64.
130+
FI.getReturnInfo() = classifyOpenCL(FI.getReturnType(), Context);
131+
132+
// Classify each argument independently with the same rules.
for (auto &Arg : FI.arguments())
133+
Arg.info = classifyOpenCL(Arg.type, Context);
134+
135+
return true;
136+
}
137+
103138
static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
104139
return CGF.ConvertTypeForMem(
105140
CGF.getContext().getBuiltinVaListType()->getPointeeType());
@@ -1984,6 +2019,10 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
19842019
}
19852020

19862021
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
2022+
ASTContext &Context = getContext();
2023+
if (doOpenCLClassification(FI, Context))
2024+
return;
2025+
19872026
CCState State(FI);
19882027
if (IsMCUABI)
19892028
State.FreeRegs = 3;
@@ -3970,6 +4009,9 @@ X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
39704009
}
39714010

39724011
void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
4012+
ASTContext &Context = getContext();
4013+
if (doOpenCLClassification(FI, Context))
4014+
return;
39734015

39744016
const unsigned CallingConv = FI.getCallingConvention();
39754017
// It is possible to force Win64 calling convention on any x86_64 target by
@@ -4427,6 +4469,10 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
44274469
}
44284470

44294471
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
4472+
ASTContext &Context = getContext();
4473+
if (doOpenCLClassification(FI, Context))
4474+
return;
4475+
44304476
const unsigned CC = FI.getCallingConvention();
44314477
bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
44324478
bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -901,17 +901,14 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args,
901901
OptNoTriple = A->getOption().matches(Opt);
902902
if (A->getOption().matches(Opt_EQ)) {
903903
// Passing device args: -X<Opt>=<triple> -opt=val.
904-
if (getDriver().MakeSYCLDeviceTriple(A->getValue()) != getTriple())
904+
StringRef GenDevice = SYCL::gen::resolveGenDevice(A->getValue());
905+
if (getDriver().MakeSYCLDeviceTriple(A->getValue()) != getTriple() &&
906+
GenDevice.empty())
905907
// Provided triple does not match current tool chain.
906908
continue;
907-
if (getTriple().isSPIR() &&
908-
getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) {
909-
if (Device.empty() && StringRef(A->getValue()).startswith("intel_gpu"))
910-
continue;
911-
if (!Device.empty() &&
912-
getDriver().MakeSYCLDeviceTriple(A->getValue()) == getTriple())
913-
continue;
914-
}
909+
if (Device != GenDevice && getTriple().isSPIR() &&
910+
getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)
911+
continue;
915912
} else if (!OptNoTriple)
916913
// Don't worry about any of the other args, we only want to pass what is
917914
// passed in -X<Opt>

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3928,6 +3928,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
39283928
}
39293929
}
39303930

3931+
Opts.OpenCLForceVectorABI = Args.hasArg(OPT_fopencl_force_vector_abi);
3932+
39313933
// Check if -fopenmp is specified and set default version to 5.0.
39323934
Opts.OpenMP = Args.hasArg(OPT_fopenmp) ? 50 : 0;
39333935
// Check if -fopenmp-simd is specified.

clang/test/CodeGen/sycl-instrumentation-option.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// RUN: %clang_cc1 -fsycl-instrument-device-code -triple spir64_gen-unknown-unknown %s -emit-llvm -o - 2>&1 | FileCheck %s
66
// RUN: %clang_cc1 -fsycl-instrument-device-code -triple spir64_fpga-unknown-unknown %s -emit-llvm -o - 2>&1 | FileCheck %s
77
// RUN: %clang_cc1 -fsycl-instrument-device-code -triple spir64_x86_64-unknown-unknown %s -emit-llvm -o - 2>&1 | FileCheck %s
8-
// CHECK-NOT: error
8+
// CHECK-NOT: error:
99

1010
// RUN: not %clang_cc1 -fsycl-instrument-device-code -triple spirv32 -emit-llvm %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-ERR
1111
// RUN: not %clang_cc1 -fsycl-instrument-device-code -triple spirv64 -emit-llvm %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-ERR
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %clang_cc1 -x cl -triple i686-pc-win32-gnu -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
2+
// RUN: %clang_cc1 -x cl -triple x86_64-unknown-linux -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
3+
// RUN: %clang_cc1 -x cl -triple x86_64-pc-win32-gnu -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
4+
5+
// RUN: %clang_cc1 -x cl -triple i686-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32CL
6+
// RUN: %clang_cc1 -x cl -triple x86_64-unknown-linux %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64
7+
// RUN: %clang_cc1 -x cl -triple x86_64-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
8+
9+
// RUN: %clang_cc1 -x c -triple i686-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32
10+
// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64
11+
// RUN: %clang_cc1 -x c -triple x86_64-pc-win32-gnu %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER-C-WIN
12+
13+
typedef unsigned short ushort;
14+
typedef ushort ushort4 __attribute__((ext_vector_type(4)));
15+
16+
typedef unsigned long ulong;
17+
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
18+
19+
// Test subject: takes a ushort4 and returns a ulong4. The check lines below
// match only the emitted definition's return and parameter types, so the
// body is a placeholder.
ulong4 __attribute__((const)) __attribute__((overloadable)) convert_ulong4_rte(ushort4 x)
20+
{
21+
return 1;
22+
}
23+
24+
// NOCOER: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(<4 x i16> noundef %{{.*}})
25+
// NOCOER-C-WIN: define {{.*}}<4 x i32> @_Z18convert_ulong4_rteDv4_t(<4 x i16> noundef %{{.*}})
26+
// COER32CL: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
27+
// COER32: define {{.*}}<4 x i32> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
28+
// FIXME: <4 x i16> should be coerced to i64 instead of double
29+
// COER64: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(double noundef %{{.*}})

0 commit comments

Comments
 (0)