Skip to content

Commit ee2ca45

Browse files
Merge remote-tracking branch 'whitneywhtsang/WI7956' into sycl-mlir
2 parents 70b6d9e + 9899121 commit ee2ca45

38 files changed: +129 / -108 lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4999,6 +4999,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
49994999
CmdArgs.push_back("-Wno-sycl-strict");
50005000
}
50015001

5002+
// Set O2 optimization level by default
5003+
if (!Args.getLastArg(options::OPT_O_Group))
5004+
CmdArgs.push_back("-O2");
5005+
50025006
// Add the integration header option to generate the header.
50035007
StringRef Header(D.getIntegrationHeader(Input.getBaseInput()));
50045008
if (!Header.empty()) {

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,9 @@ static bool FixupInvocation(CompilerInvocation &Invocation,
579579
static unsigned getOptimizationLevel(ArgList &Args, InputKind IK,
580580
DiagnosticsEngine &Diags) {
581581
unsigned DefaultOpt = llvm::CodeGenOpt::None;
582-
if (((IK.getLanguage() == Language::OpenCL ||
583-
IK.getLanguage() == Language::OpenCLCXX) &&
584-
!Args.hasArg(OPT_cl_opt_disable)) ||
585-
Args.hasArg(OPT_fsycl_is_device))
582+
if ((IK.getLanguage() == Language::OpenCL ||
583+
IK.getLanguage() == Language::OpenCLCXX) &&
584+
!Args.hasArg(OPT_cl_opt_disable))
586585
DefaultOpt = llvm::CodeGenOpt::Default;
587586

588587
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {

clang/test/CodeGenSYCL/address-space-cond-op.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,27 @@ struct S {
55
unsigned short x;
66
};
77

8-
// CHECK-LABEL: @_Z3foobR1SS_(
8+
// CHECK-LABEL: define {{[^@]+}}@_Z3foobR1SS_(
99
// CHECK: entry:
1010
// CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1
1111
// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca ptr addrspace(4), align 8
1212
// CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast ptr [[COND_ADDR]] to ptr addrspace(4)
1313
// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast ptr [[LHS_ADDR]] to ptr addrspace(4)
1414
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8
15-
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1, [[TBAA12:!tbaa !.*]]
16-
// CHECK-NEXT: store ptr addrspace(4) [[LHS:%.*]], ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8, [[TBAA5:!tbaa !.*]]
15+
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1
16+
// CHECK-NEXT: store ptr addrspace(4) [[LHS:%.*]], ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8
1717
// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast ptr [[RHS:%.*]] to ptr addrspace(4)
18-
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1, [[TBAA12]], [[RNG14:!range !.*]]
18+
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1
1919
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
2020
// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2121
// CHECK: cond.true:
22-
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8, [[TBAA5]]
22+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8
2323
// CHECK-NEXT: br label [[COND_END:%.*]]
2424
// CHECK: cond.false:
2525
// CHECK-NEXT: br label [[COND_END]]
2626
// CHECK: cond.end:
2727
// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi ptr addrspace(4) [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ]
28-
// CHECK-NEXT: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 2 %agg.result, ptr addrspace(4) align 2 [[COND_LVALUE]], i64 2, i1 false), !tbaa.struct !{{[0-9]+}}
28+
// CHECK-NEXT: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) align 2 [[COND_LVALUE]], i64 2, i1 false)
2929
// CHECK-NEXT: ret void
3030
//
3131
S foo(bool cond, S &lhs, S rhs) {

clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -O2 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
44
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],

clang/test/CodeGenSYCL/const-wg-init.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,5 @@ int main() {
1515
const int WG_CONST = 10;
1616
});
1717
// CHECK: store i32 10, ptr addrspace(4) addrspacecast (ptr addrspace(3) @{{.*}}WG_CONST{{.*}} to ptr addrspace(4))
18-
// CHECK: %{{[0-9]+}} = call ptr @llvm.invariant.start.p4(i64 4, ptr addrspace(4) addrspacecast (ptr addrspace(3) @{{.*}}WG_CONST{{.*}} to ptr addrspace(4)))
1918
return 0;
2019
}

clang/test/CodeGenSYCL/functionptr-addrspace.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
77
kernelFunc();
88
}
99

10-
// CHECK: define dso_local spir_func{{.*}}invoke_function{{.*}}(ptr nocapture noundef %fptr, ptr addrspace(4) nocapture noundef %ptr)
10+
// CHECK: define dso_local spir_func{{.*}}invoke_function{{.*}}(ptr noundef %fptr, ptr addrspace(4) noundef %ptr)
1111
void invoke_function(int (*fptr)(), int *ptr) {}
1212

1313
int f() { return 0; }

clang/test/CodeGenSYCL/group-local-memory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device
22
// compilation pipeline with the inliner pass (new Pass Manager).
33

4-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm \
4+
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O2 \
55
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
66
// RUN: | FileCheck %s -check-prefixes=CHECK-INL,CHECK
77

clang/test/CodeGenSYCL/inline_asm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -opaque-pointers -emit-llvm -x c++ %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -O1 -fsycl-is-device -triple spir64-unknown-unknown -opaque-pointers -emit-llvm -x c++ %s -o - | FileCheck %s
22

33
class kernel;
44

clang/test/CodeGenSYCL/inlining.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s
1+
// RUN: %clang_cc1 -O1 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s
2+
// RUN: %clang_cc1 -O0 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-O0
23

34
template <typename name, typename Func>
45
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
56
kernelFunc();
67
}
78

89
int main() {
10+
// CHECK-O0: noinline
911
// CHECK-NOT: noinline
1012
kernel_single_task<class kernel_function>([]() {});
1113
return 0;

clang/test/CodeGenSYCL/intel-fpga-loops.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -O2 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
22

33
// CHECK: br label %for.cond, !llvm.loop ![[MD_DLP:[0-9]+]]
44
// CHECK: br label %for.cond, !llvm.loop ![[MD_II:[0-9]+]]

clang/test/CodeGenSYCL/max-concurrency.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,22 @@
1515
// CHECK: %inc = add nsw i32 [[TMP2]], 1
1616
// CHECK: store i32 %inc, ptr addrspace(4) %i.ascast, align 4
1717
// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]]
18-
// CHECK: store i32 %inc10, ptr addrspace(4) %i1.ascast, align 4
18+
// CHECK: store i32 %inc8, ptr addrspace(4) %i1.ascast, align 4
1919
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]]
2020
// CHECK: ret void
2121

2222
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#[0-9]+]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
2323
// CHECK: entry:
2424
// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1
2525
// CHECK: [[F1_ASCAST:%.*]] = addrspacecast ptr [[F1]] to ptr addrspace(4)
26-
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F1]])
2726
// CHECK: call spir_func void @_ZNK8Functor1clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]])
28-
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F1]])
2927
// CHECK: ret void
3028

3129
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
3230
// CHECK: entry
3331
// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1
3432
// CHECK: [[F3_ASCAST:%.*]] = addrspacecast ptr [[F3]] to ptr addrspace(4)
35-
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F3]])
3633
// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]])
37-
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F3]]
3834
// CHECK: ret void
3935

4036
// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv
@@ -49,9 +45,7 @@
4945
// CHECK: entry:
5046
// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1
5147
// CHECK: [[H2:%.*]] = addrspacecast ptr [[H1]] to ptr addrspace(4)
52-
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[H1]])
5348
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[H2]])
54-
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[H1]])
5549
// CHECK: ret void
5650

5751
// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv

clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ void foo() {
3333

3434
// Store the int and the float into the struct created
3535
// CHECK: %x = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %__SYCLKernel{{.*}}, i32 0, i32 0
36-
// CHECK: %1 = load i32, i32 addrspace(4)* %_arg_x.addr
37-
// CHECK: store i32 %1, i32 addrspace(4)* %x
36+
// CHECK: %0 = load i32, i32 addrspace(4)* %_arg_x.addr
37+
// CHECK: store i32 %0, i32 addrspace(4)* %x
3838
// CHECK: %f2 = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %__SYCLKernel{{.*}}, i32 0, i32 1
39-
// CHECK: %2 = load float, float addrspace(4)* %_arg_f2.addr
40-
// CHECK: store float %2, float addrspace(4)* %f2
39+
// CHECK: %1 = load float, float addrspace(4)* %_arg_f2.addr
40+
// CHECK: store float %1, float addrspace(4)* %f2
4141

4242
// Call the lambda
4343
// CHECK: call spir_func void @{{.*}}foo{{.*}}(%class.anon addrspace(4)* {{.*}} %__SYCLKernel{{.*}})

clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,16 @@
99

1010
// CHECK: define{{.*}} void @_Z3fooDB4096_S_(i4096 addrspace(4)* {{.*}} sret(i4096) align 8 %agg.result, i4096* {{.*}} byval(i4096) align 8 %[[ARG1:[0-9]+]], i4096* {{.*}} byval(i4096) align 8 %[[ARG2:[0-9]+]])
1111
signed _BitInt(4096) foo(signed _BitInt(4096) a, signed _BitInt(4096) b) {
12-
// CHECK: %[[VAR_A:a]] = load i4096, i4096* %[[ARG1]], align 8
13-
// CHECK: %[[VAR_B:b]] = load i4096, i4096* %[[ARG2]], align 8
14-
// CHECK: %[[RES:div]] = sdiv i4096 %[[VAR_A]], %[[VAR_B]]
15-
// CHECK: store i4096 %[[RES]], i4096 addrspace(4)* %agg.result, align 8
12+
// CHECK: %a.addr.ascast = addrspacecast i4096* %a.addr to i4096 addrspace(4)*
13+
// CHECK: %b.addr.ascast = addrspacecast i4096* %b.addr to i4096 addrspace(4)*
14+
// CHECK: %a = load i4096, i4096* %[[ARG1]], align 8
15+
// CHECK: %b = load i4096, i4096* %[[ARG2]], align 8
16+
// CHECK: store i4096 %a, i4096 addrspace(4)* %a.addr.ascast, align 8
17+
// CHECK: store i4096 %b, i4096 addrspace(4)* %b.addr.ascast, align 8
18+
// CHECK: %2 = load i4096, i4096 addrspace(4)* %a.addr.ascast, align 8
19+
// CHECK: %3 = load i4096, i4096 addrspace(4)* %b.addr.ascast, align 8
20+
// CHECK: %div = sdiv i4096 %2, %3
21+
// CHECK: store i4096 %div, i4096 addrspace(4)* %agg.result, align 8
1622
// CHECK: ret void
1723
return a / b;
1824
}

clang/test/CodeGenSYCL/no_opaque_address-space-cond-op.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,29 @@ struct S {
55
unsigned short x;
66
};
77

8-
// CHECK-LABEL: @_Z3foobR1SS_(
8+
// CHECK-LABEL: define {{[^@]+}}@_Z3foobR1SS_(
99
// CHECK: entry:
1010
// CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1
11-
// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT__ZTS1S_S:%.*]] addrspace(4)*, align 8
11+
// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT_S:%.*]] addrspace(4)*, align 8
1212
// CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast i8* [[COND_ADDR]] to i8 addrspace(4)*
13-
// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT__ZTS1S_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)*
13+
// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT_S]] addrspace(4)* addrspace(4)*
1414
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8
15-
// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1, [[TBAA12:!tbaa !.*]]
16-
// CHECK-NEXT: store [[STRUCT__ZTS1S_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8, [[TBAA5:!tbaa !.*]]
17-
// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT__ZTS1S_S]] addrspace(4)*
18-
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1, [[TBAA12]], [[RNG14:!range !.*]]
15+
// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1
16+
// CHECK-NEXT: store [[STRUCT_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8
17+
// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT_S]] addrspace(4)*
18+
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1
1919
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
2020
// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2121
// CHECK: cond.true:
22-
// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT__ZTS1S_S]] addrspace(4)*, [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8, [[TBAA5]]
22+
// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT_S]] addrspace(4)*, [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8
2323
// CHECK-NEXT: br label [[COND_END:%.*]]
2424
// CHECK: cond.false:
2525
// CHECK-NEXT: br label [[COND_END]]
2626
// CHECK: cond.end:
27-
// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT__ZTS1S_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ]
28-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT__ZTS1S_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)*
29-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT__ZTS1S_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)*
30-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false), !tbaa.struct !{{[0-9]+}}
27+
// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ]
28+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)*
29+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)*
30+
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false)
3131
// CHECK-NEXT: ret void
3232
//
3333
S foo(bool cond, S &lhs, S rhs) {

clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -O2 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
44
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],

clang/test/CodeGenSYCL/no_opaque_const-wg-init.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ int main() {
1515
const int WG_CONST = 10;
1616
});
1717
// CHECK: store i32 10, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @{{.*}}WG_CONST{{.*}} to i32 addrspace(4)*)
18-
// CHECK: %{{[0-9]+}} = call {}* @llvm.invariant.start.p4i8(i64 4, i8 addrspace(4)* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @{{.*}}WG_CONST{{.*}} to i8 addrspace(3)*) to i8 addrspace(4)*))
1918

2019
return 0;
2120
}

clang/test/CodeGenSYCL/no_opaque_inline_asm.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,20 @@ class kernel;
55
template <typename name, typename Func>
66
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
77
// CHECK: %[[ARRAY_A:[0-9a-z]+]] = alloca [100 x i32], align 4
8-
// CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[ARRAY_A]], i64 0, i64 0
9-
// CHECK: %[[IDX4:.*]] = addrspacecast i32* %[[IDX]] to i32 addrspace(4)*
8+
// CHECK: %[[I:[0-9a-z]+]] = alloca i32, align 4
9+
// CHECK: %[[ARRAY_A]].ascast = addrspacecast [100 x i32]* %[[ARRAY_A]] to [100 x i32] addrspace(4)*
10+
// CHECK: %[[I]].ascast = addrspacecast i32* %[[I]] to i32 addrspace(4)*
11+
// CHECK: store i32 0, i32 addrspace(4)* %[[I]].ascast, align 4
12+
// CHECK: %0 = load i32, i32 addrspace(4)* %[[I]].ascast, align 4
13+
// CHECK: %[[IDXPROM:[0-9a-z]+]] = sext i32 %0 to i64
14+
// CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(4)* %[[ARRAY_A]].ascast, i64 0, i64 %[[IDXPROM]]
1015
int a[100], i = 0;
1116
// CHECK-NEXT: call void asm sideeffect
1217
// CHECK: ".decl V52 v_type=G type=d num_elts=16 align=GRF
1318
// CHECK: svm_gather.4.1 (M1, 16) $0.0 V52.0
1419
// CHECK: add(M1, 16) V52(0, 0)<1> V52(0, 0)<1; 1, 0> 0x1
1520
// CHECK: svm_scatter.4.1 (M1, 16) $0.0 V52.0",
16-
// CHECK: "rw"(i32 addrspace(4)* %[[IDX4]])
21+
// CHECK: "rw"(i32 addrspace(4)* %[[IDX]])
1722
// TODO: nonnull attribute missing?
1823
asm volatile(".decl V52 v_type=G type=d num_elts=16 align=GRF\n"
1924
"svm_gather.4.1 (M1, 16) %0.0 V52.0\n"

clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -O2 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
#include "sycl.hpp"
44

clang/test/CodeGenSYCL/no_opaque_sampler.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
// CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8
66
// CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)*
77
// CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8
8-
// CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8*
9-
// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4
108
// CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANONCAST]], i32 0, i32 0
119
// CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8
1210
// CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(%"class.sycl::_V1::sampler" addrspace(4)* {{[^,]*}} [[GEP]], %opencl.sampler_t addrspace(2)* [[LOAD_SAMPLER_ARG]])

clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ class Foo {
2626
int main() {
2727
q.submit([&](handler &h) {
2828
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]]
29-
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]]
29+
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]]
3030
h.single_task<class test_kernel1>(
3131
FuncObj());
3232

3333
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]]
34-
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]]
34+
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]]
3535
Foo f;
3636
h.single_task<class test_kernel2>(f);
3737

@@ -47,7 +47,7 @@ int main() {
4747
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4()
4848
// CHECK-NOT: !stall_enable
4949
// CHECK-SAME: {
50-
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #4 align 2{{.*}} !stall_enable ![[NUM4]]
50+
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 align 2{{.*}} !stall_enable ![[NUM4]]
5151
h.single_task<class test_kernel4>(
5252
[]() { func1(); });
5353

0 commit comments

Comments
 (0)