|
1 |
| -// RUN: %clangxx -O0 -fsycl -fsycl-device-only -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t |
| 1 | +// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t |
2 | 2 | // RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
|
3 |
| -// RUN: FileCheck %s -input-file=%t_esimd_0.ll |
| 3 | +// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL |
| 4 | + |
| 5 | +// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t |
| 6 | +// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table |
| 7 | +// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS |
4 | 8 |
|
5 | 9 | // Checks ESIMD intrinsic translation with opaque pointers.
|
6 | 10 | // NOTE: must be run at -O0, as the optimizer otherwise optimizes away some of the code
|
@@ -266,31 +270,35 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
|
266 | 270 |
|
267 | 271 | // 4-byte element gather
|
268 | 272 | simd<int, 8> v = gather<int, 8>(acc, offsets, 100);
|
269 |
| - // CHECK: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
270 |
| - // CHECK: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]] |
271 |
| - // CHECK: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]] |
272 |
| - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) |
| 273 | + // CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
| 274 | + // CHECK-STATEFUL: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]] |
| 275 | + // CHECK-STATEFUL: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]] |
| 276 | + // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) |
| 277 | + // CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> undef) |
273 | 278 |
|
274 | 279 | // 4-byte element scatter
|
275 | 280 | scatter<int, 8>(acc, offsets, v, 100, pred);
|
276 |
| - // CHECK: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
277 |
| - // CHECK: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]] |
278 |
| - // CHECK: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]] |
279 |
| - // CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) |
| 281 | + // CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
| 282 | + // CHECK-STATEFUL: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]] |
| 283 | + // CHECK-STATEFUL: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]] |
| 284 | + // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) |
| 285 | + // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) |
280 | 286 |
|
281 | 287 | // 1-byte element gather
|
282 | 288 | simd<unsigned char, 8> v1 = gather<unsigned char, 8>(acc, offsets, 100);
|
283 |
| - // CHECK: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
284 |
| - // CHECK: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]] |
285 |
| - // CHECK: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]] |
286 |
| - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) |
| 289 | + // CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
| 290 | + // CHECK-STATEFUL: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]] |
| 291 | + // CHECK-STATEFUL: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]] |
| 292 | + // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) |
| 293 | + // CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> undef) |
287 | 294 |
|
288 | 295 | // 1-byte element scatter
|
289 | 296 | scatter<unsigned char, 8>(acc, offsets, v1, 100, pred);
|
290 |
| - // CHECK: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
291 |
| - // CHECK: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]] |
292 |
| - // CHECK: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]] |
293 |
| - // CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) |
| 297 | + // CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) |
| 298 | + // CHECK-STATEFUL: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]] |
| 299 | + // CHECK-STATEFUL: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]] |
| 300 | + // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) |
| 301 | + // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> %{{[0-9a-zA-Z_.]+}}) |
294 | 302 | }
|
295 | 303 | __esimd_fence(fence_mask::global_coherent_fence);
|
296 | 304 | // CHECK: call void @llvm.genx.fence(i8 1)
|
|
0 commit comments