|
| 1 | +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +// REQUIRES: amdgpu-registered-target |
| 3 | +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx950 -emit-llvm -fcuda-is-device -o - %s | FileCheck %s |
| 4 | + |
| 5 | +// COM: Most tests are in the OpenCL semantics; this is just a verification for HIP |
| 6 | + |
| 7 | +#define __device__ __attribute__((device)) /* device qualifier spelled as the underlying attribute; applied to both test functions below */ |
| 8 | +#define __shared__ __attribute__((shared)) /* shared qualifier spelled as the underlying attribute; applied to the dst parameters below */ |
| 9 | + |
| 10 | +typedef unsigned int u32; /* shorthand for unsigned int, the pointee type used by test_load_to_lds_u32 */ |
| 11 | + |
| 12 | +// CHECK-LABEL: define dso_local void @_Z20test_load_to_lds_u32PjS_( |
| 13 | +// CHECK-SAME: ptr noundef [[SRC:%.*]], ptr noundef [[DST:%.*]]) #[[ATTR0:[0-9]+]] { |
| 14 | +// CHECK-NEXT: [[ENTRY:.*:]] |
| 15 | +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 16 | +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 17 | +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr |
| 18 | +// CHECK-NEXT: [[DST_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST_ADDR]] to ptr |
| 19 | +// CHECK-NEXT: store ptr [[SRC]], ptr [[SRC_ADDR_ASCAST]], align 8 |
| 20 | +// CHECK-NEXT: store ptr [[DST]], ptr [[DST_ADDR_ASCAST]], align 8 |
| 21 | +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_ADDR_ASCAST]], align 8 |
| 22 | +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DST_ADDR_ASCAST]], align 8 |
| 23 | +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3) |
| 24 | +// CHECK-NEXT: call void @llvm.amdgcn.load.to.lds.p0(ptr [[TMP0]], ptr addrspace(3) [[TMP2]], i32 4, i32 0, i32 0) |
| 25 | +// CHECK-NEXT: ret void |
| 26 | +// |
| 27 | +__device__ void test_load_to_lds_u32(u32* src, __shared__ u32 *dst) { /* Device function whose emitted IR is matched by the autogenerated CHECK lines directly above; keep the body as a single call so those lines stay valid. */ |
| 28 | + __builtin_amdgcn_load_to_lds(src, dst, /*size=*/4, /*offset=*/0, /*aux=*/0); /* The CHECK lines expect dst to be addrspacecast to ptr addrspace(3) and a call to @llvm.amdgcn.load.to.lds.p0 with i32 4, i32 0, i32 0. */ |
| 29 | +} |
| 30 | + |
| 31 | +// CHECK-LABEL: define dso_local void @_Z20test_load_to_lds_128PvS_( |
| 32 | +// CHECK-SAME: ptr noundef [[SRC:%.*]], ptr noundef [[DST:%.*]]) #[[ATTR0]] { |
| 33 | +// CHECK-NEXT: [[ENTRY:.*:]] |
| 34 | +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 35 | +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 36 | +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr |
| 37 | +// CHECK-NEXT: [[DST_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST_ADDR]] to ptr |
| 38 | +// CHECK-NEXT: store ptr [[SRC]], ptr [[SRC_ADDR_ASCAST]], align 8 |
| 39 | +// CHECK-NEXT: store ptr [[DST]], ptr [[DST_ADDR_ASCAST]], align 8 |
| 40 | +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_ADDR_ASCAST]], align 8 |
| 41 | +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DST_ADDR_ASCAST]], align 8 |
| 42 | +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3) |
| 43 | +// CHECK-NEXT: call void @llvm.amdgcn.load.to.lds.p0(ptr [[TMP0]], ptr addrspace(3) [[TMP2]], i32 16, i32 0, i32 0) |
| 44 | +// CHECK-NEXT: ret void |
| 45 | +// |
| 46 | +__device__ void test_load_to_lds_128(void* src, __shared__ void *dst) { /* Same check as test_load_to_lds_u32 but with void pointers and a size argument of 16; the emitted IR is matched by the autogenerated CHECK lines directly above. */ |
| 47 | + __builtin_amdgcn_load_to_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); /* The CHECK lines expect dst to be addrspacecast to ptr addrspace(3) and a call to @llvm.amdgcn.load.to.lds.p0 with i32 16, i32 0, i32 0. */ |
| 48 | +} |
0 commit comments