|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc -march=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=DEFAULT %s |
| 3 | +; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-waitcnt-load-forcezero < %s | FileCheck --check-prefixes=LDZERO %s |
| 4 | + |
| 5 | +define amdgpu_kernel void @copy(ptr addrspace(1) noalias nocapture readonly %src1, ptr addrspace(1) noalias nocapture readonly %src2, ptr addrspace(1) noalias nocapture writeonly %dst1, ptr addrspace(1) noalias nocapture writeonly %dst2) { ; Copies one float per work-item from %src1 to %dst1 and from %src2 to %dst2; the two check blocks below show where s_waitcnt vmcnt is placed without and with -amdgpu-waitcnt-load-forcezero. |
| 6 | +; DEFAULT-LABEL: copy: |
| 7 | +; DEFAULT: ; %bb.0: |
| 8 | +; DEFAULT-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 |
| 9 | +; DEFAULT-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 10 | +; DEFAULT-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 11 | +; DEFAULT-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 12 | +; DEFAULT-NEXT: s_waitcnt lgkmcnt(0) |
| 13 | +; DEFAULT-NEXT: s_clause 0x1 |
| 14 | +; DEFAULT-NEXT: global_load_b32 v1, v0, s[0:1] |
| 15 | +; DEFAULT-NEXT: global_load_b32 v2, v0, s[2:3] |
| 16 | +; DEFAULT-NEXT: s_waitcnt vmcnt(1) |
| 17 | +; DEFAULT-NEXT: global_store_b32 v0, v1, s[4:5] |
| 18 | +; DEFAULT-NEXT: s_waitcnt vmcnt(0) |
| 19 | +; DEFAULT-NEXT: global_store_b32 v0, v2, s[6:7] |
| 20 | +; DEFAULT-NEXT: s_endpgm |
| 21 | +; |
| 22 | +; LDZERO-LABEL: copy: |
| 23 | +; LDZERO: ; %bb.0: |
| 24 | +; LDZERO-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 |
| 25 | +; LDZERO-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 26 | +; LDZERO-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 27 | +; LDZERO-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 28 | +; LDZERO-NEXT: s_waitcnt lgkmcnt(0) |
| 29 | +; LDZERO-NEXT: s_clause 0x1 |
| 30 | +; LDZERO-NEXT: global_load_b32 v1, v0, s[0:1] |
| 31 | +; LDZERO-NEXT: global_load_b32 v2, v0, s[2:3] |
| 32 | +; LDZERO-NEXT: s_waitcnt vmcnt(0) |
| 33 | +; LDZERO-NEXT: s_clause 0x1 |
| 34 | +; LDZERO-NEXT: global_store_b32 v0, v1, s[4:5] |
| 35 | +; LDZERO-NEXT: global_store_b32 v0, v2, s[6:7] |
| 36 | +; LDZERO-NEXT: s_endpgm |
| 37 | + %id = tail call i32 @llvm.amdgcn.workitem.id.x() ; x component of this work-item's id |
| 38 | + %idx = zext i32 %id to i64 ; widened to i64 so it can index the float arrays below |
| 39 | + %gep.ld1 = getelementptr inbounds nuw float, ptr addrspace(1) %src1, i64 %idx ; address of src1[idx] |
| 40 | + %v1 = load float, ptr addrspace(1) %gep.ld1, align 4 ; first load; both loads are issued before either store |
| 41 | + %gep.ld2 = getelementptr inbounds nuw float, ptr addrspace(1) %src2, i64 %idx ; address of src2[idx] |
| 42 | + %v2 = load float, ptr addrspace(1) %gep.ld2, align 4 ; second load, independent of %v1 |
| 43 | + %gep.st1 = getelementptr inbounds nuw float, ptr addrspace(1) %dst1, i64 %idx ; address of dst1[idx] |
| 44 | + store float %v1, ptr addrspace(1) %gep.st1, align 4 ; consumes only the first load's result |
| 45 | + %gep.st2 = getelementptr inbounds nuw float, ptr addrspace(1) %dst2, i64 %idx ; address of dst2[idx] |
| 46 | + store float %v2, ptr addrspace(1) %gep.st2, align 4 ; consumes only the second load's result |
| 47 | + ret void |
| 48 | +} |
0 commit comments