|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
1 | 2 | ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
2 | 3 | ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
3 | 4 |
|
4 |
| -; GCN-LABEL: {{^}}main: |
5 |
| -; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} |
6 |
| -; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1 |
7 | 5 | define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
|
| 6 | +; SI-LABEL: main: |
| 7 | +; SI: ; %bb.0: ; %bb |
| 8 | +; SI-NEXT: v_cvt_i32_f32_e32 v0, v0 |
| 9 | +; SI-NEXT: s_mov_b32 s0, 0 |
| 10 | +; SI-NEXT: s_mov_b32 s1, s0 |
| 11 | +; SI-NEXT: s_mov_b32 s2, s0 |
| 12 | +; SI-NEXT: s_mov_b32 s3, s0 |
| 13 | +; SI-NEXT: s_mov_b32 s4, s0 |
| 14 | +; SI-NEXT: s_mov_b32 s5, s0 |
| 15 | +; SI-NEXT: s_mov_b32 s6, s0 |
| 16 | +; SI-NEXT: s_mov_b32 s7, s0 |
| 17 | +; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm |
| 18 | +; SI-NEXT: v_and_b32_e32 v0, 7, v0 |
| 19 | +; SI-NEXT: v_lshl_b32_e32 v0, 1, v0 |
| 20 | +; SI-NEXT: s_waitcnt vmcnt(0) |
| 21 | +; SI-NEXT: v_and_b32_e32 v0, v2, v0 |
| 22 | +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| 23 | +; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc |
| 24 | +; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, s0, v0 |
| 25 | +; SI-NEXT: ; return to shader part epilog |
| 26 | +; |
| 27 | +; VI-LABEL: main: |
| 28 | +; VI: ; %bb.0: ; %bb |
| 29 | +; VI-NEXT: v_cvt_i32_f32_e32 v0, v0 |
| 30 | +; VI-NEXT: s_mov_b32 s0, 0 |
| 31 | +; VI-NEXT: s_mov_b32 s1, s0 |
| 32 | +; VI-NEXT: s_mov_b32 s2, s0 |
| 33 | +; VI-NEXT: s_mov_b32 s3, s0 |
| 34 | +; VI-NEXT: s_mov_b32 s4, s0 |
| 35 | +; VI-NEXT: s_mov_b32 s5, s0 |
| 36 | +; VI-NEXT: s_mov_b32 s6, s0 |
| 37 | +; VI-NEXT: s_mov_b32 s7, s0 |
| 38 | +; VI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm |
| 39 | +; VI-NEXT: v_and_b32_e32 v0, 7, v0 |
| 40 | +; VI-NEXT: v_lshlrev_b32_e64 v0, v0, 1 |
| 41 | +; VI-NEXT: s_waitcnt vmcnt(0) |
| 42 | +; VI-NEXT: v_and_b32_e32 v0, v2, v0 |
| 43 | +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| 44 | +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc |
| 45 | +; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, s0, v0 |
| 46 | +; VI-NEXT: ; return to shader part epilog |
8 | 47 | bb:
|
9 | 48 | %tmp = fptosi float %arg0 to i32
|
10 | 49 | %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
|
|
0 commit comments