|
| 1 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s |
| 2 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s |
| 3 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s |
| 4 | + |
| 5 | +define <4 x float> @rebroadcast_v4f32(ptr addrspace(1) %arg0) { ; splat of element 1 of a loaded <4 x float> |
| 6 | +; GFX9-LABEL: rebroadcast_v4f32: |
| 7 | +; GFX9: ; %bb.0: ; %entry |
| 8 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 9 | +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| 10 | +; GFX9-NEXT: s_waitcnt vmcnt(0) |
| 11 | +; GFX9-NEXT: v_mov_b32_e32 v0, v1 |
| 12 | +; GFX9-NEXT: v_mov_b32_e32 v2, v1 |
| 13 | +; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| 14 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 15 | +; |
| 16 | +; GFX10-LABEL: rebroadcast_v4f32: |
| 17 | +; GFX10: ; %bb.0: ; %entry |
| 18 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 19 | +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| 20 | +; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 21 | +; GFX10-NEXT: v_mov_b32_e32 v0, v1 |
| 22 | +; GFX10-NEXT: v_mov_b32_e32 v2, v1 |
| 23 | +; GFX10-NEXT: v_mov_b32_e32 v3, v1 |
| 24 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 25 | +; |
| 26 | +; GFX11-LABEL: rebroadcast_v4f32: |
| 27 | +; GFX11: ; %bb.0: ; %entry |
| 28 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 29 | +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off |
| 30 | +; GFX11-NEXT: s_waitcnt vmcnt(0) |
| 31 | +; GFX11-NEXT: v_mov_b32_e32 v0, v1 |
| 32 | +; GFX11-NEXT: v_mov_b32_e32 v2, v1 |
| 33 | +; GFX11-NEXT: v_mov_b32_e32 v3, v1 |
| 34 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 35 | +entry: |
| 36 | + %val0 = load <4 x float>, ptr addrspace(1) %arg0 ; whole-vector load through the addrspace(1) pointer argument |
| 37 | + %val1 = shufflevector <4 x float> %val0, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; mask selects element 1 of %val0 for every result lane; second operand is never read, so poison (not the deprecated undef) is used |
| 38 | + ret <4 x float> %val1 |
| 39 | +} |
0 commit comments