Skip to content

Commit 3e36490

Browse files
author
Leon Clark
committed
[AMDGPU] Add tests for vector rebroadcast.
1 parent 235cea7 commit 3e36490

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
4+
5+
; Rebroadcast test: load a <4 x float> from an addrspace(1) pointer and splat
; lane 1 of the loaded vector into all four lanes of the returned vector.
;
; The checks below expect the full four-dword load to be kept, followed by
; three v_mov_b32 copies of v1 into v0, v2 and v3.
;
; The shuffle mask <1, 1, 1, 1> selects only from the first operand, so the
; second operand is never read; poison is used for it instead of the
; deprecated undef.
define <4 x float> @rebroadcast_v4f32(ptr addrspace(1) %arg0) {
; GFX9-LABEL: rebroadcast_v4f32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: rebroadcast_v4f32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mov_b32_e32 v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: rebroadcast_v4f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: v_mov_b32_e32 v3, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %val0 = load <4 x float>, ptr addrspace(1) %arg0
  %val1 = shufflevector <4 x float> %val0, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %val1
}

0 commit comments

Comments
 (0)