Skip to content

Commit a71b462

Browse files
committed
AMDGPU: Add gfx9 run line to scalar_to_vector test
1 parent acbd822 commit a71b462

File tree

1 file changed

+93
-11
lines changed

1 file changed

+93
-11
lines changed

llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI
3-
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
3+
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX89,VI
4+
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX89,GFX9
45

56
; XXX - Why the packing?
67
define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
@@ -43,6 +44,27 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add
4344
; VI-NEXT: v_mov_b32_e32 v1, v0
4445
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4546
; VI-NEXT: s_endpgm
47+
;
48+
; GFX9-LABEL: scalar_to_vector_v2i32:
49+
; GFX9: ; %bb.0:
50+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
51+
; GFX9-NEXT: s_mov_b32 s7, 0xf000
52+
; GFX9-NEXT: s_mov_b32 s6, -1
53+
; GFX9-NEXT: s_mov_b32 s10, s6
54+
; GFX9-NEXT: s_mov_b32 s11, s7
55+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
56+
; GFX9-NEXT: s_mov_b32 s8, s2
57+
; GFX9-NEXT: s_mov_b32 s9, s3
58+
; GFX9-NEXT: buffer_load_dword v0, off, s[8:11], 0
59+
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000
60+
; GFX9-NEXT: s_mov_b32 s4, s0
61+
; GFX9-NEXT: s_mov_b32 s5, s1
62+
; GFX9-NEXT: s_waitcnt vmcnt(0)
63+
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
64+
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
65+
; GFX9-NEXT: v_mov_b32_e32 v1, v0
66+
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
67+
; GFX9-NEXT: s_endpgm
4668
%tmp1 = load i32, ptr addrspace(1) %in, align 4
4769
%bc = bitcast i32 %tmp1 to <2 x i16>
4870
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -90,6 +112,27 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add
90112
; VI-NEXT: v_mov_b32_e32 v1, v0
91113
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
92114
; VI-NEXT: s_endpgm
115+
;
116+
; GFX9-LABEL: scalar_to_vector_v2f32:
117+
; GFX9: ; %bb.0:
118+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
119+
; GFX9-NEXT: s_mov_b32 s7, 0xf000
120+
; GFX9-NEXT: s_mov_b32 s6, -1
121+
; GFX9-NEXT: s_mov_b32 s10, s6
122+
; GFX9-NEXT: s_mov_b32 s11, s7
123+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
124+
; GFX9-NEXT: s_mov_b32 s8, s2
125+
; GFX9-NEXT: s_mov_b32 s9, s3
126+
; GFX9-NEXT: buffer_load_dword v0, off, s[8:11], 0
127+
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000
128+
; GFX9-NEXT: s_mov_b32 s4, s0
129+
; GFX9-NEXT: s_mov_b32 s5, s1
130+
; GFX9-NEXT: s_waitcnt vmcnt(0)
131+
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
132+
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
133+
; GFX9-NEXT: v_mov_b32_e32 v1, v0
134+
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
135+
; GFX9-NEXT: s_endpgm
93136
%tmp1 = load float, ptr addrspace(1) %in, align 4
94137
%bc = bitcast float %tmp1 to <2 x i16>
95138
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -130,6 +173,23 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
130173
; VI-NEXT: v_mov_b32_e32 v1, s0
131174
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
132175
; VI-NEXT: s_endpgm
176+
;
177+
; GFX9-LABEL: scalar_to_vector_v4i16:
178+
; GFX9: ; %bb.0: ; %bb
179+
; GFX9-NEXT: s_mov_b32 s3, 0xf000
180+
; GFX9-NEXT: s_mov_b32 s2, -1
181+
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
182+
; GFX9-NEXT: s_waitcnt vmcnt(0)
183+
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
184+
; GFX9-NEXT: s_lshl_b32 s1, s0, 8
185+
; GFX9-NEXT: s_or_b32 s0, s0, s1
186+
; GFX9-NEXT: s_and_b32 s1, s0, 0xffff
187+
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
188+
; GFX9-NEXT: s_or_b32 s0, s1, s0
189+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
190+
; GFX9-NEXT: v_mov_b32_e32 v1, s0
191+
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
192+
; GFX9-NEXT: s_endpgm
133193
bb:
134194
%tmp = load <2 x i8>, ptr addrspace(1) undef, align 1
135195
%tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -176,6 +236,28 @@ define amdgpu_kernel void @scalar_to_vector_v4f16() {
176236
; VI-NEXT: v_mov_b32_e32 v1, s1
177237
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
178238
; VI-NEXT: s_endpgm
239+
;
240+
; GFX9-LABEL: scalar_to_vector_v4f16:
241+
; GFX9: ; %bb.0: ; %bb
242+
; GFX9-NEXT: s_mov_b32 s3, 0xf000
243+
; GFX9-NEXT: s_mov_b32 s2, -1
244+
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
245+
; GFX9-NEXT: s_waitcnt vmcnt(0)
246+
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
247+
; GFX9-NEXT: s_lshl_b32 s1, s0, 8
248+
; GFX9-NEXT: s_or_b32 s0, s1, s0
249+
; GFX9-NEXT: s_and_b32 s1, s0, 0xff00
250+
; GFX9-NEXT: s_bfe_u32 s4, s0, 0x80008
251+
; GFX9-NEXT: s_or_b32 s1, s4, s1
252+
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
253+
; GFX9-NEXT: s_and_b32 s4, s1, 0xffff
254+
; GFX9-NEXT: s_lshl_b32 s1, s1, 16
255+
; GFX9-NEXT: s_or_b32 s4, s4, s1
256+
; GFX9-NEXT: s_or_b32 s0, s0, s1
257+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
258+
; GFX9-NEXT: v_mov_b32_e32 v1, s4
259+
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
260+
; GFX9-NEXT: s_endpgm
179261
bb:
180262
%load = load half, ptr addrspace(1) undef, align 1
181263
%tmp = bitcast half %load to <2 x i8>
@@ -235,16 +317,16 @@ define amdgpu_kernel void @scalar_to_vector_test6(ptr addrspace(1) %out, i8 zero
235317
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
236318
; SI-NEXT: s_endpgm
237319
;
238-
; VI-LABEL: scalar_to_vector_test6:
239-
; VI: ; %bb.0:
240-
; VI-NEXT: s_load_dword s6, s[4:5], 0x2c
241-
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
242-
; VI-NEXT: s_mov_b32 s3, 0xf000
243-
; VI-NEXT: s_mov_b32 s2, -1
244-
; VI-NEXT: s_waitcnt lgkmcnt(0)
245-
; VI-NEXT: v_mov_b32_e32 v0, s6
246-
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
247-
; VI-NEXT: s_endpgm
320+
; GFX89-LABEL: scalar_to_vector_test6:
321+
; GFX89: ; %bb.0:
322+
; GFX89-NEXT: s_load_dword s6, s[4:5], 0x2c
323+
; GFX89-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
324+
; GFX89-NEXT: s_mov_b32 s3, 0xf000
325+
; GFX89-NEXT: s_mov_b32 s2, -1
326+
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
327+
; GFX89-NEXT: v_mov_b32_e32 v0, s6
328+
; GFX89-NEXT: buffer_store_dword v0, off, s[0:3], 0
329+
; GFX89-NEXT: s_endpgm
248330
%newvec0 = insertelement <4 x i8> undef, i8 %val, i32 0
249331
%bc = bitcast <4 x i8> %newvec0 to <2 x half>
250332
store <2 x half> %bc, ptr addrspace(1) %out

0 commit comments

Comments
 (0)