Skip to content

Commit 5cedf25

Browse files
committed
Remove dead test and add test case to rotr.ll
1 parent 957644f commit 5cedf25

File tree

2 files changed

+128
-32
lines changed

2 files changed

+128
-32
lines changed

llvm/test/CodeGen/AMDGPU/rotr-v2i32.ll

Lines changed: 0 additions & 32 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/rotr.ll

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,134 @@ entry:
228228
ret void
229229
}
230230

; Test that an <8 x i32> rotate-right (expressed as the shl/lshr/or idiom)
; is matched to BIT_ALIGN_INT (R600) / v_alignbit_b32 (GCN) per element.
; NOTE(review): CHECK lines below were reflowed after a lossy extraction;
; re-run llvm/utils/update_llc_test_checks.py to confirm exact column alignment.
define amdgpu_kernel void @rotr_v8i32(ptr addrspace(1) %in, <8 x i32> %x, <8 x i32> %y) {
; R600-LABEL: rotr_v8i32:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 13, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T3.X, 0
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     BIT_ALIGN_INT * T0.W, KC0[5].X, KC0[5].X, KC0[7].X,
; R600-NEXT:     BIT_ALIGN_INT * T0.Z, KC0[4].W, KC0[4].W, KC0[6].W,
; R600-NEXT:     BIT_ALIGN_INT * T0.Y, KC0[4].Z, KC0[4].Z, KC0[6].Z,
; R600-NEXT:     BIT_ALIGN_INT * T0.X, KC0[4].Y, KC0[4].Y, KC0[6].Y,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT:     BIT_ALIGN_INT * T2.W, KC0[6].X, KC0[6].X, KC0[8].X,
; R600-NEXT:     BIT_ALIGN_INT * T2.Z, KC0[5].W, KC0[5].W, KC0[7].W,
; R600-NEXT:     BIT_ALIGN_INT * T2.Y, KC0[5].Z, KC0[5].Z, KC0[7].Z,
; R600-NEXT:     BIT_ALIGN_INT * T2.X, KC0[5].Y, KC0[5].Y, KC0[7].Y,
; R600-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
; R600-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; R600-NEXT:     LSHR * T3.X, PV.W, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; SI-LABEL: rotr_v8i32:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x11
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s19
; SI-NEXT:    v_alignbit_b32 v3, s11, s11, v0
; SI-NEXT:    v_mov_b32_e32 v0, s18
; SI-NEXT:    v_alignbit_b32 v2, s10, s10, v0
; SI-NEXT:    v_mov_b32_e32 v0, s17
; SI-NEXT:    v_alignbit_b32 v1, s9, s9, v0
; SI-NEXT:    v_mov_b32_e32 v0, s16
; SI-NEXT:    v_alignbit_b32 v0, s8, s8, v0
; SI-NEXT:    v_mov_b32_e32 v4, s23
; SI-NEXT:    v_alignbit_b32 v7, s15, s15, v4
; SI-NEXT:    v_mov_b32_e32 v4, s22
; SI-NEXT:    v_alignbit_b32 v6, s14, s14, v4
; SI-NEXT:    v_mov_b32_e32 v4, s21
; SI-NEXT:    v_alignbit_b32 v5, s13, s13, v4
; SI-NEXT:    v_mov_b32_e32 v4, s20
; SI-NEXT:    v_alignbit_b32 v4, s12, s12, v4
; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX8-LABEL: rotr_v8i32:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x44
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, s18
; GFX8-NEXT:    v_mov_b32_e32 v4, s17
; GFX8-NEXT:    v_alignbit_b32 v2, s10, s10, v1
; GFX8-NEXT:    v_alignbit_b32 v1, s9, s9, v4
; GFX8-NEXT:    v_mov_b32_e32 v4, s23
; GFX8-NEXT:    v_alignbit_b32 v7, s15, s15, v4
; GFX8-NEXT:    v_mov_b32_e32 v4, s22
; GFX8-NEXT:    s_add_u32 s2, s0, 16
; GFX8-NEXT:    v_alignbit_b32 v6, s14, s14, v4
; GFX8-NEXT:    v_mov_b32_e32 v4, s21
; GFX8-NEXT:    s_addc_u32 s3, s1, 0
; GFX8-NEXT:    v_alignbit_b32 v5, s13, s13, v4
; GFX8-NEXT:    v_mov_b32_e32 v4, s20
; GFX8-NEXT:    v_mov_b32_e32 v9, s3
; GFX8-NEXT:    v_mov_b32_e32 v0, s19
; GFX8-NEXT:    v_alignbit_b32 v4, s12, s12, v4
; GFX8-NEXT:    v_mov_b32_e32 v8, s2
; GFX8-NEXT:    v_alignbit_b32 v3, s11, s11, v0
; GFX8-NEXT:    v_mov_b32_e32 v0, s16
; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GFX8-NEXT:    v_alignbit_b32 v0, s8, s8, v0
; GFX8-NEXT:    v_mov_b32_e32 v5, s1
; GFX8-NEXT:    v_mov_b32_e32 v4, s0
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: rotr_v8i32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x44
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v8, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_alignbit_b32 v7, s15, s15, s23
; GFX10-NEXT:    v_alignbit_b32 v6, s14, s14, s22
; GFX10-NEXT:    v_alignbit_b32 v5, s13, s13, s21
; GFX10-NEXT:    v_alignbit_b32 v4, s12, s12, s20
; GFX10-NEXT:    v_alignbit_b32 v3, s11, s11, s19
; GFX10-NEXT:    v_alignbit_b32 v2, s10, s10, s18
; GFX10-NEXT:    v_alignbit_b32 v1, s9, s9, s17
; GFX10-NEXT:    v_alignbit_b32 v0, s8, s8, s16
; GFX10-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX10-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: rotr_v8i32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b512 s[8:23], s[4:5], 0x44
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_alignbit_b32 v7, s15, s15, s23
; GFX11-NEXT:    v_alignbit_b32 v6, s14, s14, s22
; GFX11-NEXT:    v_alignbit_b32 v5, s13, s13, s21
; GFX11-NEXT:    v_alignbit_b32 v4, s12, s12, s20
; GFX11-NEXT:    v_alignbit_b32 v3, s11, s11, s19
; GFX11-NEXT:    v_alignbit_b32 v2, s10, s10, s18
; GFX11-NEXT:    v_alignbit_b32 v1, s9, s9, s17
; GFX11-NEXT:    v_alignbit_b32 v0, s8, s8, s16
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b128 v8, v[4:7], s[0:1] offset:16
; GFX11-NEXT:    global_store_b128 v8, v[0:3], s[0:1]
; GFX11-NEXT:    s_endpgm
entry:
  ; rotr(x, y) spelled as (x << (32 - y)) | (x >> y), per lane.
  %tmp0 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, %y
  %tmp1 = shl <8 x i32> %x, %tmp0
  %tmp2 = lshr <8 x i32> %x, %y
  %tmp3 = or <8 x i32> %tmp1, %tmp2
  store <8 x i32> %tmp3, ptr addrspace(1) %in
  ret void
}
231359
declare i16 @llvm.fshr.i16(i16, i16, i16)
232360

233361
define void @test_rotr_i16(ptr addrspace(1) nocapture readonly %sourceA, ptr addrspace(1) nocapture readonly %sourceB, ptr addrspace(1) nocapture %destValues) {

0 commit comments

Comments
 (0)