Skip to content

Commit 7839be8

Browse files
committed
Add end-to-end test
1 parent 02279b4 commit 7839be8

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - < %s | FileCheck -check-prefix=SDWA %s
2+
3+
; The shift that scales the LDS index must not be emitted with a WORD_1 select
; on src1. This negative pattern lives under its own prefix and its own
; FileCheck invocation so that it is matched against the complete llc output;
; when it shared a prefix with the positive assertions and preceded the first
; label directive, it only covered the text emitted before the kernel label.
; Register numbers use [0-9]+ so multi-digit VGPRs are covered as well.
; SDWA-NOT: v_lshlrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4+
5+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
6+
target triple = "amdgcn-amd-amdhsa"
7+
8+
; Reduced reproducer kernel. It loads three adjacent bytes from %input.coerce
; (indexed by the work-item id), conditionally stores to addrspace(3), and uses
; the third byte both as a branch condition and as an index into a
; [14 x i32] array. The assertions below pin the exact gfx1030 code expected
; up to and including the indexed store in %bb.3.
define amdgpu_kernel void @kernel(ptr addrspace(1) %input.coerce, i32 %0, i1 %cmp3.i, i32 %add5.1, ptr addrspace(3) %1, ptr addrspace(3) %2) {
9+
; CHECK-LABEL: kernel:
10+
; CHECK-NEXT: ; %bb.0: ; %entry
11+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
12+
; CHECK-NEXT: v_mov_b32_e32 v2, 8
13+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
14+
; CHECK-NEXT: s_clause 0x1
15+
; The three i8 loads of the IR are expected as one 16-bit load plus one byte
; load at offset 2.
; CHECK-NEXT: global_load_ushort v1, v0, s[0:1]
16+
; CHECK-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2
17+
; CHECK-NEXT: s_bitcmp1_b32 s3, 0
18+
; CHECK-NEXT: s_cselect_b32 s3, -1, 0
19+
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s3
20+
; CHECK-NEXT: s_waitcnt vmcnt(1)
21+
; CHECK-NEXT: v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
22+
; CHECK-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
23+
; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1
24+
; CHECK-NEXT: s_waitcnt vmcnt(0)
25+
; The byte loaded at offset 2 is placed above bit 15 of v0 here.
; CHECK-NEXT: v_lshl_or_b32 v0, v0, 16, v1
26+
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
27+
; CHECK-NEXT: ; %bb.1: ; %if.then.i
28+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
29+
; CHECK-NEXT: ds_write_b32 v1, v1
30+
; CHECK-NEXT: .LBB0_2: ; %if.end.i
31+
; v1 = v0 >> 16, so the third byte now sits in the low bits of v1.
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
32+
; CHECK-NEXT: s_mov_b32 s3, exec_lo
33+
; CHECK-NEXT: v_cmpx_ne_u16_e32 0, v1
34+
; CHECK-NEXT: s_xor_b32 s3, exec_lo, s3
35+
; CHECK-NEXT: s_cbranch_execz .LBB0_4
36+
; CHECK-NEXT: ; %bb.3: ; %if.then.i.i.i.i.i
37+
; CHECK-NEXT: v_mov_b32_e32 v2, 2
38+
; Key line of the test: the left shift by 2 must select WORD_0 of v1 as src1.
; NOTE(review): presumably a WORD_1 select would read the already-shifted-out
; upper half of v1 and yield a wrong index - confirm against the original
; bug report.
; CHECK-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
39+
; CHECK-NEXT: v_mov_b32_e32 v2, s2
40+
; CHECK-NEXT: ds_write_b32 v1, v2 offset:84
; No assertions cover the instructions emitted after this store.
41+
42+
; Read the work-item id and load the bytes at indices id, id|1 and id|2 of
; %input.coerce into %4, %5 and %6.
entry:
43+
%3 = tail call i32 @llvm.amdgcn.workitem.id.x()
44+
%idxprom = zext i32 %3 to i64
45+
%arrayidx = getelementptr i8, ptr addrspace(1) %input.coerce, i64 %idxprom
46+
%4 = load i8, ptr addrspace(1) %arrayidx, align 1
47+
%add5.11 = or disjoint i32 %3, 1
48+
%idxprom.1 = zext i32 %add5.11 to i64
49+
%arrayidx.1 = getelementptr i8, ptr addrspace(1) %input.coerce, i64 %idxprom.1
50+
%5 = load i8, ptr addrspace(1) %arrayidx.1, align 1
51+
%add5.2 = or disjoint i32 %3, 2
52+
%idxprom.2 = zext i32 %add5.2 to i64
53+
%arrayidx.2 = getelementptr i8, ptr addrspace(1) %input.coerce, i64 %idxprom.2
54+
%6 = load i8, ptr addrspace(1) %arrayidx.2, align 1
55+
br i1 %cmp3.i, label %if.then.i, label %if.end.i
56+
57+
; Reached from %if.end.i when the third byte (%6) is non-zero: store %0 into
; element %6 of a [14 x i32] array located at address 84 in addrspace(3).
if.then.i.i.i.i.i: ; preds = %if.end.i
58+
%7 = zext i8 %6 to i32
59+
%arrayidx7.i.i.i.i.i = getelementptr nusw [14 x i32], ptr addrspace(3) inttoptr (i32 84 to ptr addrspace(3)), i32 0, i32 %7
60+
store i32 %0, ptr addrspace(3) %arrayidx7.i.i.i.i.i, align 4
61+
br label %func.exit.i.i.i
62+
63+
; Common exit: zero element %5 of the array at %1, element %4 of the array at
; %2, and the first i32 of %input.coerce, then return.
func.exit.i.i.i: ; preds = %if.end.i, %if.then.i.i.i.i.i
64+
%8 = zext i8 %5 to i32
65+
%arrayidx7.i.i.1.i.i.i = getelementptr [14 x i32], ptr addrspace(3) %1, i32 0, i32 %8
66+
store i32 0, ptr addrspace(3) %arrayidx7.i.i.1.i.i.i, align 4
67+
%9 = zext i8 %4 to i32
68+
%arrayidx12.i = getelementptr [14 x i32], ptr addrspace(3) %2, i32 0, i32 %9
69+
store i32 0, ptr addrspace(3) %arrayidx12.i, align 4
70+
store i32 0, ptr addrspace(1) %input.coerce, align 4
71+
ret void
72+
73+
; Taken from %entry when %cmp3.i is true: store 0 through the null
; addrspace(3) pointer, then continue at %if.end.i.
if.then.i: ; preds = %entry
74+
store i32 0, ptr addrspace(3) null, align 4
75+
br label %if.end.i
76+
77+
; Skip the indexed store when the third byte is zero.
if.end.i: ; preds = %if.then.i, %entry
78+
%cmp.not.i.i.i.i.not.i = icmp eq i8 %6, 0
79+
br i1 %cmp.not.i.i.i.i.not.i, label %func.exit.i.i.i, label %if.then.i.i.i.i.i
80+
}
81+
82+
; Intrinsic called in @kernel's entry block; its i32 result is used there as
; the byte index into %input.coerce.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
83+
declare noundef i32 @llvm.amdgcn.workitem.id.x() #0
84+
85+
; Attribute group referenced as #0 by the declaration above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

0 commit comments

Comments
 (0)