Skip to content

Commit 49d6e39

Browse files
authored
[AMDGPU] add tests for loop definition of bitconvert (#133052)
This PR adds test cases for all types of bit conversion; it prepares for PR #132899. All tests pass due to: 1. For the DAG, the pattern will not separate SReg and VReg. One sample is: ``` define <2 x double> @v_bitcast_v4f32_to_v2f64(<4 x float> inreg %a, i32 %b) { %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false cmp.true: %a1 = fadd <4 x float> %a, splat (float 1.000000e+00) %a2 = bitcast <4 x float> %a1 to <2 x double> br label %end cmp.false: %a3 = bitcast <4 x float> %a to <2 x double> br label %end end: %phi = phi <2 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ] ret <2 x double> %phi } ``` It is supposed to select from the scalar register patterns, but the VReg pattern is matched, as follows: ``` Debug log: ISEL: Starting selection on root node: t3: v2f64 = bitcast t2 ISEL: Starting pattern match Initial Opcode index to 440336 Skipped scope entry (due to false predicate) at index 440339, continuing at 440367 Skipped scope entry (due to false predicate) at index 440368, continuing at 440396 Skipped scope entry (due to false predicate) at index 440397, continuing at 440435 Skipped scope entry (due to false predicate) at index 440436, continuing at 440467 Skipped scope entry (due to false predicate) at index 440468, continuing at 440499 Skipped scope entry (due to false predicate) at index 440500, continuing at 440552 Skipped scope entry (due to false predicate) at index 440553, continuing at 440587 Skipped scope entry (due to false predicate) at index 440588, continuing at 440622 Skipped scope entry (due to false predicate) at index 440623, continuing at 440657 Skipped scope entry (due to false predicate) at index 440658, continuing at 440692 Skipped scope entry (due to false predicate) at index 440693, continuing at 440727 Skipped scope entry (due to false predicate) at index 440728, continuing at 440769 Skipped scope entry (due to false predicate) at index 440770, continuing at 440798 Skipped scope entry (due to false predicate) at index 
440799, continuing at 440836 Skipped scope entry (due to false predicate) at index 440837, continuing at 440870 TypeSwitch[v2f64] from 440873 to 440892 Patterns: /*440892*/ OPC_CompleteMatch, 1, 0, // Src: (bitconvert:{ *:[v2f64] } VReg_128:{ *:[v4f32] }:$src0) - Complexity = 3 // Dst: VReg_128:{ *:[v2f64] }:$src0 ``` 2. Global ISel will use `Select_COPY` to select bitcast.
1 parent ac42b08 commit 49d6e39

26 files changed

+219198
-33893
lines changed

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 108541 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll

Lines changed: 11387 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
7+
8+
; Bitcast test, <5 x i32> to <5 x float>.
; %a is the vector to reinterpret and %b is a selector. When %b == 0 every lane
; of %a has 3 added before the bitcast; otherwise %a is bitcast unchanged. The
; two bitcast results are merged by a phi and returned.
; The check lines inside the function are autogenerated (see the NOTE line at
; the top of the file) for the four RUN configurations, using the prefixes
; GCN, VI, GFX9 and GFX11. In that expected assembly the only work inside the
; conditional region is the five per-lane adds; neither bitcast contributes an
; instruction of its own.
define <5 x float> @bitcast_v5i32_to_v5f32(<5 x i32> %a, i32 %b) {
9+
; GCN-LABEL: bitcast_v5i32_to_v5f32:
10+
; GCN: ; %bb.0:
11+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
13+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
14+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
15+
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
16+
; GCN-NEXT: s_cbranch_execz .LBB0_2
17+
; GCN-NEXT: ; %bb.1: ; %cmp.true
18+
; GCN-NEXT: v_add_i32_e32 v4, vcc, 3, v4
19+
; GCN-NEXT: v_add_i32_e32 v3, vcc, 3, v3
20+
; GCN-NEXT: v_add_i32_e32 v2, vcc, 3, v2
21+
; GCN-NEXT: v_add_i32_e32 v1, vcc, 3, v1
22+
; GCN-NEXT: v_add_i32_e32 v0, vcc, 3, v0
23+
; GCN-NEXT: .LBB0_2: ; %end
24+
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
25+
; GCN-NEXT: s_setpc_b64 s[30:31]
26+
;
27+
; VI-LABEL: bitcast_v5i32_to_v5f32:
28+
; VI: ; %bb.0:
29+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30+
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
31+
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
32+
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
33+
; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
34+
; VI-NEXT: ; %bb.1: ; %cmp.true
35+
; VI-NEXT: v_add_u32_e32 v4, vcc, 3, v4
36+
; VI-NEXT: v_add_u32_e32 v3, vcc, 3, v3
37+
; VI-NEXT: v_add_u32_e32 v2, vcc, 3, v2
38+
; VI-NEXT: v_add_u32_e32 v1, vcc, 3, v1
39+
; VI-NEXT: v_add_u32_e32 v0, vcc, 3, v0
40+
; VI-NEXT: ; %bb.2: ; %end
41+
; VI-NEXT: s_or_b64 exec, exec, s[4:5]
42+
; VI-NEXT: s_setpc_b64 s[30:31]
43+
;
44+
; GFX9-LABEL: bitcast_v5i32_to_v5f32:
45+
; GFX9: ; %bb.0:
46+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47+
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
48+
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
49+
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
50+
; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
51+
; GFX9-NEXT: ; %bb.1: ; %cmp.true
52+
; GFX9-NEXT: v_add_u32_e32 v4, 3, v4
53+
; GFX9-NEXT: v_add_u32_e32 v3, 3, v3
54+
; GFX9-NEXT: v_add_u32_e32 v2, 3, v2
55+
; GFX9-NEXT: v_add_u32_e32 v1, 3, v1
56+
; GFX9-NEXT: v_add_u32_e32 v0, 3, v0
57+
; GFX9-NEXT: ; %bb.2: ; %end
58+
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
59+
; GFX9-NEXT: s_setpc_b64 s[30:31]
60+
;
61+
; GFX11-LABEL: bitcast_v5i32_to_v5f32:
62+
; GFX11: ; %bb.0:
63+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; GFX11-NEXT: s_mov_b32 s0, exec_lo
65+
; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v5
66+
; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
67+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
68+
; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
69+
; GFX11-NEXT: ; %bb.1: ; %cmp.true
70+
; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v4
71+
; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v3
72+
; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v2
73+
; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v1
74+
; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v0
75+
; GFX11-NEXT: ; %bb.2: ; %end
76+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
77+
; GFX11-NEXT: s_setpc_b64 s[30:31]
78+
; Branch on the selector: %b == 0 takes the cmp.true path, anything else cmp.false.
%cmp = icmp eq i32 %b, 0
79+
br i1 %cmp, label %cmp.true, label %cmp.false
80+
81+
cmp.true:
82+
; Add 3 to every lane first so the bitcast operand on this path is a computed
; value rather than the incoming argument.
%a1 = add <5 x i32> %a, splat (i32 3)
83+
%a2 = bitcast <5 x i32> %a1 to <5 x float>
84+
br label %end
85+
86+
cmp.false:
87+
; Bitcast the incoming argument directly, with no arithmetic in between.
%a3 = bitcast <5 x i32> %a to <5 x float>
88+
br label %end
89+
90+
end:
91+
; Merge the bitcast results from both predecessors and return the selected one.
%phi = phi <5 x float> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
92+
ret <5 x float> %phi
93+
}
94+
95+
; Bitcast test, <5 x float> to <5 x i32> (the reverse direction of
; @bitcast_v5i32_to_v5f32).
; %a is the vector to reinterpret and %b is a selector. When %b == 0 every lane
; of %a has 1.0 added before the bitcast; otherwise %a is bitcast unchanged. The
; two bitcast results are merged by a phi and returned.
; The check lines inside the function are autogenerated (see the NOTE line at
; the top of the file) for the four RUN configurations, using the prefixes
; GCN, VI, GFX9 and GFX11. In that expected assembly the only work inside the
; conditional region is the per-lane float adds (paired into v_dual_add_f32 in
; the GFX11 output); neither bitcast contributes an instruction of its own.
define <5 x i32> @bitcast_v5f32_to_v5i32(<5 x float> %a, i32 %b) {
96+
; GCN-LABEL: bitcast_v5f32_to_v5i32:
97+
; GCN: ; %bb.0:
98+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
100+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
101+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
102+
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
103+
; GCN-NEXT: s_cbranch_execz .LBB1_2
104+
; GCN-NEXT: ; %bb.1: ; %cmp.true
105+
; GCN-NEXT: v_add_f32_e32 v4, 1.0, v4
106+
; GCN-NEXT: v_add_f32_e32 v3, 1.0, v3
107+
; GCN-NEXT: v_add_f32_e32 v2, 1.0, v2
108+
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
109+
; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
110+
; GCN-NEXT: .LBB1_2: ; %end
111+
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
112+
; GCN-NEXT: s_setpc_b64 s[30:31]
113+
;
114+
; VI-LABEL: bitcast_v5f32_to_v5i32:
115+
; VI: ; %bb.0:
116+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117+
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
118+
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
119+
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
120+
; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
121+
; VI-NEXT: ; %bb.1: ; %cmp.true
122+
; VI-NEXT: v_add_f32_e32 v4, 1.0, v4
123+
; VI-NEXT: v_add_f32_e32 v3, 1.0, v3
124+
; VI-NEXT: v_add_f32_e32 v2, 1.0, v2
125+
; VI-NEXT: v_add_f32_e32 v1, 1.0, v1
126+
; VI-NEXT: v_add_f32_e32 v0, 1.0, v0
127+
; VI-NEXT: ; %bb.2: ; %end
128+
; VI-NEXT: s_or_b64 exec, exec, s[4:5]
129+
; VI-NEXT: s_setpc_b64 s[30:31]
130+
;
131+
; GFX9-LABEL: bitcast_v5f32_to_v5i32:
132+
; GFX9: ; %bb.0:
133+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134+
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
135+
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
136+
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
137+
; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
138+
; GFX9-NEXT: ; %bb.1: ; %cmp.true
139+
; GFX9-NEXT: v_add_f32_e32 v4, 1.0, v4
140+
; GFX9-NEXT: v_add_f32_e32 v3, 1.0, v3
141+
; GFX9-NEXT: v_add_f32_e32 v2, 1.0, v2
142+
; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
143+
; GFX9-NEXT: v_add_f32_e32 v0, 1.0, v0
144+
; GFX9-NEXT: ; %bb.2: ; %end
145+
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
146+
; GFX9-NEXT: s_setpc_b64 s[30:31]
147+
;
148+
; GFX11-LABEL: bitcast_v5f32_to_v5i32:
149+
; GFX11: ; %bb.0:
150+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151+
; GFX11-NEXT: s_mov_b32 s0, exec_lo
152+
; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v5
153+
; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
154+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
155+
; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
156+
; GFX11-NEXT: ; %bb.1: ; %cmp.true
157+
; GFX11-NEXT: v_dual_add_f32 v4, 1.0, v4 :: v_dual_add_f32 v3, 1.0, v3
158+
; GFX11-NEXT: v_dual_add_f32 v2, 1.0, v2 :: v_dual_add_f32 v1, 1.0, v1
159+
; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
160+
; GFX11-NEXT: ; %bb.2: ; %end
161+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
162+
; GFX11-NEXT: s_setpc_b64 s[30:31]
163+
; Branch on the selector: %b == 0 takes the cmp.true path, anything else cmp.false.
%cmp = icmp eq i32 %b, 0
164+
br i1 %cmp, label %cmp.true, label %cmp.false
165+
166+
cmp.true:
167+
; Add 1.0 to every lane first so the bitcast operand on this path is a computed
; value rather than the incoming argument.
%a1 = fadd <5 x float> %a, splat (float 1.000000e+00)
168+
%a2 = bitcast <5 x float> %a1 to <5 x i32>
169+
br label %end
170+
171+
cmp.false:
172+
; Bitcast the incoming argument directly, with no arithmetic in between.
%a3 = bitcast <5 x float> %a to <5 x i32>
173+
br label %end
174+
175+
end:
176+
; Merge the bitcast results from both predecessors and return the selected one.
%phi = phi <5 x i32> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
177+
ret <5 x i32> %phi
178+
}

0 commit comments

Comments
 (0)