@@ -56,3 +56,145 @@ if.true:
56
56
%val = load volatile i32 , i32 addrspace (1 )* undef
57
57
br label %endif
58
58
}
59
+
60
+ ; Make sure an 'and' with 1 is inserted on the llvm.amdgcn.if condition when that i1 is truncated from an i32 argument.
61
+ define i32 @divergent_if_nonboolean_condition0 (i32 %value ) {
62
+ ; CHECK-LABEL: divergent_if_nonboolean_condition0:
63
+ ; CHECK: ; %bb.0: ; %entry
64
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65
+ ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
66
+ ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
67
+ ; CHECK-NEXT: ; implicit-def: $vgpr0
68
+ ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
69
+ ; CHECK-NEXT: s_cbranch_execz BB2_2
70
+ ; CHECK-NEXT: ; %bb.1: ; %if.true
71
+ ; CHECK-NEXT: global_load_dword v0, v[0:1], off
72
+ ; CHECK-NEXT: BB2_2: ; %endif
73
+ ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
74
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
75
+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
76
+ entry:
77
+ %c = trunc i32 %value to i1 ; branch condition is only the low bit of %value
78
+ br i1 %c , label %if.true , label %endif ; low bit set -> if.true, otherwise skip to endif
79
+
80
+ if.true:
81
+ %val = load volatile i32 , i32 addrspace (1 )* undef ; volatile load through an undef addrspace(1) pointer
82
+ br label %endif
83
+
84
+ endif:
85
+ %v = phi i32 [ %val , %if.true ], [ undef , %entry ] ; loaded value via if.true, undef when entry branched here directly
86
+ ret i32 %v
87
+ }
88
+
89
+ ; Make sure an 'and' with 1 is inserted on the llvm.amdgcn.if condition when that i1 is truncated from an i32 loaded through %ptr.
90
+ define i32 @divergent_if_nonboolean_condition1 (i32 addrspace (1 )* %ptr ) {
91
+ ; CHECK-LABEL: divergent_if_nonboolean_condition1:
92
+ ; CHECK: ; %bb.0: ; %entry
93
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94
+ ; CHECK-NEXT: global_load_dword v0, v[0:1], off
95
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
96
+ ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
97
+ ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
98
+ ; CHECK-NEXT: ; implicit-def: $vgpr0
99
+ ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
100
+ ; CHECK-NEXT: s_cbranch_execz BB3_2
101
+ ; CHECK-NEXT: ; %bb.1: ; %if.true
102
+ ; CHECK-NEXT: global_load_dword v0, v[0:1], off
103
+ ; CHECK-NEXT: BB3_2: ; %endif
104
+ ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
105
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
106
+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
107
+ entry:
108
+ %value = load i32 , i32 addrspace (1 )* %ptr ; the i32 to be truncated comes from memory in this variant
109
+ %c = trunc i32 %value to i1 ; branch condition is only the low bit of the loaded value
110
+ br i1 %c , label %if.true , label %endif ; low bit set -> if.true, otherwise skip to endif
111
+
112
+ if.true:
113
+ %val = load volatile i32 , i32 addrspace (1 )* undef ; volatile load through an undef addrspace(1) pointer
114
+ br label %endif
115
+
116
+ endif:
117
+ %v = phi i32 [ %val , %if.true ], [ undef , %entry ] ; loaded value via if.true, undef when entry branched here directly
118
+ ret i32 %v
119
+ }
120
+
121
+ @external_constant = external addrspace (4 ) constant i32 , align 4 ; i32 read in block bb of @constrained_if_register_class
121
+ @const.ptr = external addrspace (4 ) constant float *, align 4 ; float pointer read in block bb of @constrained_if_register_class
123
+
124
+ ; Make sure this case compiles. G_ICMP was mis-mapped because its
125
+ ; result register class had been constrained by the llvm.amdgcn.if lowering.
126
+ define void @constrained_if_register_class () {
127
+ ; CHECK-LABEL: constrained_if_register_class:
128
+ ; CHECK: ; %bb.0: ; %bb
129
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130
+ ; CHECK-NEXT: s_getpc_b64 s[4:5]
131
+ ; CHECK-NEXT: s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
132
+ ; CHECK-NEXT: s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+4
133
+ ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
134
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
135
+ ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x0
136
+ ; CHECK-NEXT: s_getpc_b64 s[4:5]
137
+ ; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
138
+ ; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+4
139
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
140
+ ; CHECK-NEXT: s_cmp_lg_u32 s6, 0
141
+ ; CHECK-NEXT: s_cselect_b32 s6, 1, 0
142
+ ; CHECK-NEXT: s_and_b32 s6, s6, 1
143
+ ; CHECK-NEXT: s_cmp_lg_u32 s6, 0
144
+ ; CHECK-NEXT: s_cbranch_scc1 BB4_6
145
+ ; CHECK-NEXT: ; %bb.1: ; %bb2
146
+ ; CHECK-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
147
+ ; CHECK-NEXT: s_mov_b32 s4, -1
148
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
149
+ ; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
150
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
151
+ ; CHECK-NEXT: v_mov_b32_e32 v0, s6
152
+ ; CHECK-NEXT: v_mov_b32_e32 v1, s7
153
+ ; CHECK-NEXT: flat_load_dword v0, v[0:1]
154
+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1
155
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
156
+ ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
157
+ ; CHECK-NEXT: s_xor_b64 s[8:9], vcc, s[6:7]
158
+ ; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
159
+ ; CHECK-NEXT: ; %bb.2: ; %bb7
160
+ ; CHECK-NEXT: s_mov_b32 s4, 0
161
+ ; CHECK-NEXT: ; %bb.3: ; %bb8
162
+ ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
163
+ ; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], s4, 0
164
+ ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[6:7]
165
+ ; CHECK-NEXT: s_cbranch_execz BB4_5
166
+ ; CHECK-NEXT: ; %bb.4: ; %bb11
167
+ ; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
168
+ ; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], s33 offen
169
+ ; CHECK-NEXT: BB4_5: ; %Flow
170
+ ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
171
+ ; CHECK-NEXT: BB4_6: ; %bb12
172
+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
173
+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
174
+ bb:
175
+ %tmp = load i32 , i32 addrspace (4 )* @external_constant ; i32 that selects between bb12 and bb2 below
176
+ %ptr = load float *, float * addrspace (4 )* @const.ptr ; pointer dereferenced later in bb2
177
+ %tmp1 = icmp ne i32 %tmp , 0
178
+ br i1 %tmp1 , label %bb12 , label %bb2 ; nonzero %tmp goes straight to the return block
179
+
180
+ bb2:
181
+ %tmp4 = load float , float * %ptr , align 4
182
+ %tmp5 = fcmp olt float %tmp4 , 1 .0
183
+ %tmp6 = or i1 %tmp5 , false ; or with false leaves the value of %tmp5 unchanged
184
+ br i1 %tmp6 , label %bb8 , label %bb7 ; condition true skips bb7
185
+
186
+ bb7:
187
+ br label %bb8 ; no work here; this edge supplies the 0 incoming value of %tmp9
188
+
189
+ bb8:
190
+ %tmp9 = phi i32 [ 0 , %bb7 ], [ -1 , %bb2 ] ; 0 when reached through bb7, -1 when reached directly from bb2
191
+ %tmp10 = icmp eq i32 %tmp9 , 0 ; true only when bb8 was reached through bb7
192
+ br i1 %tmp10 , label %bb11 , label %bb12
193
+
194
+ bb11:
195
+ store float 4 .0 , float addrspace (5 )* undef , align 4 ; store through an undef addrspace(5) pointer
196
+ br label %bb12
197
+
198
+ bb12:
199
+ ret void
200
+ }
0 commit comments