Skip to content

Commit d12bd25

Browse files
committed
DAG: Fix vector bin op scalarize defining a partially undef vector
This avoids some of the pending regressions after AMDGPU implements isExtractVecEltCheap. In a case like `shl <value, undef>, splat k`, the second operand was fully defined, so we would fall through and use the splat value for the first operand, losing the undef high bits. This would result in an additional instruction to handle the high bits. Add some reduced test cases for different opcodes for one of the regressions.
1 parent acbd822 commit d12bd25

File tree

2 files changed

+337
-2
lines changed

2 files changed

+337
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27525,8 +27525,12 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
2752527525
// If all lanes but 1 are undefined, no need to splat the scalar result.
2752627526
// TODO: Keep track of undefs and use that info in the general case.
2752727527
if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27528-
count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
27529-
count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
27528+
// This is assuming if either input is undef, the result will fold out.
27529+
//
27530+
// TODO: Do we need to check if the opcode/operand propagates undef?
27531+
// Should we ignore operation identity values?
27532+
((count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) ||
27533+
(count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1))) {
2753027534
// bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
2753127535
// build_vec ..undef, (bo X, Y), undef...
2753227536
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));

llvm/test/CodeGen/AMDGPU/trunc-combine.ll

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,334 @@ define <2 x i16> @trunc_v2i64_arg_to_v2i16(<2 x i64> %arg0) #0 {
156156
%trunc = trunc <2 x i64> %arg0 to <2 x i16>
157157
ret <2 x i16> %trunc
158158
}
159+
160+
; Test for regression where an unnecessary v_alignbit_b32 was inserted
161+
; on the final result, due to losing the fact that the upper half of
162+
; the lhs vector was undef.
163+
define <2 x i16> @vector_trunc_high_bits_undef_lshr_lhs_alignbit_regression(i32 %arg0) {
164+
; SI-LABEL: vector_trunc_high_bits_undef_lshr_lhs_alignbit_regression:
165+
; SI: ; %bb.0:
166+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167+
; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
168+
; SI-NEXT: v_mov_b32_e32 v1, 0
169+
; SI-NEXT: s_setpc_b64 s[30:31]
170+
;
171+
; VI-LABEL: vector_trunc_high_bits_undef_lshr_lhs_alignbit_regression:
172+
; VI: ; %bb.0:
173+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174+
; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
175+
; VI-NEXT: s_setpc_b64 s[30:31]
176+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
177+
%lshr = lshr <2 x i32> %undef.hi.elt, splat (i32 16)
178+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
179+
ret <2 x i16> %trunc
180+
}
181+
182+
define <2 x i16> @vector_trunc_high_bits_undef_lshr_rhs_alignbit_regression(i32 %arg0) {
183+
; SI-LABEL: vector_trunc_high_bits_undef_lshr_rhs_alignbit_regression:
184+
; SI: ; %bb.0:
185+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186+
; SI-NEXT: v_lshr_b32_e32 v0, 16, v0
187+
; SI-NEXT: s_setpc_b64 s[30:31]
188+
;
189+
; VI-LABEL: vector_trunc_high_bits_undef_lshr_rhs_alignbit_regression:
190+
; VI: ; %bb.0:
191+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192+
; VI-NEXT: v_lshrrev_b32_e64 v0, v0, 16
193+
; VI-NEXT: s_setpc_b64 s[30:31]
194+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
195+
%lshr = lshr <2 x i32> splat (i32 16), %undef.hi.elt
196+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
197+
ret <2 x i16> %trunc
198+
}
199+
200+
define <2 x i16> @vector_trunc_high_bits_undef_ashr_lhs_alignbit_regression(i32 %arg0) {
201+
; SI-LABEL: vector_trunc_high_bits_undef_ashr_lhs_alignbit_regression:
202+
; SI: ; %bb.0:
203+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204+
; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
205+
; SI-NEXT: v_mov_b32_e32 v1, 0
206+
; SI-NEXT: s_setpc_b64 s[30:31]
207+
;
208+
; VI-LABEL: vector_trunc_high_bits_undef_ashr_lhs_alignbit_regression:
209+
; VI: ; %bb.0:
210+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211+
; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
212+
; VI-NEXT: s_setpc_b64 s[30:31]
213+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
214+
%ashr = ashr <2 x i32> %undef.hi.elt, splat (i32 16)
215+
%trunc = trunc <2 x i32> %ashr to <2 x i16>
216+
ret <2 x i16> %trunc
217+
}
218+
219+
define <2 x i16> @vector_trunc_high_bits_undef_ashr_rhs_alignbit_regression(i32 %arg0) {
220+
; SI-LABEL: vector_trunc_high_bits_undef_ashr_rhs_alignbit_regression:
221+
; SI: ; %bb.0:
222+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223+
; SI-NEXT: v_ashr_i32_e32 v0, -4, v0
224+
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
225+
; SI-NEXT: v_mov_b32_e32 v1, 0
226+
; SI-NEXT: s_setpc_b64 s[30:31]
227+
;
228+
; VI-LABEL: vector_trunc_high_bits_undef_ashr_rhs_alignbit_regression:
229+
; VI: ; %bb.0:
230+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231+
; VI-NEXT: v_ashrrev_i32_e64 v0, v0, -4
232+
; VI-NEXT: s_setpc_b64 s[30:31]
233+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
234+
%lshr = ashr <2 x i32> splat (i32 -4), %undef.hi.elt
235+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
236+
ret <2 x i16> %trunc
237+
}
238+
239+
define <2 x i16> @vector_trunc_high_bits_undef_add_lhs_alignbit_regression(i32 %arg0) {
240+
; SI-LABEL: vector_trunc_high_bits_undef_add_lhs_alignbit_regression:
241+
; SI: ; %bb.0:
242+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243+
; SI-NEXT: v_add_i32_e32 v0, vcc, 16, v0
244+
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
245+
; SI-NEXT: v_mov_b32_e32 v1, 0
246+
; SI-NEXT: s_setpc_b64 s[30:31]
247+
;
248+
; VI-LABEL: vector_trunc_high_bits_undef_add_lhs_alignbit_regression:
249+
; VI: ; %bb.0:
250+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251+
; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0
252+
; VI-NEXT: s_setpc_b64 s[30:31]
253+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
254+
%lshr = add <2 x i32> %undef.hi.elt, splat (i32 16)
255+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
256+
ret <2 x i16> %trunc
257+
}
258+
259+
define <2 x i16> @vector_trunc_high_bits_undef_shl_rhs_alignbit_regression(i32 %arg0) {
260+
; SI-LABEL: vector_trunc_high_bits_undef_shl_rhs_alignbit_regression:
261+
; SI: ; %bb.0:
262+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263+
; SI-NEXT: v_lshl_b32_e32 v0, 2, v0
264+
; SI-NEXT: v_and_b32_e32 v0, 0xfffe, v0
265+
; SI-NEXT: v_mov_b32_e32 v1, 0
266+
; SI-NEXT: s_setpc_b64 s[30:31]
267+
;
268+
; VI-LABEL: vector_trunc_high_bits_undef_shl_rhs_alignbit_regression:
269+
; VI: ; %bb.0:
270+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
271+
; VI-NEXT: v_lshlrev_b32_e64 v0, v0, 2
272+
; VI-NEXT: s_setpc_b64 s[30:31]
273+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
274+
%lshr = shl <2 x i32> splat (i32 2), %undef.hi.elt
275+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
276+
ret <2 x i16> %trunc
277+
}
278+
279+
define <2 x i16> @vector_trunc_high_bits_undef_sub_lhs_alignbit_regression(i32 %arg0) {
280+
; SI-LABEL: vector_trunc_high_bits_undef_sub_lhs_alignbit_regression:
281+
; SI: ; %bb.0:
282+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283+
; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
284+
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
285+
; SI-NEXT: v_mov_b32_e32 v1, 0
286+
; SI-NEXT: s_setpc_b64 s[30:31]
287+
;
288+
; VI-LABEL: vector_trunc_high_bits_undef_sub_lhs_alignbit_regression:
289+
; VI: ; %bb.0:
290+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291+
; VI-NEXT: v_add_u32_e32 v0, vcc, -16, v0
292+
; VI-NEXT: s_setpc_b64 s[30:31]
293+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
294+
%lshr = sub <2 x i32> %undef.hi.elt, splat (i32 16)
295+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
296+
ret <2 x i16> %trunc
297+
}
298+
299+
define <2 x i16> @vector_trunc_high_bits_undef_or_lhs_alignbit_regression(i32 %arg0) {
300+
; SI-LABEL: vector_trunc_high_bits_undef_or_lhs_alignbit_regression:
301+
; SI: ; %bb.0:
302+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303+
; SI-NEXT: v_or_b32_e32 v0, 0xffff0011, v0
304+
; SI-NEXT: v_mov_b32_e32 v1, 0xffff
305+
; SI-NEXT: s_setpc_b64 s[30:31]
306+
;
307+
; VI-LABEL: vector_trunc_high_bits_undef_or_lhs_alignbit_regression:
308+
; VI: ; %bb.0:
309+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
310+
; VI-NEXT: v_or_b32_e32 v0, 0xffff0011, v0
311+
; VI-NEXT: s_setpc_b64 s[30:31]
312+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
313+
%lshr = or <2 x i32> %undef.hi.elt, splat (i32 17)
314+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
315+
ret <2 x i16> %trunc
316+
}
317+
318+
define <2 x i16> @vector_trunc_high_bits_undef_xor_lhs_alignbit_regression(i32 %arg0) {
319+
; SI-LABEL: vector_trunc_high_bits_undef_xor_lhs_alignbit_regression:
320+
; SI: ; %bb.0:
321+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322+
; SI-NEXT: v_xor_b32_e32 v0, 17, v0
323+
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
324+
; SI-NEXT: v_mov_b32_e32 v1, 0
325+
; SI-NEXT: s_setpc_b64 s[30:31]
326+
;
327+
; VI-LABEL: vector_trunc_high_bits_undef_xor_lhs_alignbit_regression:
328+
; VI: ; %bb.0:
329+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330+
; VI-NEXT: v_xor_b32_e32 v0, 17, v0
331+
; VI-NEXT: s_setpc_b64 s[30:31]
332+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
333+
%lshr = xor <2 x i32> %undef.hi.elt, splat (i32 17)
334+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
335+
ret <2 x i16> %trunc
336+
}
337+
338+
define <2 x i16> @vector_trunc_high_bits_undef_shl_lhs_alignbit_regression(i32 %arg0) {
339+
; SI-LABEL: vector_trunc_high_bits_undef_shl_lhs_alignbit_regression:
340+
; SI: ; %bb.0:
341+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342+
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
343+
; SI-NEXT: v_and_b32_e32 v0, 0xfffc, v0
344+
; SI-NEXT: v_mov_b32_e32 v1, 0
345+
; SI-NEXT: s_setpc_b64 s[30:31]
346+
;
347+
; VI-LABEL: vector_trunc_high_bits_undef_shl_lhs_alignbit_regression:
348+
; VI: ; %bb.0:
349+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350+
; VI-NEXT: v_lshlrev_b16_e32 v0, 2, v0
351+
; VI-NEXT: s_setpc_b64 s[30:31]
352+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
353+
%shl = shl <2 x i32> %undef.hi.elt, splat (i32 2)
354+
%trunc = trunc <2 x i32> %shl to <2 x i16>
355+
ret <2 x i16> %trunc
356+
}
357+
358+
define <2 x i16> @vector_trunc_high_bits_undef_mul_lhs_alignbit_regression(i32 %arg0) {
359+
; SI-LABEL: vector_trunc_high_bits_undef_mul_lhs_alignbit_regression:
360+
; SI: ; %bb.0:
361+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362+
; SI-NEXT: v_mul_lo_u32 v0, v0, 18
363+
; SI-NEXT: v_mov_b32_e32 v1, 0
364+
; SI-NEXT: v_and_b32_e32 v0, 0xfffe, v0
365+
; SI-NEXT: s_setpc_b64 s[30:31]
366+
;
367+
; VI-LABEL: vector_trunc_high_bits_undef_mul_lhs_alignbit_regression:
368+
; VI: ; %bb.0:
369+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370+
; VI-NEXT: v_mul_lo_u32 v0, v0, 18
371+
; VI-NEXT: v_and_b32_e32 v0, 0xfffe, v0
372+
; VI-NEXT: s_setpc_b64 s[30:31]
373+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
374+
%lshr = mul <2 x i32> %undef.hi.elt, splat (i32 18)
375+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
376+
ret <2 x i16> %trunc
377+
}
378+
379+
define <2 x i16> @vector_trunc_high_bits_undef_sdiv_lhs_alignbit_regression(i32 %arg0) {
380+
; SI-LABEL: vector_trunc_high_bits_undef_sdiv_lhs_alignbit_regression:
381+
; SI: ; %bb.0:
382+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383+
; SI-NEXT: s_mov_b32 s4, 0x38e38e39
384+
; SI-NEXT: v_mul_hi_i32 v0, v0, s4
385+
; SI-NEXT: v_lshrrev_b32_e32 v1, 31, v0
386+
; SI-NEXT: v_lshrrev_b32_e32 v0, 2, v0
387+
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
388+
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
389+
; SI-NEXT: v_mov_b32_e32 v1, 0
390+
; SI-NEXT: s_setpc_b64 s[30:31]
391+
;
392+
; VI-LABEL: vector_trunc_high_bits_undef_sdiv_lhs_alignbit_regression:
393+
; VI: ; %bb.0:
394+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395+
; VI-NEXT: s_mov_b32 s4, 0x38e38e39
396+
; VI-NEXT: v_mul_hi_i32 v0, v0, s4
397+
; VI-NEXT: v_lshrrev_b32_e32 v1, 31, v0
398+
; VI-NEXT: v_ashrrev_i32_e32 v0, 2, v0
399+
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
400+
; VI-NEXT: s_setpc_b64 s[30:31]
401+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
402+
%lshr = sdiv <2 x i32> %undef.hi.elt, splat (i32 18)
403+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
404+
ret <2 x i16> %trunc
405+
}
406+
407+
define <2 x i16> @vector_trunc_high_bits_undef_srem_lhs_alignbit_regression(i32 %arg0) {
408+
; SI-LABEL: vector_trunc_high_bits_undef_srem_lhs_alignbit_regression:
409+
; SI: ; %bb.0:
410+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411+
; SI-NEXT: s_mov_b32 s4, 0x38e38e39
412+
; SI-NEXT: v_mul_hi_i32 v1, v0, s4
413+
; SI-NEXT: v_lshrrev_b32_e32 v2, 31, v1
414+
; SI-NEXT: v_lshrrev_b32_e32 v1, 2, v1
415+
; SI-NEXT: v_add_i32_e32 v1, vcc, v1, v2
416+
; SI-NEXT: v_mul_lo_u32 v1, v1, 18
417+
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
418+
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
419+
; SI-NEXT: v_mov_b32_e32 v1, 0
420+
; SI-NEXT: s_setpc_b64 s[30:31]
421+
;
422+
; VI-LABEL: vector_trunc_high_bits_undef_srem_lhs_alignbit_regression:
423+
; VI: ; %bb.0:
424+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425+
; VI-NEXT: s_mov_b32 s4, 0x38e38e39
426+
; VI-NEXT: v_mul_hi_i32 v1, v0, s4
427+
; VI-NEXT: v_lshrrev_b32_e32 v2, 31, v1
428+
; VI-NEXT: v_ashrrev_i32_e32 v1, 2, v1
429+
; VI-NEXT: v_add_u32_e32 v1, vcc, v1, v2
430+
; VI-NEXT: v_mul_lo_u32 v1, v1, 18
431+
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
432+
; VI-NEXT: s_setpc_b64 s[30:31]
433+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
434+
%lshr = srem <2 x i32> %undef.hi.elt, splat (i32 18)
435+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
436+
ret <2 x i16> %trunc
437+
}
438+
439+
440+
define <2 x i16> @vector_trunc_high_bits_undef_udiv_lhs_alignbit_regression(i32 %arg0) {
441+
; SI-LABEL: vector_trunc_high_bits_undef_udiv_lhs_alignbit_regression:
442+
; SI: ; %bb.0:
443+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444+
; SI-NEXT: s_mov_b32 s4, 0x38e38e39
445+
; SI-NEXT: v_mul_hi_u32 v0, v0, s4
446+
; SI-NEXT: v_mov_b32_e32 v1, 0
447+
; SI-NEXT: v_bfe_u32 v0, v0, 2, 16
448+
; SI-NEXT: s_setpc_b64 s[30:31]
449+
;
450+
; VI-LABEL: vector_trunc_high_bits_undef_udiv_lhs_alignbit_regression:
451+
; VI: ; %bb.0:
452+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453+
; VI-NEXT: s_mov_b32 s4, 0x38e38e39
454+
; VI-NEXT: v_mul_hi_u32 v0, v0, s4
455+
; VI-NEXT: v_lshrrev_b32_e32 v0, 2, v0
456+
; VI-NEXT: s_setpc_b64 s[30:31]
457+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
458+
%lshr = udiv <2 x i32> %undef.hi.elt, splat (i32 18)
459+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
460+
ret <2 x i16> %trunc
461+
}
462+
463+
define <2 x i16> @vector_trunc_high_bits_undef_urem_lhs_alignbit_regression(i32 %arg0) {
464+
; SI-LABEL: vector_trunc_high_bits_undef_urem_lhs_alignbit_regression:
465+
; SI: ; %bb.0:
466+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
467+
; SI-NEXT: s_mov_b32 s4, 0x38e38e39
468+
; SI-NEXT: v_mul_hi_u32 v1, v0, s4
469+
; SI-NEXT: v_lshrrev_b32_e32 v1, 2, v1
470+
; SI-NEXT: v_mul_lo_u32 v1, v1, 18
471+
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
472+
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
473+
; SI-NEXT: v_mov_b32_e32 v1, 0
474+
; SI-NEXT: s_setpc_b64 s[30:31]
475+
;
476+
; VI-LABEL: vector_trunc_high_bits_undef_urem_lhs_alignbit_regression:
477+
; VI: ; %bb.0:
478+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
479+
; VI-NEXT: s_mov_b32 s4, 0x38e38e39
480+
; VI-NEXT: v_mul_hi_u32 v1, v0, s4
481+
; VI-NEXT: v_lshrrev_b32_e32 v1, 2, v1
482+
; VI-NEXT: v_mul_lo_u32 v1, v1, 18
483+
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
484+
; VI-NEXT: s_setpc_b64 s[30:31]
485+
%undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
486+
%lshr = urem <2 x i32> %undef.hi.elt, splat (i32 18)
487+
%trunc = trunc <2 x i32> %lshr to <2 x i16>
488+
ret <2 x i16> %trunc
489+
}

0 commit comments

Comments
 (0)