Skip to content

Commit 7e72caf

Browse files
authored
[SelectionDAG] Add MaskedValueIsZero check to allow folding of zero extended variables we know are safe to extend (#85573)
Add ones for every high bit that will be cleared. This will allow us to evaluate variables whose bits are known, to see whether they have no risk of overflow even when the shift amount is greater than the difference between the two types' widths.
1 parent 8779edb commit 7e72caf

File tree

2 files changed

+139
-3
lines changed

2 files changed

+139
-3
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13832,11 +13832,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1383213832
if (N0.getOpcode() == ISD::SHL) {
1383313833
// If the original shl may be shifting out bits, do not perform this
1383413834
// transformation.
13835-
// TODO: Add MaskedValueIsZero check.
1383613835
unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
1383713836
ShVal.getOperand(0).getValueSizeInBits();
13838-
if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
13839-
return SDValue();
13837+
if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13838+
// If the shift is too large, then see if we can deduce that the
13839+
// shift is safe anyway.
13840+
// Create a mask that has ones for the bits being shifted out.
13841+
APInt ShiftOutMask =
13842+
APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
13843+
ShAmtC->getAPIntValue().getZExtValue());
13844+
13845+
// Check if the bits being shifted out are known to be zero.
13846+
if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
13847+
return SDValue();
13848+
}
1384013849
}
1384113850

1384213851
// Ensure that the shift amount is wide enough for the shifted value.

llvm/test/CodeGen/X86/dagcombine-shifts.ll

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,5 +322,132 @@ define void @g(i32 %a) nounwind {
322322
ret void
323323
}
324324

325+
define i32 @shift_zext_shl(i8 zeroext %x) {
326+
; X86-LABEL: shift_zext_shl:
327+
; X86: # %bb.0:
328+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
329+
; X86-NEXT: andl $64, %eax
330+
; X86-NEXT: shll $9, %eax
331+
; X86-NEXT: retl
332+
;
333+
; X64-LABEL: shift_zext_shl:
334+
; X64: # %bb.0:
335+
; X64-NEXT: movl %edi, %eax
336+
; X64-NEXT: andl $64, %eax
337+
; X64-NEXT: shll $9, %eax
338+
; X64-NEXT: retq
339+
%a = and i8 %x, 64
340+
%b = zext i8 %a to i16
341+
%c = shl i16 %b, 9
342+
%d = zext i16 %c to i32
343+
ret i32 %d
344+
}
345+
346+
define i32 @shift_zext_shl2(i8 zeroext %x) {
347+
; X86-LABEL: shift_zext_shl2:
348+
; X86: # %bb.0:
349+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
350+
; X86-NEXT: andl $64, %eax
351+
; X86-NEXT: shll $9, %eax
352+
; X86-NEXT: retl
353+
;
354+
; X64-LABEL: shift_zext_shl2:
355+
; X64: # %bb.0:
356+
; X64-NEXT: movl %edi, %eax
357+
; X64-NEXT: andl $64, %eax
358+
; X64-NEXT: shll $9, %eax
359+
; X64-NEXT: retq
360+
%a = and i8 %x, 64
361+
%b = zext i8 %a to i32
362+
%c = shl i32 %b, 9
363+
ret i32 %c
364+
}
365+
366+
define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) nounwind {
367+
; X86-LABEL: shift_zext_shl_vec:
368+
; X86: # %bb.0:
369+
; X86-NEXT: pushl %edi
370+
; X86-NEXT: pushl %esi
371+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
372+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi
373+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
374+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
375+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
376+
; X86-NEXT: andl $64, %ecx
377+
; X86-NEXT: shll $9, %ecx
378+
; X86-NEXT: andl $63, %edx
379+
; X86-NEXT: shll $8, %edx
380+
; X86-NEXT: andl $31, %esi
381+
; X86-NEXT: shll $7, %esi
382+
; X86-NEXT: andl $23, %edi
383+
; X86-NEXT: shll $6, %edi
384+
; X86-NEXT: movl %edi, 12(%eax)
385+
; X86-NEXT: movl %esi, 8(%eax)
386+
; X86-NEXT: movl %edx, 4(%eax)
387+
; X86-NEXT: movl %ecx, (%eax)
388+
; X86-NEXT: popl %esi
389+
; X86-NEXT: popl %edi
390+
; X86-NEXT: retl $4
391+
;
392+
; X64-LABEL: shift_zext_shl_vec:
393+
; X64: # %bb.0:
394+
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
395+
; X64-NEXT: pxor %xmm1, %xmm1
396+
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
397+
; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
398+
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
399+
; X64-NEXT: retq
400+
%a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
401+
%b = zext <4 x i8> %a to <4 x i16>
402+
%c = shl <4 x i16> %b, <i16 9, i16 8, i16 7, i16 6>
403+
%d = zext <4 x i16> %c to <4 x i32>
404+
ret <4 x i32> %d
405+
}
406+
407+
define <4 x i32> @shift_zext_shl2_vec(<4 x i8> %x) nounwind {
408+
; X86-LABEL: shift_zext_shl2_vec:
409+
; X86: # %bb.0:
410+
; X86-NEXT: pushl %edi
411+
; X86-NEXT: pushl %esi
412+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
413+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
414+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
415+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
416+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi
417+
; X86-NEXT: andl $23, %edi
418+
; X86-NEXT: andl $31, %esi
419+
; X86-NEXT: andl $63, %edx
420+
; X86-NEXT: andl $64, %ecx
421+
; X86-NEXT: shll $9, %ecx
422+
; X86-NEXT: shll $8, %edx
423+
; X86-NEXT: shll $7, %esi
424+
; X86-NEXT: shll $6, %edi
425+
; X86-NEXT: movl %edi, 12(%eax)
426+
; X86-NEXT: movl %esi, 8(%eax)
427+
; X86-NEXT: movl %edx, 4(%eax)
428+
; X86-NEXT: movl %ecx, (%eax)
429+
; X86-NEXT: popl %esi
430+
; X86-NEXT: popl %edi
431+
; X86-NEXT: retl $4
432+
;
433+
; X64-LABEL: shift_zext_shl2_vec:
434+
; X64: # %bb.0:
435+
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
436+
; X64-NEXT: pxor %xmm1, %xmm1
437+
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
438+
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
439+
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
440+
; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
441+
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
442+
; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
443+
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
444+
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
445+
; X64-NEXT: retq
446+
%a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
447+
%b = zext <4 x i8> %a to <4 x i32>
448+
%c = shl <4 x i32> %b, <i32 9, i32 8, i32 7, i32 6>
449+
ret <4 x i32> %c
450+
}
451+
325452
declare dso_local void @f(i64)
326453

0 commit comments

Comments
 (0)