Skip to content

Commit 6ca9ff2

Browse files
committed
[SelectionDAG] Let ComputeNumSignBits handle (shl (ext X), C)
Add simple support for looking through ZEXT/ANYEXT/SEXT when doing ComputeNumSignBits for SHL. This is valid for the case when all extended bits are shifted out, because then the number of sign bits can be found by analysing the EXT operand. A future improvement could be to pass along the "shifted left by" information in the recursive calls to ComputeNumSignBits, allowing us to handle this more generically.
1 parent ce0ec52 commit 6ca9ff2

File tree

2 files changed

+29
-44
lines changed

2 files changed

+29
-44
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4615,12 +4615,30 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
46154615
Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits);
46164616
return Tmp;
46174617
case ISD::SHL:
4618-
if (std::optional<uint64_t> ShAmt =
4618+
if (std::optional<uint64_t> MaxShAmt =
46194619
getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
4620+
if (Op.getOperand(0).getOpcode() == ISD::ANY_EXTEND ||
4621+
Op.getOperand(0).getOpcode() == ISD::ZERO_EXTEND ||
4622+
Op.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)
4623+
if (std::optional<uint64_t> MinShAmt =
4624+
getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) {
4625+
SDValue Src = Op.getOperand(0);
4626+
EVT SrcVT = Src.getValueType();
4627+
SDValue ExtendedOp = Op.getOperand(0).getOperand(0);
4628+
EVT ExtendedOpVT = ExtendedOp.getValueType();
4629+
uint64_t ExtendedWidth =
4630+
SrcVT.getScalarSizeInBits() - ExtendedOpVT.getScalarSizeInBits();
4631+
if (ExtendedWidth <= *MinShAmt) {
4632+
Tmp = ComputeNumSignBits(ExtendedOp, DemandedElts, Depth + 1);
4633+
Tmp += ExtendedWidth;
4634+
if (*MaxShAmt < Tmp)
4635+
return Tmp - *MaxShAmt;
4636+
}
4637+
}
46204638
// shl destroys sign bits, ensure it doesn't shift out all sign bits.
46214639
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
4622-
if (*ShAmt < Tmp)
4623-
return Tmp - *ShAmt;
4640+
if (*MaxShAmt < Tmp)
4641+
return Tmp - *MaxShAmt;
46244642
}
46254643
break;
46264644
case ISD::AND:

llvm/test/CodeGen/X86/computenumsignbits-shl.ll

Lines changed: 8 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,12 @@ define void @computeNumSignBits_shl_zext_1(i8 %x, ptr %p) nounwind {
1212
; X64-NEXT: sarb $5, %dil
1313
; X64-NEXT: movzbl %dil, %eax
1414
; X64-NEXT: movl %eax, %ecx
15-
; X64-NEXT: shll $10, %ecx
16-
; X64-NEXT: xorl %edx, %edx
17-
; X64-NEXT: testw %cx, %cx
18-
; X64-NEXT: sets %dl
19-
; X64-NEXT: addl $32767, %edx # imm = 0x7FFF
20-
; X64-NEXT: movl %eax, %edi
21-
; X64-NEXT: shll $11, %edi
22-
; X64-NEXT: movswl %di, %r8d
23-
; X64-NEXT: shrl %r8d
24-
; X64-NEXT: cmpw %r8w, %cx
25-
; X64-NEXT: cmovnel %edx, %edi
26-
; X64-NEXT: movw %di, (%rsi)
27-
; X64-NEXT: movl %eax, %edi
28-
; X64-NEXT: shll $12, %edi
29-
; X64-NEXT: movswl %di, %r8d
30-
; X64-NEXT: shrl $2, %r8d
31-
; X64-NEXT: cmpw %r8w, %cx
32-
; X64-NEXT: cmovnel %edx, %edi
33-
; X64-NEXT: movw %di, (%rsi)
15+
; X64-NEXT: shll $11, %ecx
16+
; X64-NEXT: movw %cx, (%rsi)
17+
; X64-NEXT: movl %eax, %ecx
18+
; X64-NEXT: shll $12, %ecx
19+
; X64-NEXT: movw %cx, (%rsi)
3420
; X64-NEXT: shll $13, %eax
35-
; X64-NEXT: movswl %ax, %edi
36-
; X64-NEXT: shrl $3, %edi
37-
; X64-NEXT: cmpw %di, %cx
38-
; X64-NEXT: cmovnel %edx, %eax
3921
; X64-NEXT: movw %ax, (%rsi)
4022
; X64-NEXT: retq
4123
%ashr = ashr i8 %x, 5
@@ -88,24 +70,9 @@ define void @computeNumSignBits_shl_zext_vec_1(<2 x i8> %x, ptr %p) nounwind {
8870
; X64-NEXT: movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
8971
; X64-NEXT: pxor %xmm1, %xmm0
9072
; X64-NEXT: psubb %xmm1, %xmm0
91-
; X64-NEXT: pxor %xmm1, %xmm1
92-
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
93-
; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1024,4096,u,u,u,u,u,u]
94-
; X64-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
95-
; X64-NEXT: pand %xmm0, %xmm2
96-
; X64-NEXT: pcmpgtw %xmm0, %xmm1
97-
; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
98-
; X64-NEXT: por %xmm2, %xmm1
99-
; X64-NEXT: movdqa %xmm0, %xmm2
100-
; X64-NEXT: paddw %xmm0, %xmm2
101-
; X64-NEXT: movdqa %xmm2, %xmm3
102-
; X64-NEXT: psraw $1, %xmm3
103-
; X64-NEXT: pcmpeqw %xmm0, %xmm3
104-
; X64-NEXT: movdqa %xmm3, %xmm0
105-
; X64-NEXT: pandn %xmm1, %xmm0
106-
; X64-NEXT: pand %xmm2, %xmm3
107-
; X64-NEXT: por %xmm0, %xmm3
108-
; X64-NEXT: movd %xmm3, (%rdi)
73+
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
74+
; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2048,8192,u,u,u,u,u,u]
75+
; X64-NEXT: movd %xmm0, (%rdi)
10976
; X64-NEXT: retq
11077
%ashr = ashr <2 x i8> %x, <i8 5, i8 5>
11178
%zext = zext <2 x i8> %ashr to <2 x i16>

0 commit comments

Comments
 (0)