Skip to content

Commit a9edba0

Browse files
committed
KnownBits: refine high-bits of mul in signed case
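KnownBits::mul suffers from the deficiency that it doesn't account for negative inputs. Fix it by refining the known leading zeros when both inputs are known to be negative, and by setting known leading ones when exactly one of the inputs is known to be negative. The strategy is to keep using umul_ov after adjusting for negative inputs (using the absolute value of the minimum value in place of the maximum value for an input that is known to be negative), and, when the result is known to be negative, to set the known leading ones from the negation of that product. A possibly-zero result is a special case: no leading ones are set unless both inputs are known to be non-zero.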
1 parent 786df0c commit a9edba0

File tree

2 files changed

+22
-23
lines changed

2 files changed

+22
-23
lines changed

llvm/lib/Support/KnownBits.cpp

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -796,19 +796,26 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
796796
assert((!NoUndefSelfMultiply || LHS == RHS) &&
797797
"Self multiplication knownbits mismatch");
798798

799-
// Compute the high known-0 bits by multiplying the unsigned max of each side.
800-
// Conservatively, M active bits * N active bits results in M + N bits in the
801-
// result. But if we know a value is a power-of-2 for example, then this
802-
// computes one more leading zero.
803-
// TODO: This could be generalized to number of sign bits (negative numbers).
804-
APInt UMaxLHS = LHS.getMaxValue();
805-
APInt UMaxRHS = RHS.getMaxValue();
806-
807-
// For leading zeros in the result to be valid, the unsigned max product must
799+
// Compute the high known-0 or known-1 bits by multiplying the max of each
800+
// side. Conservatively, M active bits * N active bits results in M + N bits
801+
// in the result. But if we know a value is a power-of-2 for example, then
802+
// this computes one more leading zero or one.
803+
APInt MaxLHS = LHS.isNegative() ? LHS.getMinValue().abs() : LHS.getMaxValue(),
804+
MaxRHS = RHS.isNegative() ? RHS.getMinValue().abs() : RHS.getMaxValue();
805+
806+
// For leading zeros or ones in the result to be valid, the max product must
808807
// fit in the bitwidth (it must not overflow).
809808
bool HasOverflow;
810-
APInt UMaxResult = UMaxLHS.umul_ov(UMaxRHS, HasOverflow);
811-
unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countl_zero();
809+
APInt Result = MaxLHS.umul_ov(MaxRHS, HasOverflow);
810+
bool NegResult = LHS.isNegative() ^ RHS.isNegative();
811+
unsigned LeadZ = 0, LeadO = 0;
812+
if (!HasOverflow) {
813+
// Do not set leading ones unless the result is known to be non-zero.
814+
if (NegResult && LHS.isNonZero() && RHS.isNonZero())
815+
LeadO = (-Result).countLeadingOnes();
816+
else if (!NegResult)
817+
LeadZ = Result.countLeadingZeros();
818+
}
812819

813820
// The result of the bottom bits of an integer multiply can be
814821
// inferred by looking at the bottom bits of both operands and
@@ -873,8 +880,9 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
873880

874881
KnownBits Res(BitWidth);
875882
Res.Zero.setHighBits(LeadZ);
883+
Res.One.setHighBits(LeadO);
876884
Res.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
877-
Res.One = BottomKnown.getLoBits(ResultBitsKnown);
885+
Res.One |= BottomKnown.getLoBits(ResultBitsKnown);
878886

879887
// If we're self-multiplying then bit[1] is guaranteed to be zero.
880888
if (NoUndefSelfMultiply && BitWidth > 1) {

llvm/test/Analysis/ValueTracking/knownbits-mul.ll

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,7 @@ define i8 @mul_high_bits_know(i8 %xx, i8 %yy) {
7272
define i8 @mul_high_bits_know2(i8 %xx, i8 %yy) {
7373
; CHECK-LABEL: define i8 @mul_high_bits_know2(
7474
; CHECK-SAME: i8 [[XX:%.*]], i8 [[YY:%.*]]) {
75-
; CHECK-NEXT: [[X:%.*]] = or i8 [[XX]], -2
76-
; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY]], 4
77-
; CHECK-NEXT: [[Y_NONZERO:%.*]] = or disjoint i8 [[Y]], 1
78-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i8 [[X]], [[Y_NONZERO]]
79-
; CHECK-NEXT: [[R:%.*]] = and i8 [[MUL]], -16
80-
; CHECK-NEXT: ret i8 [[R]]
75+
; CHECK-NEXT: ret i8 -16
8176
;
8277
%x = or i8 %xx, -2
8378
%y = and i8 %yy, 4
@@ -90,11 +85,7 @@ define i8 @mul_high_bits_know2(i8 %xx, i8 %yy) {
9085
define i8 @mul_high_bits_know3(i8 %xx, i8 %yy) {
9186
; CHECK-LABEL: define i8 @mul_high_bits_know3(
9287
; CHECK-SAME: i8 [[XX:%.*]], i8 [[YY:%.*]]) {
93-
; CHECK-NEXT: [[X:%.*]] = or i8 [[XX]], 124
94-
; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY]], 126
95-
; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[X]], [[Y]]
96-
; CHECK-NEXT: [[R:%.*]] = and i8 [[MUL]], 112
97-
; CHECK-NEXT: ret i8 [[R]]
88+
; CHECK-NEXT: ret i8 0
9889
;
9990
%x = or i8 %xx, -4
10091
%y = or i8 %yy, -2

0 commit comments

Comments
 (0)