Skip to content

Commit f35e3fa

Browse files
committed
Add transforms for (max/min (xor X, Pow2), X) -> (or/and X, Pow2/~Pow2)
X ^ Pow2 is guaranteed to flip one bit. We can use this to speed up max/min by just selecting X with the flipped bit set (or) or cleared (and-not), respectively. Alive2 links: smax-neg: https://alive2.llvm.org/ce/z/j3QYFs ; smin-neg: https://alive2.llvm.org/ce/z/bFYnQW ; smax-pos: https://alive2.llvm.org/ce/z/4xYSxR ; smin-pos: https://alive2.llvm.org/ce/z/H3RPKj ; umax: https://alive2.llvm.org/ce/z/P4oRcX ; umin: https://alive2.llvm.org/ce/z/vWZG6p . Differential Revision: https://reviews.llvm.org/D144606
1 parent 890eb4f commit f35e3fa

File tree

2 files changed

+52
-20
lines changed

2 files changed

+52
-20
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,6 +1468,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
14681468
}
14691469
}
14701470

1471+
// (umax X, (xor X, Pow2))
1472+
// -> (or X, Pow2)
1473+
// (umin X, (xor X, Pow2))
1474+
// -> (and X, ~Pow2)
1475+
// (smax X, (xor X, Pos_Pow2))
1476+
// -> (or X, Pos_Pow2)
1477+
// (smin X, (xor X, Pos_Pow2))
1478+
// -> (and X, ~Pos_Pow2)
1479+
// (smax X, (xor X, Neg_Pow2))
1480+
// -> (and X, ~Neg_Pow2)
1481+
// (smin X, (xor X, Neg_Pow2))
1482+
// -> (or X, Neg_Pow2)
1483+
if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
1484+
match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
1485+
isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
1486+
bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
1487+
bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
1488+
1489+
if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
1490+
auto KnownSign = getKnownSign(X, II, DL, &AC, &DT);
1491+
if (KnownSign == std::nullopt) {
1492+
UseOr = false;
1493+
UseAndN = false;
1494+
} else if (*KnownSign /* true is Signed. */) {
1495+
UseOr ^= true;
1496+
UseAndN ^= true;
1497+
Type *Ty = I0->getType();
1498+
// Negative power of 2 must be IntMin. It's possible to be able to
1499+
// prove negative / power of 2 without actually having known bits, so
1500+
// just get the value by hand.
1501+
X = Constant::getIntegerValue(
1502+
Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
1503+
}
1504+
}
1505+
if (UseOr)
1506+
return BinaryOperator::CreateOr(I0, X);
1507+
else if (UseAndN)
1508+
return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
1509+
}
1510+
14711511
// If we can eliminate ~A and Y is free to invert:
14721512
// max ~A, Y --> ~(min A, ~Y)
14731513
//

llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ declare void @barrier()
1515

1616
define <2 x i8> @umax_xor_Cpow2(<2 x i8> %x) {
1717
; CHECK-LABEL: @umax_xor_Cpow2(
18-
; CHECK-NEXT: [[X_XOR:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
19-
; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[X]], <2 x i8> [[X_XOR]])
18+
; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
2019
; CHECK-NEXT: ret <2 x i8> [[R]]
2120
;
2221
%x_xor = xor <2 x i8> %x, <i8 128, i8 128>
@@ -26,8 +25,7 @@ define <2 x i8> @umax_xor_Cpow2(<2 x i8> %x) {
2625

2726
define i8 @umin_xor_Cpow2(i8 %x) {
2827
; CHECK-LABEL: @umin_xor_Cpow2(
29-
; CHECK-NEXT: [[X_XOR:%.*]] = xor i8 [[X:%.*]], 64
30-
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[X_XOR]])
28+
; CHECK-NEXT: [[R:%.*]] = and i8 [[X:%.*]], -65
3129
; CHECK-NEXT: ret i8 [[R]]
3230
;
3331
%x_xor = xor i8 %x, 64
@@ -37,8 +35,7 @@ define i8 @umin_xor_Cpow2(i8 %x) {
3735

3836
define i8 @smax_xor_Cpow2_pos(i8 %x) {
3937
; CHECK-LABEL: @smax_xor_Cpow2_pos(
40-
; CHECK-NEXT: [[X_XOR:%.*]] = xor i8 [[X:%.*]], 32
41-
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[X_XOR]])
38+
; CHECK-NEXT: [[R:%.*]] = or i8 [[X:%.*]], 32
4239
; CHECK-NEXT: ret i8 [[R]]
4340
;
4441
%x_xor = xor i8 %x, 32
@@ -48,8 +45,7 @@ define i8 @smax_xor_Cpow2_pos(i8 %x) {
4845

4946
define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) {
5047
; CHECK-LABEL: @smin_xor_Cpow2_pos(
51-
; CHECK-NEXT: [[X_XOR:%.*]] = xor <2 x i8> [[X:%.*]], <i8 16, i8 16>
52-
; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X]], <2 x i8> [[X_XOR]])
48+
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], <i8 -17, i8 -17>
5349
; CHECK-NEXT: ret <2 x i8> [[R]]
5450
;
5551
%x_xor = xor <2 x i8> %x, <i8 16, i8 16>
@@ -59,8 +55,7 @@ define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) {
5955

6056
define <2 x i8> @smax_xor_Cpow2_neg(<2 x i8> %x) {
6157
; CHECK-LABEL: @smax_xor_Cpow2_neg(
62-
; CHECK-NEXT: [[X_XOR:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
63-
; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X]], <2 x i8> [[X_XOR]])
58+
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], <i8 127, i8 127>
6459
; CHECK-NEXT: ret <2 x i8> [[R]]
6560
;
6661
%x_xor = xor <2 x i8> %x, <i8 128, i8 128>
@@ -70,8 +65,7 @@ define <2 x i8> @smax_xor_Cpow2_neg(<2 x i8> %x) {
7065

7166
define i8 @smin_xor_Cpow2_neg(i8 %x) {
7267
; CHECK-LABEL: @smin_xor_Cpow2_neg(
73-
; CHECK-NEXT: [[X_XOR:%.*]] = xor i8 [[X:%.*]], -128
74-
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[X_XOR]])
68+
; CHECK-NEXT: [[R:%.*]] = or i8 [[X:%.*]], -128
7569
; CHECK-NEXT: ret i8 [[R]]
7670
;
7771
%x_xor = xor i8 %x, 128
@@ -83,8 +77,7 @@ define i8 @umax_xor_pow2(i8 %x, i8 %y) {
8377
; CHECK-LABEL: @umax_xor_pow2(
8478
; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[Y:%.*]]
8579
; CHECK-NEXT: [[YP2:%.*]] = and i8 [[NY]], [[Y]]
86-
; CHECK-NEXT: [[X_XOR:%.*]] = xor i8 [[YP2]], [[X:%.*]]
87-
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[X_XOR]])
80+
; CHECK-NEXT: [[R:%.*]] = or i8 [[YP2]], [[X:%.*]]
8881
; CHECK-NEXT: ret i8 [[R]]
8982
;
9083
%ny = sub i8 0, %y
@@ -98,8 +91,8 @@ define <2 x i8> @umin_xor_pow2(<2 x i8> %x, <2 x i8> %y) {
9891
; CHECK-LABEL: @umin_xor_pow2(
9992
; CHECK-NEXT: [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
10093
; CHECK-NEXT: [[YP2:%.*]] = and <2 x i8> [[NY]], [[Y]]
101-
; CHECK-NEXT: [[X_XOR:%.*]] = xor <2 x i8> [[YP2]], [[X:%.*]]
102-
; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X]], <2 x i8> [[X_XOR]])
94+
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[YP2]], <i8 -1, i8 -1>
95+
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]]
10396
; CHECK-NEXT: ret <2 x i8> [[R]]
10497
;
10598
%ny = sub <2 x i8> <i8 0, i8 0>, %y
@@ -146,8 +139,7 @@ define i8 @smax_xor_pow2_neg(i8 %x, i8 %y) {
146139
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[YP2]], 0
147140
; CHECK-NEXT: br i1 [[CMP]], label [[NEG:%.*]], label [[POS:%.*]]
148141
; CHECK: neg:
149-
; CHECK-NEXT: [[X_XOR:%.*]] = xor i8 [[YP2]], [[X:%.*]]
150-
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[X_XOR]])
142+
; CHECK-NEXT: [[R:%.*]] = and i8 [[X:%.*]], 127
151143
; CHECK-NEXT: ret i8 [[R]]
152144
; CHECK: pos:
153145
; CHECK-NEXT: call void @barrier()
@@ -173,8 +165,8 @@ define i8 @smin_xor_pow2_pos(i8 %x, i8 %y) {
173165
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[YP2]], 0
174166
; CHECK-NEXT: br i1 [[CMP]], label [[NEG:%.*]], label [[POS:%.*]]
175167
; CHECK: neg:
176-
; CHECK-NEXT: [[X_XOR:%.*]] = xor i8 [[YP2]], [[X:%.*]]
177-
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[X_XOR]])
168+
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[YP2]], -1
169+
; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP1]], [[X:%.*]]
178170
; CHECK-NEXT: ret i8 [[R]]
179171
; CHECK: pos:
180172
; CHECK-NEXT: call void @barrier()

0 commit comments

Comments
 (0)