Skip to content

Commit 2c812e8

Browse files
committed
[InstCombine] Fold bitwise logic with intrinsics
1 parent e8f733f commit 2c812e8

File tree

4 files changed

+94
-76
lines changed

4 files changed

+94
-76
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 67 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -46,44 +46,6 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
4646
return Builder.CreateFCmp(NewPred, LHS, RHS);
4747
}
4848

49-
/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
50-
/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
51-
/// \param I Binary operator to transform.
/// \param Builder IR builder used to emit the replacement instructions.
52-
/// \return Pointer to node that must replace the original binary operator, or
53-
/// null pointer if no transformation was made.
54-
static Value *SimplifyBSwap(BinaryOperator &I,
55-
InstCombiner::BuilderTy &Builder) {
56-
assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying");
57-
58-
Value *OldLHS = I.getOperand(0);
59-
Value *OldRHS = I.getOperand(1);
60-
61-
Value *NewLHS;
62-
// Only operand 0 is tested for being a bswap; if it is not one, give up.
// On success NewLHS is bound to the bswap's argument.
if (!match(OldLHS, m_BSwap(m_Value(NewLHS))))
63-
return nullptr;
64-
65-
Value *NewRHS;
66-
const APInt *C;
67-
68-
if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) {
69-
// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
70-
// Bail out only when *both* bswaps have additional users; a single
// one-use bswap is enough for the transform to proceed.
if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse())
71-
return nullptr;
72-
// NewRHS initialized by the matcher.
73-
} else if (match(OldRHS, m_APInt(C))) {
74-
// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
75-
// With a constant RHS the bswap feeding operand 0 must have no other users.
if (!OldLHS->hasOneUse())
76-
return nullptr;
77-
// Byte-swap the constant here so that the outer bswap created below
// restores the original constant bytes.
NewRHS = ConstantInt::get(I.getType(), C->byteSwap());
78-
} else
79-
return nullptr;
80-
81-
// Emit the bitwise op on the un-swapped operands, then one bswap of the
// result; both instructions are created through Builder.
Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
82-
Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap,
83-
I.getType());
84-
return Builder.CreateCall(F, BinOp);
85-
}
86-
8749
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
8850
/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
8951
/// whether to treat V, Lo, and Hi as signed or not.
@@ -2159,6 +2121,64 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
21592121
return BinaryOperator::Create(ShiftOp, NewC, ShAmt);
21602122
}
21612123

2124+
// Fold and/or/xor with two equal intrinsic IDs:
2125+
// bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt))
2126+
// -> fshl(bitwise(A, C), bitwise(B, D), ShAmt)
2127+
// bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt))
2128+
// -> fshr(bitwise(A, C), bitwise(B, D), ShAmt)
2129+
// bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B))
2130+
// bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C)))
2131+
// bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B))
2132+
// bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C)))
2133+
//
// I is the and/or/xor being visited. The inner bitwise operations are emitted
// through Builder; the replacement intrinsic call is built with
// CallInst::Create and handed back to the caller. Returns nullptr when no
// pattern applies.
//
// Only operand 0 is tested for being an intrinsic call, so the constant forms
// are matched only with the constant as operand 1.
// NOTE(review): presumably relies on constants being canonicalized to the RHS
// before this runs -- confirm.
static Instruction *
2134+
foldBitwiseLogicWithIntrinsics(BinaryOperator &I,
2135+
InstCombiner::BuilderTy &Builder) {
2136+
assert(I.isBitwiseLogicOp() && "Should and/or/xor");
2137+
// Operand 0 must have exactly one use (this instruction); a multi-use
// intrinsic call would have to stay alive next to the newly created one.
if (!I.getOperand(0)->hasOneUse())
2138+
return nullptr;
2139+
IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0));
2140+
if (!X)
2141+
return nullptr;
2142+
2143+
IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1));
2144+
// When operand 1 is also an intrinsic call it must be single-use and call the
// same intrinsic as operand 0.
if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID()))
2145+
return nullptr;
2146+
2147+
Intrinsic::ID IID = X->getIntrinsicID();
2148+
const APInt *RHSC;
2149+
// Try to match constant RHS.
// Without an intrinsic on the RHS, only bswap/bitreverse combined with a
// constant accepted by m_APInt is handled. Past this check, a null Y implies
// IID is bswap or bitreverse and RHSC has been set, so the fshl/fshr case
// below can dereference Y unconditionally.
2150+
if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) ||
2151+
!match(I.getOperand(1), m_APInt(RHSC))))
2152+
return nullptr;
2153+
2154+
switch (IID) {
2155+
case Intrinsic::fshl:
2156+
case Intrinsic::fshr: {
2157+
// Both funnel shifts must use the very same Value as shift amount (this is
// a pointer comparison, not a check for equivalent values).
if (X->getOperand(2) != Y->getOperand(2))
2158+
return nullptr;
2159+
// Combine the first operands and the second operands pairwise, then apply
// a single funnel shift with the shared shift amount.
Value *NewOp0 =
2160+
Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0));
2161+
Value *NewOp1 =
2162+
Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1));
2163+
Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
2164+
return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)});
2165+
}
2166+
case Intrinsic::bswap:
2167+
case Intrinsic::bitreverse: {
2168+
// The RHS is either the other intrinsic's argument or the constant with the
// same permutation (byteSwap / reverseBits) applied up front, so that the
// intrinsic call created below restores the original constant.
Value *NewOp0 = Builder.CreateBinOp(
2169+
I.getOpcode(), X->getOperand(0),
2170+
Y ? Y->getOperand(0)
2171+
: ConstantInt::get(I.getType(), IID == Intrinsic::bswap
2172+
? RHSC->byteSwap()
2173+
: RHSC->reverseBits()));
2174+
Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
2175+
return CallInst::Create(F, {NewOp0});
2176+
}
2177+
default:
2178+
// Any other intrinsic pair is left untouched.
return nullptr;
2179+
}
2180+
}
2181+
21622182
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
21632183
// here. We should standardize that construct where it is needed or choose some
21642184
// other way to ensure that commutated variants of patterns are not missed.
@@ -2194,9 +2214,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
21942214
if (Value *V = foldUsingDistributiveLaws(I))
21952215
return replaceInstUsesWith(I, V);
21962216

2197-
if (Value *V = SimplifyBSwap(I, Builder))
2198-
return replaceInstUsesWith(I, V);
2199-
22002217
if (Instruction *R = foldBinOpShiftWithShift(I))
22012218
return R;
22022219

@@ -2688,6 +2705,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
26882705
if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
26892706
return Res;
26902707

2708+
if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
2709+
return Res;
2710+
26912711
return nullptr;
26922712
}
26932713

@@ -3347,9 +3367,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
33473367
if (Value *V = foldUsingDistributiveLaws(I))
33483368
return replaceInstUsesWith(I, V);
33493369

3350-
if (Value *V = SimplifyBSwap(I, Builder))
3351-
return replaceInstUsesWith(I, V);
3352-
33533370
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
33543371
Type *Ty = I.getType();
33553372
if (Ty->isIntOrIntVectorTy(1)) {
@@ -3884,6 +3901,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
38843901
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
38853902
}
38863903

3904+
if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
3905+
return Res;
3906+
38873907
return nullptr;
38883908
}
38893909

@@ -4507,9 +4527,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
45074527
if (SimplifyDemandedInstructionBits(I))
45084528
return &I;
45094529

4510-
if (Value *V = SimplifyBSwap(I, Builder))
4511-
return replaceInstUsesWith(I, V);
4512-
45134530
if (Instruction *R = foldNot(I))
45144531
return R;
45154532

@@ -4799,5 +4816,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
47994816
if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
48004817
return Res;
48014818

4819+
if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
4820+
return Res;
4821+
48024822
return nullptr;
48034823
}

llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,8 @@ define i1 @test3(i32 %arg) {
4646

4747
define i8 @add_bitreverse(i8 %a) {
4848
; CHECK-LABEL: @add_bitreverse(
49-
; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], -4
50-
; CHECK-NEXT: [[REVERSE:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[B]]), !range [[RNG0:![0-9]+]]
51-
; CHECK-NEXT: [[C:%.*]] = or disjoint i8 [[REVERSE]], -16
49+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], 15
50+
; CHECK-NEXT: [[C:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[TMP1]])
5251
; CHECK-NEXT: ret i8 [[C]]
5352
;
5453
%b = and i8 %a, 252

llvm/test/Transforms/InstCombine/bitwiselogic-bitmanip.ll

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
define i32 @test_or_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
55
; CHECK-LABEL: define i32 @test_or_fshl(
66
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
7-
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]])
8-
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH]])
9-
; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]]
7+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[C]]
8+
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[B]], [[D]]
9+
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
1010
; CHECK-NEXT: ret i32 [[RET]]
1111
;
1212
%val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
@@ -17,9 +17,9 @@ define i32 @test_or_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
1717
define i32 @test_and_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
1818
; CHECK-LABEL: define i32 @test_and_fshl(
1919
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
20-
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]])
21-
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH]])
22-
; CHECK-NEXT: [[RET:%.*]] = and i32 [[VAL1]], [[VAL2]]
20+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A]], [[C]]
21+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[B]], [[D]]
22+
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
2323
; CHECK-NEXT: ret i32 [[RET]]
2424
;
2525
%val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
@@ -30,9 +30,9 @@ define i32 @test_and_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
3030
define i32 @test_xor_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
3131
; CHECK-LABEL: define i32 @test_xor_fshl(
3232
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
33-
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]])
34-
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH]])
35-
; CHECK-NEXT: [[RET:%.*]] = xor i32 [[VAL1]], [[VAL2]]
33+
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[C]]
34+
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[B]], [[D]]
35+
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
3636
; CHECK-NEXT: ret i32 [[RET]]
3737
;
3838
%val1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
@@ -43,9 +43,9 @@ define i32 @test_xor_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
4343
define i32 @test_or_fshr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
4444
; CHECK-LABEL: define i32 @test_or_fshr(
4545
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
46-
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.fshr.i32(i32 [[A]], i32 [[B]], i32 [[SH]])
47-
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[D]], i32 [[SH]])
48-
; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]]
46+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[C]]
47+
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[B]], [[D]]
48+
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
4949
; CHECK-NEXT: ret i32 [[RET]]
5050
;
5151
%val1 = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %sh)
@@ -56,11 +56,11 @@ define i32 @test_or_fshr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
5656
define i32 @test_or_fshl_cascade(i32 %a, i32 %b, i32 %c) {
5757
; CHECK-LABEL: define i32 @test_or_fshl_cascade(
5858
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
59-
; CHECK-NEXT: [[FSHL1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[A]], i32 24)
60-
; CHECK-NEXT: [[FSHL2:%.*]] = call i32 @llvm.fshl.i32(i32 [[B]], i32 [[B]], i32 24)
61-
; CHECK-NEXT: [[FSHL3:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[C]], i32 24)
62-
; CHECK-NEXT: [[OR1:%.*]] = or i32 [[FSHL1]], [[FSHL2]]
63-
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[FSHL3]]
59+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[B]]
60+
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[A]], [[B]]
61+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[C]]
62+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[C]]
63+
; CHECK-NEXT: [[OR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP3]], i32 [[TMP4]], i32 24)
6464
; CHECK-NEXT: ret i32 [[OR2]]
6565
;
6666
%fshl1 = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 24)
@@ -73,9 +73,8 @@ define i32 @test_or_fshl_cascade(i32 %a, i32 %b, i32 %c) {
7373
define i32 @test_or_bitreverse(i32 %a, i32 %b) {
7474
; CHECK-LABEL: define i32 @test_or_bitreverse(
7575
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
76-
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[A]])
77-
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[B]])
78-
; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], [[VAL2]]
76+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[B]]
77+
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
7978
; CHECK-NEXT: ret i32 [[RET]]
8079
;
8180
%val1 = call i32 @llvm.bitreverse.i32(i32 %a)
@@ -86,8 +85,8 @@ define i32 @test_or_bitreverse(i32 %a, i32 %b) {
8685
define i32 @test_or_bitreverse_constant(i32 %a, i32 %b) {
8786
; CHECK-LABEL: define i32 @test_or_bitreverse_constant(
8887
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
89-
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[A]])
90-
; CHECK-NEXT: [[RET:%.*]] = or i32 [[VAL1]], -16777216
88+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], 255
89+
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
9190
; CHECK-NEXT: ret i32 [[RET]]
9291
;
9392
%val1 = call i32 @llvm.bitreverse.i32(i32 %a)

llvm/test/Transforms/InstCombine/bswap-fold.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -498,8 +498,8 @@ define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 {
498498
define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 {
499499
; CHECK-LABEL: @bs_and64_multiuse2(
500500
; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
501-
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A]], [[B:%.*]]
502-
; CHECK-NEXT: [[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
501+
; CHECK-NEXT: [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
502+
; CHECK-NEXT: [[T3:%.*]] = and i64 [[T1]], [[T2]]
503503
; CHECK-NEXT: [[T4:%.*]] = mul i64 [[T3]], [[T1]]
504504
; CHECK-NEXT: ret i64 [[T4]]
505505
;
@@ -512,9 +512,9 @@ define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 {
512512

513513
define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 {
514514
; CHECK-LABEL: @bs_and64_multiuse3(
515+
; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
515516
; CHECK-NEXT: [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
516-
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], [[B]]
517-
; CHECK-NEXT: [[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
517+
; CHECK-NEXT: [[T3:%.*]] = and i64 [[T1]], [[T2]]
518518
; CHECK-NEXT: [[T4:%.*]] = mul i64 [[T3]], [[T2]]
519519
; CHECK-NEXT: ret i64 [[T4]]
520520
;

0 commit comments

Comments
 (0)