[InstCombine] Fold bitwise logic with intrinsics #77460
Conversation
Force-pushed from 571ed36 to c21135e
@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

Alive2: https://alive2.llvm.org/ce/z/S28Y3G

Full diff: https://github.com/llvm/llvm-project/pull/77460.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c03f50d75814d8..6df899e2069089 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2159,6 +2159,35 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
   return BinaryOperator::Create(ShiftOp, NewC, ShAmt);
 }
+static Instruction *
+foldBitwiseLogicWithFunnelShift(BinaryOperator &I,
+                                InstCombiner::BuilderTy &Builder) {
+  assert(I.isBitwiseLogicOp() && "Should and/or/xor");
+  Value *X = I.getOperand(0);
+  Value *Y = I.getOperand(1);
+  Value *Op0, *Op1, *Op2, *Op3, *ShAmt;
+  if (match(X, m_OneUse(m_FShl(m_Value(Op0), m_Value(Op1), m_Value(ShAmt)))) &&
+      match(Y,
+            m_OneUse(m_FShl(m_Value(Op2), m_Value(Op3), m_Specific(ShAmt))))) {
+    Value *NewOp0 = Builder.CreateBinOp(I.getOpcode(), Op0, Op2);
+    Value *NewOp1 = Builder.CreateBinOp(I.getOpcode(), Op1, Op3);
+    Function *F =
+        Intrinsic::getDeclaration(I.getModule(), Intrinsic::fshl, I.getType());
+    return CallInst::Create(F, {NewOp0, NewOp1, ShAmt});
+  }
+  if (match(X, m_OneUse(m_FShr(m_Value(Op0), m_Value(Op1), m_Value(ShAmt)))) &&
+      match(Y,
+            m_OneUse(m_FShr(m_Value(Op2), m_Value(Op3), m_Specific(ShAmt))))) {
+    Value *NewOp0 = Builder.CreateBinOp(I.getOpcode(), Op0, Op2);
+    Value *NewOp1 = Builder.CreateBinOp(I.getOpcode(), Op1, Op3);
+    Function *F =
+        Intrinsic::getDeclaration(I.getModule(), Intrinsic::fshr, I.getType());
+    return CallInst::Create(F, {NewOp0, NewOp1, ShAmt});
+  }
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -2688,6 +2717,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
     return Res;
+  if (Instruction *Res = foldBitwiseLogicWithFunnelShift(I, Builder))
+    return Res;
+
   return nullptr;
 }
@@ -3884,6 +3916,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
   }
+  if (Instruction *Res = foldBitwiseLogicWithFunnelShift(I, Builder))
+    return Res;
+
   return nullptr;
 }
@@ -4799,5 +4834,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
   if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
     return Res;
+  if (Instruction *Res = foldBitwiseLogicWithFunnelShift(I, Builder))
+    return Res;
+
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/bitwiselogic-funnelshift.ll b/llvm/test/Transforms/InstCombine/bitwiselogic-funnelshift.ll
new file mode 100644
index 00000000000000..31d82a53b38009
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitwiselogic-funnelshift.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @test_or_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
+; CHECK-LABEL: define i32 @test_or_fshl(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[C]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[B]], [[D]]
+; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
+ %xor2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh)
+ %ret = or i32 %xor1, %xor2
+ ret i32 %ret
+}
+define i32 @test_and_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
+; CHECK-LABEL: define i32 @test_and_fshl(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A]], [[C]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[B]], [[D]]
+; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
+ %xor2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh)
+ %ret = and i32 %xor1, %xor2
+ ret i32 %ret
+}
+define i32 @test_xor_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
+; CHECK-LABEL: define i32 @test_xor_fshl(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[C]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[B]], [[D]]
+; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
+ %xor2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh)
+ %ret = xor i32 %xor1, %xor2
+ ret i32 %ret
+}
+define i32 @test_or_fshr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
+; CHECK-LABEL: define i32 @test_or_fshr(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[C]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[B]], [[D]]
+; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[SH]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %sh)
+ %xor2 = call i32 @llvm.fshr.i32(i32 %c, i32 %d, i32 %sh)
+ %ret = or i32 %xor1, %xor2
+ ret i32 %ret
+}
+define i32 @test_or_fshl_cascade(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: define i32 @test_or_fshl_cascade(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[C]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[C]]
+; CHECK-NEXT: [[OR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP3]], i32 [[TMP4]], i32 24)
+; CHECK-NEXT: ret i32 [[OR2]]
+;
+ %fshl1 = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 24)
+ %fshl2 = call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 24)
+ %fshl3 = call i32 @llvm.fshl.i32(i32 %c, i32 %c, i32 24)
+ %or1 = or i32 %fshl1, %fshl2
+ %or2 = or i32 %or1, %fshl3
+ ret i32 %or2
+}
+
+; Negative tests
+
+define i32 @test_or_fshl_fshr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
+; CHECK-LABEL: define i32 @test_or_fshl_fshr(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
+; CHECK-NEXT: [[XOR1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]])
+; CHECK-NEXT: [[XOR2:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[D]], i32 [[SH]])
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
+ %xor2 = call i32 @llvm.fshr.i32(i32 %c, i32 %d, i32 %sh)
+ %ret = or i32 %xor1, %xor2
+ ret i32 %ret
+}
+define i32 @test_or_fshl_mismatched_shamt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh1, i32 %sh2) {
+; CHECK-LABEL: define i32 @test_or_fshl_mismatched_shamt(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH1:%.*]], i32 [[SH2:%.*]]) {
+; CHECK-NEXT: [[XOR1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH1]])
+; CHECK-NEXT: [[XOR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH2]])
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh1)
+ %xor2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh2)
+ %ret = or i32 %xor1, %xor2
+ ret i32 %ret
+}
+define i32 @test_add_fshl(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
+; CHECK-LABEL: define i32 @test_add_fshl(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
+; CHECK-NEXT: [[XOR1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]])
+; CHECK-NEXT: [[XOR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH]])
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
+ %xor2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh)
+ %ret = add i32 %xor1, %xor2
+ ret i32 %ret
+}
+define i32 @test_or_fshl_multiuse(i32 %a, i32 %b, i32 %c, i32 %d, i32 %sh) {
+; CHECK-LABEL: define i32 @test_or_fshl_multiuse(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], i32 [[SH:%.*]]) {
+; CHECK-NEXT: [[XOR1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[B]], i32 [[SH]])
+; CHECK-NEXT: call void @use(i32 [[XOR1]])
+; CHECK-NEXT: [[XOR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[C]], i32 [[D]], i32 [[SH]])
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %xor1 = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %sh)
+ call void @use(i32 %xor1)
+ %xor2 = call i32 @llvm.fshl.i32(i32 %c, i32 %d, i32 %sh)
+ %ret = or i32 %xor1, %xor2
+ ret i32 %ret
+}
+
+declare void @use(i32)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
This also applies to other bitwise intrinsics, e.g. bitreverse (https://alive2.llvm.org/ce/z/Gut-vS) and bswap. I think it would be preferable to directly structure this in terms of "bitwise op with two equal intrinsic IDs".
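To make the suggestion concrete, here is a minimal sketch of that restructuring, assuming a hypothetical helper name and the usual InstCombine pattern-match utilities (the committed helper may differ in details): both operands must be one-use calls to the same intrinsic, and a switch on the shared intrinsic ID decides how their operands are recombined.

```
// Hypothetical sketch, not the committed code: fold
//   bitwise(intrin(args...), intrin(args'...))
// where both sides are one-use calls to the same intrinsic ID.
static Instruction *
foldBitwiseLogicWithIntrinsics(BinaryOperator &I,
                               InstCombiner::BuilderTy &Builder) {
  assert(I.isBitwiseLogicOp() && "Should be and/or/xor");
  auto *X = dyn_cast<IntrinsicInst>(I.getOperand(0));
  auto *Y = dyn_cast<IntrinsicInst>(I.getOperand(1));
  if (!X || !Y || !X->hasOneUse() || !Y->hasOneUse() ||
      X->getIntrinsicID() != Y->getIntrinsicID())
    return nullptr;

  Intrinsic::ID IID = X->getIntrinsicID();
  switch (IID) {
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    // Funnel shifts: the shift amounts must match; apply the bitwise op to
    // the corresponding data operands.
    if (X->getArgOperand(2) != Y->getArgOperand(2))
      return nullptr;
    Value *NewOp0 = Builder.CreateBinOp(I.getOpcode(), X->getArgOperand(0),
                                        Y->getArgOperand(0));
    Value *NewOp1 = Builder.CreateBinOp(I.getOpcode(), X->getArgOperand(1),
                                        Y->getArgOperand(1));
    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
    return CallInst::Create(F, {NewOp0, NewOp1, X->getArgOperand(2)});
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    // Byte/bit permutations commute with bitwise logic, so the op can be
    // applied once on the un-permuted operands.
    Value *NewOp = Builder.CreateBinOp(I.getOpcode(), X->getArgOperand(0),
                                       Y->getArgOperand(0));
    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
    return CallInst::Create(F, {NewOp});
  }
  default:
    return nullptr;
  }
}
```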
Force-pushed from c21135e to 2c812e8
Done.
LGTM
This patch does the following folds:

```
bitwise(fshl(A, B, ShAmt), fshl(C, D, ShAmt)) -> fshl(bitwise(A, C), bitwise(B, D), ShAmt)
bitwise(fshr(A, B, ShAmt), fshr(C, D, ShAmt)) -> fshr(bitwise(A, C), bitwise(B, D), ShAmt)
bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B))
bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C)))
bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B))
bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C)))
```

Alive2: https://alive2.llvm.org/ce/z/iZN_TL
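The `bitwise(bswap(A), C)` and `bitwise(bitreverse(A), C)` forms above only pay off when `C` is a constant, since the inner `bswap(C)`/`bitreverse(C)` then constant-folds and a single intrinsic call remains. A minimal sketch of the bswap half of that case, again with a hypothetical helper name (the committed code may handle this inside the same helper):

```
// Hypothetical sketch of the constant-operand variant:
//   bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C)))
// Profitable because bswap(C) simplifies to a constant, leaving one bswap.
static Instruction *
foldBitwiseLogicWithSwappedConstant(BinaryOperator &I,
                                    InstCombiner::BuilderTy &Builder) {
  assert(I.isBitwiseLogicOp() && "Should be and/or/xor");
  Value *A;
  Constant *C;
  // Commutative match: one-use bswap on one side, a constant on the other.
  if (!match(&I, m_c_BinOp(m_OneUse(m_BSwap(m_Value(A))), m_Constant(C))))
    return nullptr;
  // bswap of a constant constant-folds during simplification.
  Value *SwappedC = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, C);
  Value *NewOp = Builder.CreateBinOp(I.getOpcode(), A, SwappedC);
  Function *F =
      Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, I.getType());
  return CallInst::Create(F, {NewOp});
}
```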