-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ConstraintElim] Handle switch cases with the same destination #76928
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThe original implementation only adds the fact The original motivation of this patch is to optimize a pattern in cvc5. For example:
Compile-time impact: http://llvm-compile-time-tracker.com/compare.php?from=8c72ff716b3e4b298695fa3faf6add860c6dbcb2&to=24a93e6c04b60b5d7d567379b8a0cd082c6554b0&stat=instructions%3Au
Full diff: https://github.com/llvm/llvm-project/pull/76928.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index cc93c8617c05e2..59c23ea5993f6f 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1057,13 +1057,49 @@ void State::addInfoFor(BasicBlock &BB) {
}
if (auto *Switch = dyn_cast<SwitchInst>(BB.getTerminator())) {
- for (auto &Case : Switch->cases()) {
- BasicBlock *Succ = Case.getCaseSuccessor();
- Value *V = Case.getCaseValue();
- if (!canAddSuccessor(BB, Succ))
+ DenseMap<BasicBlock *, SmallVector<ConstantInt *>> BBMap;
+ for (auto &Case : Switch->cases())
+ BBMap[Case.getCaseSuccessor()].push_back(Case.getCaseValue());
+ // TODO: handle default dest
+ BBMap.erase(Switch->getDefaultDest());
+ Value *Cond = Switch->getCondition();
+ Type *Ty = Switch->getCondition()->getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ for (auto &[Succ, Values] : BBMap) {
+ if (Succ->getUniquePredecessor() != &BB)
continue;
- WorkList.emplace_back(FactOrCheck::getConditionFact(
- DT.getNode(Succ), CmpInst::ICMP_EQ, Switch->getCondition(), V));
+ DomTreeNode *Node = DT.getNode(Succ);
+ if (Values.size() == 1)
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ Node, CmpInst::ICMP_EQ, Cond, Values.back()));
+ else {
+ APInt SignedMin = APInt::getSignedMaxValue(BitWidth);
+ APInt SignedMax = APInt::getSignedMinValue(BitWidth);
+ APInt UnsignedMin = APInt::getMaxValue(BitWidth);
+ APInt UnsignedMax = APInt::getMinValue(BitWidth);
+
+ for (ConstantInt *V : Values) {
+ const APInt &C = V->getValue();
+ SignedMin = APIntOps::smin(SignedMin, C);
+ SignedMax = APIntOps::smax(SignedMax, C);
+ UnsignedMin = APIntOps::umin(UnsignedMin, C);
+ UnsignedMax = APIntOps::umax(UnsignedMax, C);
+ }
+ if (!SignedMin.isMinSignedValue())
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ Node, CmpInst::ICMP_SGE, Cond, ConstantInt::get(Ty, SignedMin)));
+ if (!SignedMax.isMaxSignedValue())
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ Node, CmpInst::ICMP_SLE, Cond, ConstantInt::get(Ty, SignedMax)));
+ if (!UnsignedMin.isMinValue())
+ WorkList.emplace_back(
+ FactOrCheck::getConditionFact(Node, CmpInst::ICMP_UGE, Cond,
+ ConstantInt::get(Ty, UnsignedMin)));
+ if (!UnsignedMax.isMaxValue())
+ WorkList.emplace_back(
+ FactOrCheck::getConditionFact(Node, CmpInst::ICMP_ULE, Cond,
+ ConstantInt::get(Ty, UnsignedMax)));
+ }
}
return;
}
diff --git a/llvm/test/Transforms/ConstraintElimination/switch.ll b/llvm/test/Transforms/ConstraintElimination/switch.ll
index b104b26e83cff2..fd581d081b4d3b 100644
--- a/llvm/test/Transforms/ConstraintElimination/switch.ll
+++ b/llvm/test/Transforms/ConstraintElimination/switch.ll
@@ -151,10 +151,8 @@ define i1 @switch_same_destination_for_different_cases(i8 %x) {
; CHECK: exit.2:
; CHECK-NEXT: [[C_3:%.*]] = icmp ult i8 [[X]], 7
; CHECK-NEXT: call void @use(i1 [[C_3]])
-; CHECK-NEXT: [[C_4:%.*]] = icmp ult i8 [[X]], 6
-; CHECK-NEXT: call void @use(i1 [[C_4]])
-; CHECK-NEXT: [[C_5:%.*]] = icmp ult i8 [[X]], 11
-; CHECK-NEXT: call void @use(i1 [[C_5]])
+; CHECK-NEXT: call void @use(i1 false)
+; CHECK-NEXT: call void @use(i1 true)
; CHECK-NEXT: [[C_6:%.*]] = icmp ult i8 [[X]], 10
; CHECK-NEXT: ret i1 [[C_6]]
;
@@ -181,4 +179,115 @@ exit.2:
ret i1 %c.6
}
+define i1 @test_switch_with_same_dest(i32 %a) {
+; CHECK-LABEL: @test_switch_with_same_dest(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 1023
+; CHECK-NEXT: switch i32 [[B]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i32 37, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 38, label [[SW_BB]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: ret i1 false
+; CHECK: sw.default:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %b = and i32 %a, 1023
+ switch i32 %b, label %sw.default [
+ i32 37, label %sw.bb
+ i32 38, label %sw.bb
+ ]
+
+sw.bb:
+ %cmp = icmp eq i32 %b, 1023
+ ret i1 %cmp
+sw.default:
+ ret i1 false
+}
+
+define i1 @test_switch_with_same_dest_zext(i16 %a) {
+; CHECK-LABEL: @test_switch_with_same_dest_zext(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = and i16 [[A:%.*]], 1023
+; CHECK-NEXT: [[B_EXT:%.*]] = zext nneg i16 [[B]] to i32
+; CHECK-NEXT: switch i32 [[B_EXT]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i32 37, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 38, label [[SW_BB]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: ret i1 false
+; CHECK: sw.default:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %b = and i16 %a, 1023
+ %b.ext = zext nneg i16 %b to i32
+ switch i32 %b.ext, label %sw.default [
+ i32 37, label %sw.bb
+ i32 38, label %sw.bb
+ ]
+
+sw.bb:
+ %cmp = icmp eq i16 %b, 1023
+ ret i1 %cmp
+sw.default:
+ ret i1 false
+}
+
+; Negative tests
+
+define i1 @test_switch_with_same_dest_fail1(i32 %a) {
+; CHECK-LABEL: @test_switch_with_same_dest_fail1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 1023
+; CHECK-NEXT: switch i32 [[B]], label [[SW_BB:%.*]] [
+; CHECK-NEXT: i32 37, label [[SW_BB]]
+; CHECK-NEXT: i32 38, label [[SW_BB]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B]], 1023
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %b = and i32 %a, 1023
+ switch i32 %b, label %sw.bb [
+ i32 37, label %sw.bb
+ i32 38, label %sw.bb
+ ]
+
+sw.bb:
+ %cmp = icmp eq i32 %b, 1023
+ ret i1 %cmp
+}
+
+define i1 @test_switch_with_same_dest_fail2(i32 %a, i1 %cond) {
+; CHECK-LABEL: @test_switch_with_same_dest_fail2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 1023
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[SW_BB:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: switch i32 [[B]], label [[SW_BB]] [
+; CHECK-NEXT: i32 37, label [[SW_BB]]
+; CHECK-NEXT: i32 38, label [[SW_BB]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B]], 1023
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %b = and i32 %a, 1023
+ br i1 %cond, label %if.then, label %sw.bb
+
+if.then:
+ switch i32 %b, label %sw.bb [
+ i32 37, label %sw.bb
+ i32 38, label %sw.bb
+ ]
+
+sw.bb:
+ %cmp = icmp eq i32 %b, 1023
+ ret i1 %cmp
+}
+
declare void @use(i1)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is a good fit for ConstraintElimination. I'd expect either IPSCCP or CVP to optimize such cases. If they don't, we should fix it there.
We have handled this in CVP: llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp Lines 1428 to 1475 in ce61b0e
However, it cannot fix the original issue because the condition of switch is godbolt: https://godbolt.org/z/38TMcv4xE |
@dtcxzyw Can we solve this by folding |
This patch folds `switch(zext/sext(X))` into `switch(X)`. The original motivation of this patch is to optimize a pattern found in cvc5. For example: ``` %bf.load.i = load i16, ptr %d_kind.i, align 8 %bf.clear.i = and i16 %bf.load.i, 1023 %bf.cast.i = zext nneg i16 %bf.clear.i to i32 switch i32 %bf.cast.i, label %if.else [ i32 335, label %if.then i32 303, label %if.then ] if.then: ; preds = %entry, %entry %d_children.i.i = getelementptr inbounds %"class.cvc5::internal::expr::NodeValue", ptr %0, i64 0, i32 3 %cmp.i.i.i.i.i = icmp eq i16 %bf.clear.i, 1023 %cond.i.i.i.i.i = select i1 %cmp.i.i.i.i.i, i32 -1, i32 %bf.cast.i ``` `%cmp.i.i.i.i.i` always evaluates to false because `%bf.clear.i` can only be 335 or 303. Folding `switch i32 %bf.cast.i` to `switch i16 %bf.clear.i` will help `CVP` to handle this case. See also #76928 (comment). Compile-time impact: http://llvm-compile-time-tracker.com/compare.php?from=7954c57124b495fbdc73674d71f2e366e4afe522&to=502b13ed34e561d995ae1f724cf06d20008bd86f&stat=instructions:u |stage1-O3|stage1-ReleaseThinLTO|stage1-ReleaseLTO-g|stage1-O0-g|stage2-O3|stage2-O0-g|stage2-clang| |--|--|--|--|--|--|--| |+0.03%|+0.06%|+0.07%|+0.00%|-0.02%|-0.03%|+0.02%|
Is this still needed now that the other changes have landed? |
Looks like there are other cases which are not covered by #76988.
|
This patch folds `switch(zext/sext(X))` into `switch(X)`. The original motivation of this patch is to optimize a pattern found in cvc5. For example: ``` %bf.load.i = load i16, ptr %d_kind.i, align 8 %bf.clear.i = and i16 %bf.load.i, 1023 %bf.cast.i = zext nneg i16 %bf.clear.i to i32 switch i32 %bf.cast.i, label %if.else [ i32 335, label %if.then i32 303, label %if.then ] if.then: ; preds = %entry, %entry %d_children.i.i = getelementptr inbounds %"class.cvc5::internal::expr::NodeValue", ptr %0, i64 0, i32 3 %cmp.i.i.i.i.i = icmp eq i16 %bf.clear.i, 1023 %cond.i.i.i.i.i = select i1 %cmp.i.i.i.i.i, i32 -1, i32 %bf.cast.i ``` `%cmp.i.i.i.i.i` always evaluates to false because `%bf.clear.i` can only be 335 or 303. Folding `switch i32 %bf.cast.i` to `switch i16 %bf.clear.i` will help `CVP` to handle this case. See also llvm#76928 (comment). Compile-time impact: http://llvm-compile-time-tracker.com/compare.php?from=7954c57124b495fbdc73674d71f2e366e4afe522&to=502b13ed34e561d995ae1f724cf06d20008bd86f&stat=instructions:u |stage1-O3|stage1-ReleaseThinLTO|stage1-ReleaseLTO-g|stage1-O0-g|stage2-O3|stage2-O0-g|stage2-clang| |--|--|--|--|--|--|--| |+0.03%|+0.06%|+0.07%|+0.00%|-0.02%|-0.03%|+0.02%|
The original implementation only adds the fact
cond == C
if BB dominates DestBB and there is a unique edge from BB to DestBB.This patch adds more range information for switch cases with the same destination if BB is the unique predecessor of DestBB.
The original motivation of this patch is to optimize a pattern in cvc5. For example:
%cmp.i.i.i.i.i
always evaluates to false because%bf.clear.i
can only be 335 or 303.Compile-time impact: http://llvm-compile-time-tracker.com/compare.php?from=8c72ff716b3e4b298695fa3faf6add860c6dbcb2&to=24a93e6c04b60b5d7d567379b8a0cd082c6554b0&stat=instructions%3Au