-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SimplifyCFG] Emit `rotl` directly in `ReduceSwitchRange`
#77603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
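@llvm/pr-subscribers-llvm-transforms
Author: Yingwei Zheng (dtcxzyw)
Changes: This patch emits `ROTL(Cond, BitWidth - Shift)` directly in `ReduceSwitchRange`. This should give better codegen because `SimplifyDemandedBits` will break the rotation patterns in the original form. See also #73441 and the IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/115/files.
Full diff: https://github.com/llvm/llvm-project/pull/77603.diff
2 Files Affected: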
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 61d891d65346bd..7515e539e7fb78 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6919,18 +6919,17 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
Builder.SetInsertPoint(SI);
- auto *ShiftC = ConstantInt::get(Ty, Shift);
- auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
- auto *LShr = Builder.CreateLShr(Sub, ShiftC);
- auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
- auto *Rot = Builder.CreateOr(LShr, Shl);
+ Value *Sub =
+ Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+ Value *Rot = Builder.CreateIntrinsic(
+ Ty, Intrinsic::fshl,
+ {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
SI->replaceUsesOfWith(SI->getCondition(), Rot);
for (auto Case : SI->cases()) {
auto *Orig = Case.getCaseValue();
auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
- Case.setValue(
- cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
+ Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
}
return true;
}
diff --git a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
index b1a3802a2bb58b..d47bf5f9541881 100644
--- a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
+++ b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
@@ -7,13 +7,11 @@ target datalayout = "e-n32"
define i32 @test1(i32 %a) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 4
-; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 30)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test1, i32 0, i32 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test1, i32 0, i32 [[TMP2]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[COMMON_RET]]
; CHECK: common.ret:
@@ -183,13 +181,11 @@ three:
define i32 @test6(i32 %a) optsize {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], -109
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 4
-; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 30)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test6, i32 0, i32 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test6, i32 0, i32 [[TMP2]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[COMMON_RET]]
; CHECK: common.ret:
@@ -218,15 +214,13 @@ define i8 @test7(i8 %a) optsize {
; CHECK-LABEL: @test7(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[TMP0:%.*]] = sub i8 [[A:%.*]], -36
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP0]], 6
-; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 4
-; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i8 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[TMP0]], i8 [[TMP0]], i8 6)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 4
+; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i32 [[SWITCH_CAST]], 8
; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i32 -943228976, [[SWITCH_SHIFTAMT]]
; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i32 [[SWITCH_DOWNSHIFT]] to i8
-; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP4]], i8 [[SWITCH_MASKED]], i8 -93
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP2]], i8 [[SWITCH_MASKED]], i8 -93
; CHECK-NEXT: ret i8 [[COMMON_RET_OP]]
;
switch i8 %a, label %def [
@@ -250,13 +244,11 @@ three:
define i32 @test8(i32 %a) optsize {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 5
-; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 30)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 5
+; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr @switch.table.test8, i32 0, i32 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr @switch.table.test8, i32 0, i32 [[TMP2]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[COMMON_RET]]
; CHECK: common.ret:
@@ -284,13 +276,11 @@ three:
define i32 @test9(i32 %a) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 6
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 31
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 8
-; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 31)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 8
+; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.test9, i32 0, i32 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.test9, i32 0, i32 [[TMP2]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[COMMON_RET]]
; CHECK: common.ret:
|
LGTM. If |
We always transform this pattern into |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch emits `ROTL(Cond, BitWidth - Shift)` directly in `ReduceSwitchRange`. This should give better codegen because `SimplifyDemandedBits` will break the rotation patterns in the original form. See also llvm#73441 and the IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/115/files. This patch should cover most of the cases handled by llvm#73441.
This patch emits `ROTL(Cond, BitWidth - Shift)` directly in `ReduceSwitchRange`. This should give better codegen because `SimplifyDemandedBits` will break the rotation patterns in the original form.
See also #73441 and the IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/115/files.
This patch should cover most of the cases handled by #73441.