Skip to content

Commit 45be680

Browse files
authored
[SimplifyCFG] Emit rotl directly in ReduceSwitchRange (#77603)
This patch emits `ROTL(Cond, BitWidth - Shift)` directly in `ReduceSwitchRange`, as a call to the `llvm.fshl` intrinsic with both data operands set to the same value. This should give better codegen because `SimplifyDemandedBits` will break the rotation pattern when it is written in the original `lshr`/`shl`/`or` form. See also #73441 and the IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/115/files. This patch should cover most of the cases handled by #73441.
1 parent 113bce0 commit 45be680

File tree

2 files changed

+26
-37
lines changed

2 files changed

+26
-37
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6919,18 +6919,17 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
69196919

69206920
auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
69216921
Builder.SetInsertPoint(SI);
6922-
auto *ShiftC = ConstantInt::get(Ty, Shift);
6923-
auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6924-
auto *LShr = Builder.CreateLShr(Sub, ShiftC);
6925-
auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
6926-
auto *Rot = Builder.CreateOr(LShr, Shl);
6922+
Value *Sub =
6923+
Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6924+
Value *Rot = Builder.CreateIntrinsic(
6925+
Ty, Intrinsic::fshl,
6926+
{Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
69276927
SI->replaceUsesOfWith(SI->getCondition(), Rot);
69286928

69296929
for (auto Case : SI->cases()) {
69306930
auto *Orig = Case.getCaseValue();
69316931
auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
6932-
Case.setValue(
6933-
cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
6932+
Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
69346933
}
69356934
return true;
69366935
}

llvm/test/Transforms/SimplifyCFG/rangereduce.ll

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,11 @@ target datalayout = "e-n32"
77
define i32 @test1(i32 %a) {
88
; CHECK-LABEL: @test1(
99
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
10-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
11-
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30
12-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
13-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 4
14-
; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
10+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 30)
11+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 4
12+
; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
1513
; CHECK: switch.lookup:
16-
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test1, i32 0, i32 [[TMP4]]
14+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test1, i32 0, i32 [[TMP2]]
1715
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
1816
; CHECK-NEXT: br label [[COMMON_RET]]
1917
; CHECK: common.ret:
@@ -183,13 +181,11 @@ three:
183181
define i32 @test6(i32 %a) optsize {
184182
; CHECK-LABEL: @test6(
185183
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], -109
186-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
187-
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30
188-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
189-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 4
190-
; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
184+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 30)
185+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 4
186+
; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
191187
; CHECK: switch.lookup:
192-
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test6, i32 0, i32 [[TMP4]]
188+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test6, i32 0, i32 [[TMP2]]
193189
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
194190
; CHECK-NEXT: br label [[COMMON_RET]]
195191
; CHECK: common.ret:
@@ -218,15 +214,13 @@ define i8 @test7(i8 %a) optsize {
218214
; CHECK-LABEL: @test7(
219215
; CHECK-NEXT: common.ret:
220216
; CHECK-NEXT: [[TMP0:%.*]] = sub i8 [[A:%.*]], -36
221-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[TMP0]], 2
222-
; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP0]], 6
223-
; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
224-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 4
225-
; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i8 [[TMP3]] to i32
217+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[TMP0]], i8 [[TMP0]], i8 6)
218+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 4
219+
; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i8 [[TMP1]] to i32
226220
; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i32 [[SWITCH_CAST]], 8
227221
; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i32 -943228976, [[SWITCH_SHIFTAMT]]
228222
; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i32 [[SWITCH_DOWNSHIFT]] to i8
229-
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP4]], i8 [[SWITCH_MASKED]], i8 -93
223+
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP2]], i8 [[SWITCH_MASKED]], i8 -93
230224
; CHECK-NEXT: ret i8 [[COMMON_RET_OP]]
231225
;
232226
switch i8 %a, label %def [
@@ -250,13 +244,11 @@ three:
250244
define i32 @test8(i32 %a) optsize {
251245
; CHECK-LABEL: @test8(
252246
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
253-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
254-
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30
255-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
256-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 5
257-
; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
247+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 30)
248+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 5
249+
; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
258250
; CHECK: switch.lookup:
259-
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr @switch.table.test8, i32 0, i32 [[TMP4]]
251+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr @switch.table.test8, i32 0, i32 [[TMP2]]
260252
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
261253
; CHECK-NEXT: br label [[COMMON_RET]]
262254
; CHECK: common.ret:
@@ -284,13 +276,11 @@ three:
284276
define i32 @test9(i32 %a) {
285277
; CHECK-LABEL: @test9(
286278
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 6
287-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 1
288-
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 31
289-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
290-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 8
291-
; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
279+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 31)
280+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 8
281+
; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]]
292282
; CHECK: switch.lookup:
293-
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.test9, i32 0, i32 [[TMP4]]
283+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.test9, i32 0, i32 [[TMP2]]
294284
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
295285
; CHECK-NEXT: br label [[COMMON_RET]]
296286
; CHECK: common.ret:

0 commit comments

Comments
 (0)