Skip to content

Commit 34cdb61

Browse files
committed
[RISCV] Increase default tail duplication threshold to 6 at -O3
This mirrors what AArch64 already does. Raising the threshold to 6 will increase code size, but it will also reduce the number of direct branches, which can benefit CPUs with wide fetch/issue units. The value 6 is debatable; an alternative would be to set it to `SchedModel.IssueWidth`.
1 parent e8c4438 commit 34cdb61

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3763,6 +3763,11 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
37633763
return ArrayRef(TargetFlags);
37643764
}
37653765

3766+
unsigned int
3767+
RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
3768+
return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
3769+
}
3770+
37663771
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
37673772
bool RISCV::isSEXT_W(const MachineInstr &MI) {
37683773
return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
288288
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
289289
getSerializableMachineMemOperandTargetFlags() const override;
290290

291+
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
292+
291293
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
292294
switch (RegClassID) {
293295
case RISCV::VRRegClassID:

llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
66
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
7-
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3
7+
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
88

99
@a = external dso_local local_unnamed_addr global i32
1010
@b = external dso_local local_unnamed_addr global i32
@@ -41,12 +41,15 @@ define dso_local i32 @test(i32 %n) {
4141
; CHECK-O3-NEXT: lui a1, %hi(a)
4242
; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
4343
; CHECK-O3-NEXT: mul a0, a1, a0
44-
; CHECK-O3-NEXT: j .LBB0_3
44+
; CHECK-O3-NEXT: lui a1, %hi(c)
45+
; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
46+
; CHECK-O3-NEXT: addi a0, a0, -1
47+
; CHECK-O3-NEXT: mulw a0, a0, a1
48+
; CHECK-O3-NEXT: tail foo
4549
; CHECK-O3-NEXT: .LBB0_2: # %if.else
4650
; CHECK-O3-NEXT: lui a1, %hi(b)
4751
; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
4852
; CHECK-O3-NEXT: divw a0, a1, a0
49-
; CHECK-O3-NEXT: .LBB0_3: # %if.end
5053
; CHECK-O3-NEXT: lui a1, %hi(c)
5154
; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
5255
; CHECK-O3-NEXT: addi a0, a0, -1

0 commit comments

Comments
 (0)