-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Increase default tail duplication threshold to 6 at -O3 #98873
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Pengcheng Wang (wangpc-pp) ChangesThis is just like AArch64. Changing the threshold to 6 will increase the code size, but will The value 6 may be debatable, we can set it to Full diff: https://github.com/llvm/llvm-project/pull/98873.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 5e1b5284751f4..5f63558739003 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3763,6 +3763,11 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+unsigned int
+RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+ return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
+}
+
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
bool RISCV::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index f0c0953a3e56a..c4c8a18bda6a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -286,6 +286,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
getSerializableMachineMemOperandTargetFlags() const override;
+ unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
switch (RegClassID) {
case RISCV::VRRegClassID:
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
new file mode 100644
index 0000000000000..84373ce80843f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+@a = external dso_local local_unnamed_addr global i32
+@b = external dso_local local_unnamed_addr global i32
+@c = external dso_local local_unnamed_addr global i32
+
+declare i32 @foo(i32)
+
+define dso_local i32 @test(i32 %n) {
+; CHECK-O2-LABEL: test:
+; CHECK-O2: # %bb.0: # %entry
+; CHECK-O2-NEXT: sext.w a1, a0
+; CHECK-O2-NEXT: blez a1, .LBB0_2
+; CHECK-O2-NEXT: # %bb.1: # %if.then
+; CHECK-O2-NEXT: lui a1, %hi(a)
+; CHECK-O2-NEXT: lw a1, %lo(a)(a1)
+; CHECK-O2-NEXT: mul a0, a1, a0
+; CHECK-O2-NEXT: j .LBB0_3
+; CHECK-O2-NEXT: .LBB0_2: # %if.else
+; CHECK-O2-NEXT: lui a1, %hi(b)
+; CHECK-O2-NEXT: lw a1, %lo(b)(a1)
+; CHECK-O2-NEXT: divw a0, a1, a0
+; CHECK-O2-NEXT: .LBB0_3: # %if.end
+; CHECK-O2-NEXT: lui a1, %hi(c)
+; CHECK-O2-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O2-NEXT: addi a0, a0, -1
+; CHECK-O2-NEXT: mulw a0, a0, a1
+; CHECK-O2-NEXT: tail foo
+;
+; CHECK-O3-LABEL: test:
+; CHECK-O3: # %bb.0: # %entry
+; CHECK-O3-NEXT: sext.w a1, a0
+; CHECK-O3-NEXT: blez a1, .LBB0_2
+; CHECK-O3-NEXT: # %bb.1: # %if.then
+; CHECK-O3-NEXT: lui a1, %hi(a)
+; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
+; CHECK-O3-NEXT: mul a0, a1, a0
+; CHECK-O3-NEXT: lui a1, %hi(c)
+; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT: addi a0, a0, -1
+; CHECK-O3-NEXT: mulw a0, a0, a1
+; CHECK-O3-NEXT: tail foo
+; CHECK-O3-NEXT: .LBB0_2: # %if.else
+; CHECK-O3-NEXT: lui a1, %hi(b)
+; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
+; CHECK-O3-NEXT: divw a0, a1, a0
+; CHECK-O3-NEXT: lui a1, %hi(c)
+; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT: addi a0, a0, -1
+; CHECK-O3-NEXT: mulw a0, a0, a1
+; CHECK-O3-NEXT: tail foo
+entry:
+ %cmp = icmp sgt i32 %n, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %va = load i32, ptr @a
+ %mul = mul nsw i32 %va, %n
+ br label %if.end
+
+if.else:
+ %vb = load i32, ptr @b
+ %div = sdiv i32 %vb, %n
+ br label %if.end
+
+if.end:
+ %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ]
+ %vc = load i32, ptr @c
+ %add = add nsw i32 %phi, -1
+ %arg = mul i32 %add, %vc
+ %ret = tail call i32 @foo(i32 %arg)
+ ret i32 %ret
+}
|
Do you have any performance numbers for this? I can try and get some for the spacemit-x60. |
Thanks! I haven't run any benchmarks. I'd appreciate it if you can test it for me. :-) |
Sorry for the delay, I've been running SPEC CPU 2017 intrate with the changes but the results are a bit noisy, the standard deviation is about +-1%. There's been no noticeable regressions anyway (and no improvements either but I wouldn't expect any given that it's dual-issue) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Thanks a lot! I just got my Spacemit K1 laptops. :-) |
This is just like AArch64. Changing the threshold to 6 will increase the code size, but will also decrease direct branches. CPUs with wide fetch/issue units can benefit from it. The value 6 may be debatable, we can set it to `SchedModel.IssueWidth`.
6522052
to
f7e99ad
Compare
This is just like AArch64.
Changing the threshold to 6 will increase the code size, but will
also decrease unconditional branches. CPUs with wide fetch/issue units
can benefit from it.
The value 6 may be debatable, we can set it to
SchedModel.IssueWidth
.