Skip to content

Commit 342ea51

Browse files
committed
[RISCV] Implement RISCVTTIImpl::getPreferredAddressingMode for HasVendorXCVmem
For a simple matmult kernel this heuristic reduces the length of the critical basic block from 20 to 15 instructions, resulting in a 20% speedup. [RISCV] Address PR comment. [RISCV] Add !ST->is64Bit() check.
1 parent 75a5f8c commit 342ea51

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,6 +2329,15 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
23292329
return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
23302330
}
23312331

2332+
TTI::AddressingModeKind
2333+
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
2334+
ScalarEvolution *SE) const {
2335+
if (ST->hasVendorXCVmem() && !ST->is64Bit())
2336+
return TTI::AMK_PostIndexed;
2337+
2338+
return BasicTTIImplBase::getPreferredAddressingMode(L, SE);
2339+
}
2340+
23322341
bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
23332342
const TargetTransformInfo::LSRCost &C2) {
23342343
// RISC-V specific here are "instruction number 1st priority".

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
388388
llvm_unreachable("unknown register class");
389389
}
390390

391+
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L,
392+
ScalarEvolution *SE) const;
393+
391394
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
392395
if (Vector)
393396
return RISCVRegisterClass::VRRC;

llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
define i32 @test_heuristic(ptr %b, i32 %e, i1 %0) {
66
; CHECK-LABEL: test_heuristic:
77
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: add a3, a0, a1
89
; CHECK-NEXT: andi a2, a2, 1
910
; CHECK-NEXT: .LBB0_1: # %loop
1011
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
11-
; CHECK-NEXT: mv a3, a0
12+
; CHECK-NEXT: cv.lbu a1, (a3), 1
1213
; CHECK-NEXT: addi a0, a0, 1
1314
; CHECK-NEXT: beqz a2, .LBB0_1
1415
; CHECK-NEXT: # %bb.2: # %exit
15-
; CHECK-NEXT: cv.lbu a0, a1(a3)
16+
; CHECK-NEXT: mv a0, a1
1617
; CHECK-NEXT: ret
1718
entry:
1819
%1 = getelementptr i8, ptr %b, i32 %e

0 commit comments

Comments
 (0)