Skip to content

Commit f590963

Browse files
authored
[RISCV] Implement RISCVTTIImpl::getPreferredAddressingMode for HasVendorXCVmem (llvm#120533)
For a simple matmult kernel this heuristic reduces the length of the critical basic block from 15 to 12 instructions, resulting in a 20% speedup. **Without heuristic:** ``` 13688: 001b838b cv.lb t2, (s7), 0x1 1368c: 09cdbcab cv.lb s9, t3(s11) 13690: 089db62b cv.lb a2, s1(s11) 13694: 092dbdab cv.lb s11, s2(s11) 13698: 001d028b cv.lb t0, (s10), 0x1 1369c: 00f282b3 add t0, t0, a5 136a0: 9072b52b cv.mac a0, t0, t2 136a4: 9192bfab cv.mac t6, t0, s9 136a8: 90c2beab cv.mac t4, t0, a2 136ac: 91b2bf2b cv.mac t5, t0, s11 136b0: fffc0c13 addi s8, s8, -0x1 136b4: 018e0633 add a2, t3, s8 136b8: 91b2b0ab cv.mac ra, t0, s11 136bc: 000b8d93 mv s11, s7 136c0: fc0614e3 bnez a2, 0x13688 <muriscv_nn_vec_mat_mult_t_s8+0x2f0> #instrs = 15 ``` **With heuristic:** ``` 7bc0: 001c860b cv.lb a2, (s9), 0x1 7bc4: 001e0d0b cv.lb s10, (t3), 0x1 7bc8: 001e808b cv.lb ra, (t4), 0x1 7bcc: 0015038b cv.lb t2, (a0), 0x1 7bd0: 001c028b cv.lb t0, (s8), 0x1 7bd4: 00f282b3 add t0, t0, a5 7bd8: 90c2bfab cv.mac t6, t0, a2 7bdc: 91a2b92b cv.mac s2, t0, s10 7be0: 9012b5ab cv.mac a1, t0, ra 7be4: 9072b9ab cv.mac s3, t0, t2 7be8: 9072b72b cv.mac a4, t0, t2 7bec: fc851ae3 bne a0, s0, 0x7bc0 <muriscv_nn_vec_mat_mult_t_s8+0x338> #instrs = 12 improvement = 1 - 12/15 = 0.2 = 20% ```
1 parent e50ec3e commit f590963

File tree

3 files changed

+46
-0
lines changed

3 files changed

+46
-0
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,6 +2329,15 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
23292329
return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
23302330
}
23312331

2332+
/// Reports the addressing-mode kind preferred for loop \p L.
///
/// Returns TTI::AMK_PostIndexed when the subtarget has the XCVmem vendor
/// extension and is not 64-bit. Every other configuration gets whatever
/// BasicTTIImplBase::getPreferredAddressingMode returns for the same
/// \p L and \p SE.
TTI::AddressingModeKind
2333+
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
2334+
ScalarEvolution *SE) const {
2335+
// Only the XCVmem + non-64-bit combination opts into post-indexed addressing.
if (ST->hasVendorXCVmem() && !ST->is64Bit())
2336+
return TTI::AMK_PostIndexed;
2337+
2338+
return BasicTTIImplBase::getPreferredAddressingMode(L, SE);
2339+
}
2340+
23322341
bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
23332342
const TargetTransformInfo::LSRCost &C2) {
23342343
// RISC-V specific here are "instruction number 1st priority".

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
388388
llvm_unreachable("unknown register class");
389389
}
390390

391+
/// Returns the addressing-mode kind this target prefers for loop \p L.
/// Declared here only; the body is not part of this header hunk.
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L,
392+
ScalarEvolution *SE) const;
393+
391394
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
392395
if (Vector)
393396
return RISCVRegisterClass::VRRC;
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -O3 -mtriple=riscv32 -mattr=+m,+xcvmem -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s --check-prefixes=CHECK
4+
5+
; Loop that walks two byte pointers, %b and %b + %e, advancing each by one
; byte per iteration and loading an i8 through both. Only the load through
; the second pointer is zero-extended and returned. The loop exits when %0
; is true.
; NOTE(review): the expected `cv.lbu a1, (a3), 1` is presumably the
; post-incrementing XCVmem load form -- confirm against the XCVmem spec.
define i32 @test_heuristic(ptr %b, i32 %e, i1 %0) {
6+
; CHECK-LABEL: test_heuristic:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: add a3, a0, a1
9+
; CHECK-NEXT: andi a2, a2, 1
10+
; CHECK-NEXT: .LBB0_1: # %loop
11+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
12+
; CHECK-NEXT: cv.lbu a1, (a3), 1
13+
; CHECK-NEXT: addi a0, a0, 1
14+
; CHECK-NEXT: beqz a2, .LBB0_1
15+
; CHECK-NEXT: # %bb.2: # %exit
16+
; CHECK-NEXT: mv a0, a1
17+
; CHECK-NEXT: ret
18+
entry:
19+
%1 = getelementptr i8, ptr %b, i32 %e
20+
br label %loop
21+
22+
loop: ; preds = %loop, %entry
23+
%2 = phi ptr [ %b, %entry ], [ %7, %loop ]
24+
%3 = phi ptr [ %1, %entry ], [ %8, %loop ]
25+
%4 = load i8, ptr %2, align 1
26+
%5 = load i8, ptr %3, align 1
27+
%6 = zext i8 %5 to i32
28+
%7 = getelementptr i8, ptr %2, i32 1
29+
%8 = getelementptr i8, ptr %3, i32 1
30+
br i1 %0, label %exit, label %loop
31+
32+
exit: ; preds = %loop
33+
ret i32 %6
34+
}

0 commit comments

Comments
 (0)