Skip to content

Commit dfc1901

Browse files
committed
[RISCV] Custom lower ISD::VSCALE.
This patch custom lowers ISD::VSCALE into a csrr vlenb followed by a shift right by 3 followed by a multiply by the scale amount. I've added computeKnownBits support to indicate that the csrr vlenb always produces 3 trailing zero bits, so the shift right is "exact". This allows the shift and multiply sequence to be nicely optimized into a single shift or removed completely when the scale amount is a power of 2. The non-power-of-2 case multiplying by 24 is still producing suboptimal code. We could remove the right shift and use a multiply by 3. Hopefully we can improve DAG combine to fix that, since it's not unique to this sequence. This replaces D94144. Reviewed By: HsiangKai Differential Revision: https://reviews.llvm.org/D94249
1 parent 3de92ca commit dfc1901

File tree

6 files changed

+170
-0
lines changed

6 files changed

+170
-0
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/IR/IntrinsicsRISCV.h"
3333
#include "llvm/Support/Debug.h"
3434
#include "llvm/Support/ErrorHandling.h"
35+
#include "llvm/Support/KnownBits.h"
3536
#include "llvm/Support/MathExtras.h"
3637
#include "llvm/Support/raw_ostream.h"
3738

@@ -350,6 +351,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
350351
if (Subtarget.hasStdExtV()) {
351352
setBooleanVectorContents(ZeroOrOneBooleanContent);
352353

354+
setOperationAction(ISD::VSCALE, XLenVT, Custom);
355+
353356
// RVV intrinsics may have illegal operands.
354357
// We also need to custom legalize vmv.x.s.
355358
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -686,6 +689,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
686689
}
687690
case ISD::SPLAT_VECTOR:
688691
return lowerSPLATVECTOR(Op, DAG);
692+
case ISD::VSCALE: {
693+
MVT VT = Op.getSimpleValueType();
694+
SDLoc DL(Op);
695+
SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
696+
// We define our scalable vector types for lmul=1 to use a 64 bit known
697+
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
698+
// vscale as VLENB / 8.
699+
SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
700+
DAG.getConstant(3, DL, VT));
701+
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
702+
}
689703
}
690704
}
691705

@@ -1867,6 +1881,30 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18671881
return true;
18681882
}
18691883

1884+
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
1885+
KnownBits &Known,
1886+
const APInt &DemandedElts,
1887+
const SelectionDAG &DAG,
1888+
unsigned Depth) const {
1889+
unsigned Opc = Op.getOpcode();
1890+
assert((Opc >= ISD::BUILTIN_OP_END ||
1891+
Opc == ISD::INTRINSIC_WO_CHAIN ||
1892+
Opc == ISD::INTRINSIC_W_CHAIN ||
1893+
Opc == ISD::INTRINSIC_VOID) &&
1894+
"Should use MaskedValueIsZero if you don't know whether Op"
1895+
" is a target node!");
1896+
1897+
Known.resetAll();
1898+
switch (Opc) {
1899+
default: break;
1900+
case RISCVISD::READ_VLENB:
1901+
// We assume VLENB is at least 8 bytes.
1902+
// FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
1903+
Known.Zero.setLowBits(3);
1904+
break;
1905+
}
1906+
}
1907+
18701908
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
18711909
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18721910
unsigned Depth) const {
@@ -3540,6 +3578,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
35403578
NODE_NAME_CASE(GORCIW)
35413579
NODE_NAME_CASE(VMV_X_S)
35423580
NODE_NAME_CASE(SPLAT_VECTOR_I64)
3581+
NODE_NAME_CASE(READ_VLENB)
35433582
}
35443583
// clang-format on
35453584
return nullptr;

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ enum NodeType : unsigned {
8585
// Splats an i64 scalar to a vector type (with element type i64) where the
8686
// scalar is a sign-extended i32.
8787
SPLAT_VECTOR_I64,
88+
// Read VLENB CSR
89+
READ_VLENB,
8890
};
8991
} // namespace RISCVISD
9092

@@ -123,6 +125,11 @@ class RISCVTargetLowering : public TargetLowering {
123125

124126
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
125127

128+
void computeKnownBitsForTargetNode(const SDValue Op,
129+
KnownBits &Known,
130+
const APInt &DemandedElts,
131+
const SelectionDAG &DAG,
132+
unsigned Depth) const override;
126133
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
127134
const APInt &DemandedElts,
128135
const SelectionDAG &DAG,

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
1818
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
1919
SDTCisInt<1>]>>;
20+
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
21+
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
2022

2123
// X0 has special meaning for vsetvl/vsetvli.
2224
// rd | rs1 | AVL value | Effect on vl
@@ -2397,6 +2399,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
23972399
def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, (outs VRM8:$vd), (ins VRM8:$vs2)>;
23982400
}
23992401

2402+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
2403+
def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins),
2404+
[(set GPR:$rd, (riscv_read_vlenb))]>;
2405+
}
2406+
24002407
//===----------------------------------------------------------------------===//
24012408
// 6. Configuration-Setting Instructions
24022409
//===----------------------------------------------------------------------===//

llvm/lib/Target/RISCV/RISCVMCInstLower.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,4 +210,13 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
210210
if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
211211
OutMI.addOperand(MCOp);
212212
}
213+
214+
if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {
215+
OutMI.setOpcode(RISCV::CSRRS);
216+
OutMI.addOperand(MCOperand::createImm(
217+
RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
218+
OutMI.addOperand(MCOperand::createReg(RISCV::X0));
219+
return;
220+
}
221+
213222
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
3+
; RUN: | FileCheck %s
4+
5+
define i32 @vscale_zero() nounwind {
6+
; CHECK-LABEL: vscale_zero:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: mv a0, zero
9+
; CHECK-NEXT: ret
10+
entry:
11+
%0 = call i32 @llvm.vscale.i32()
12+
%1 = mul i32 %0, 0
13+
ret i32 %1
14+
}
15+
16+
define i32 @vscale_one() nounwind {
17+
; CHECK-LABEL: vscale_one:
18+
; CHECK: # %bb.0: # %entry
19+
; CHECK-NEXT: csrr a0, vlenb
20+
; CHECK-NEXT: srli a0, a0, 3
21+
; CHECK-NEXT: ret
22+
entry:
23+
%0 = call i32 @llvm.vscale.i32()
24+
%1 = mul i32 %0, 1
25+
ret i32 %1
26+
}
27+
28+
define i32 @vscale_uimmpow2xlen() nounwind {
29+
; CHECK-LABEL: vscale_uimmpow2xlen:
30+
; CHECK: # %bb.0: # %entry
31+
; CHECK-NEXT: csrr a0, vlenb
32+
; CHECK-NEXT: slli a0, a0, 3
33+
; CHECK-NEXT: ret
34+
entry:
35+
%0 = call i32 @llvm.vscale.i32()
36+
%1 = mul i32 %0, 64
37+
ret i32 %1
38+
}
39+
40+
define i32 @vscale_non_pow2() nounwind {
41+
; CHECK-LABEL: vscale_non_pow2:
42+
; CHECK: # %bb.0: # %entry
43+
; CHECK-NEXT: csrr a0, vlenb
44+
; CHECK-NEXT: srli a0, a0, 3
45+
; CHECK-NEXT: addi a1, zero, 24
46+
; CHECK-NEXT: mul a0, a0, a1
47+
; CHECK-NEXT: ret
48+
entry:
49+
%0 = call i32 @llvm.vscale.i32()
50+
%1 = mul i32 %0, 24
51+
ret i32 %1
52+
}
53+
54+
declare i32 @llvm.vscale.i32()
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v < %s \
3+
; RUN: | FileCheck %s
4+
5+
define i64 @vscale_zero() nounwind {
6+
; CHECK-LABEL: vscale_zero:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: mv a0, zero
9+
; CHECK-NEXT: ret
10+
entry:
11+
%0 = call i64 @llvm.vscale.i64()
12+
%1 = mul i64 %0, 0
13+
ret i64 %1
14+
}
15+
16+
define i64 @vscale_one() nounwind {
17+
; CHECK-LABEL: vscale_one:
18+
; CHECK: # %bb.0: # %entry
19+
; CHECK-NEXT: csrr a0, vlenb
20+
; CHECK-NEXT: srli a0, a0, 3
21+
; CHECK-NEXT: ret
22+
entry:
23+
%0 = call i64 @llvm.vscale.i64()
24+
%1 = mul i64 %0, 1
25+
ret i64 %1
26+
}
27+
28+
define i64 @vscale_uimmpow2xlen() nounwind {
29+
; CHECK-LABEL: vscale_uimmpow2xlen:
30+
; CHECK: # %bb.0: # %entry
31+
; CHECK-NEXT: csrr a0, vlenb
32+
; CHECK-NEXT: slli a0, a0, 3
33+
; CHECK-NEXT: ret
34+
entry:
35+
%0 = call i64 @llvm.vscale.i64()
36+
%1 = mul i64 %0, 64
37+
ret i64 %1
38+
}
39+
40+
define i64 @vscale_non_pow2() nounwind {
41+
; CHECK-LABEL: vscale_non_pow2:
42+
; CHECK: # %bb.0: # %entry
43+
; CHECK-NEXT: csrr a0, vlenb
44+
; CHECK-NEXT: srli a0, a0, 3
45+
; CHECK-NEXT: addi a1, zero, 24
46+
; CHECK-NEXT: mul a0, a0, a1
47+
; CHECK-NEXT: ret
48+
entry:
49+
%0 = call i64 @llvm.vscale.i64()
50+
%1 = mul i64 %0, 24
51+
ret i64 %1
52+
}
53+
54+
declare i64 @llvm.vscale.i64()

0 commit comments

Comments
 (0)