Skip to content

Commit e52c558

Browse files
committed
[RISCV] Narrow indices of fixed vector gather/scatter nodes
Doing so allows the use of smaller constants overall, and may allow (for some small vector constants) avoiding the constant pool entirely. This can result in extra VTYPE toggles if we get unlucky. This was reviewed under PR #66405.
1 parent 917392a commit e52c558

File tree

2 files changed

+37
-14
lines changed

2 files changed

+37
-14
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11633,15 +11633,39 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1163311633
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
1163411634
}
1163511635

11636-
// According to the property that indexed load/store instructions
11637-
// zero-extended their indices, \p narrowIndex tries to narrow the type of index
11638-
// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
11639-
// bits(ty).
11636+
/// According to the property that indexed load/store instructions zero-extend
11637+
/// their indices, try to narrow the type of index operand.
1164011638
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
1164111639
if (isIndexTypeSigned(IndexType))
1164211640
return false;
1164311641

11644-
if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
11642+
if (!N->hasOneUse())
11643+
return false;
11644+
11645+
EVT VT = N.getValueType();
11646+
SDLoc DL(N);
11647+
11648+
// In general, what we're doing here is seeing if we can sink a truncate to
11649+
// a smaller element type into the expression tree building our index.
11650+
// TODO: We can generalize this and handle a bunch more cases if useful.
11651+
11652+
// Narrow a buildvector to the narrowest element type. This requires less
11653+
// work and less register pressure at high LMUL, and creates smaller constants
11654+
// which may be cheaper to materialize.
11655+
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
11656+
KnownBits Known = DAG.computeKnownBits(N);
11657+
unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
11658+
LLVMContext &C = *DAG.getContext();
11659+
EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
11660+
if (ResultVT.bitsLT(VT.getVectorElementType())) {
11661+
N = DAG.getNode(ISD::TRUNCATE, DL,
11662+
VT.changeVectorElementType(ResultVT), N);
11663+
return true;
11664+
}
11665+
}
11666+
11667+
// Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
11668+
if (N.getOpcode() != ISD::SHL)
1164511669
return false;
1164611670

1164711671
SDValue N0 = N.getOperand(0);
@@ -11656,7 +11680,6 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
1165611680
if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
1165711681
return false;;
1165811682

11659-
SDLoc DL(N);
1166011683
SDValue Src = N0.getOperand(0);
1166111684
EVT SrcVT = Src.getValueType();
1166211685
unsigned SrcElen = SrcVT.getScalarSizeInBits();

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -13027,17 +13027,17 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
1302713027
; RV32-NEXT: lui a1, %hi(.LCPI107_0)
1302813028
; RV32-NEXT: addi a1, a1, %lo(.LCPI107_0)
1302913029
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13030-
; RV32-NEXT: vle32.v v10, (a1)
13031-
; RV32-NEXT: vluxei32.v v8, (a0), v10
13030+
; RV32-NEXT: vle8.v v9, (a1)
13031+
; RV32-NEXT: vluxei8.v v8, (a0), v9
1303213032
; RV32-NEXT: ret
1303313033
;
1303413034
; RV64V-LABEL: mgather_strided_2xSEW:
1303513035
; RV64V: # %bb.0:
1303613036
; RV64V-NEXT: lui a1, %hi(.LCPI107_0)
1303713037
; RV64V-NEXT: addi a1, a1, %lo(.LCPI107_0)
1303813038
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13039-
; RV64V-NEXT: vle64.v v12, (a1)
13040-
; RV64V-NEXT: vluxei64.v v8, (a0), v12
13039+
; RV64V-NEXT: vle8.v v9, (a1)
13040+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1304113041
; RV64V-NEXT: ret
1304213042
;
1304313043
; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
@@ -13144,17 +13144,17 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
1314413144
; RV32-NEXT: lui a1, %hi(.LCPI108_0)
1314513145
; RV32-NEXT: addi a1, a1, %lo(.LCPI108_0)
1314613146
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13147-
; RV32-NEXT: vle32.v v10, (a1)
13148-
; RV32-NEXT: vluxei32.v v8, (a0), v10
13147+
; RV32-NEXT: vle8.v v9, (a1)
13148+
; RV32-NEXT: vluxei8.v v8, (a0), v9
1314913149
; RV32-NEXT: ret
1315013150
;
1315113151
; RV64V-LABEL: mgather_gather_2xSEW:
1315213152
; RV64V: # %bb.0:
1315313153
; RV64V-NEXT: lui a1, %hi(.LCPI108_0)
1315413154
; RV64V-NEXT: addi a1, a1, %lo(.LCPI108_0)
1315513155
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13156-
; RV64V-NEXT: vle64.v v12, (a1)
13157-
; RV64V-NEXT: vluxei64.v v8, (a0), v12
13156+
; RV64V-NEXT: vle8.v v9, (a1)
13157+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1315813158
; RV64V-NEXT: ret
1315913159
;
1316013160
; RV64ZVE32F-LABEL: mgather_gather_2xSEW:

0 commit comments

Comments
 (0)