
Commit bcf3654

[RISCV] Lower i64 load/stores to ld/sd with Zilsd. (#139808)
Don't split i64 load/store when we have Zilsd. In the future, we should enhance the LoadStoreOptimizer pass to do this, but this is a good starting point. Even if we support it in LoadStoreOptimizer, we might still want this for volatile loads/stores to guarantee the use of Zilsd.
1 parent 2422b17 commit bcf3654
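For context, a minimal sketch of the effect described in the commit message, lifted from the new zilsd.ll test added below: an aligned i64 load on riscv32 with +zilsd is now selected as a single paired ld instead of being split into two lw.

    @g = dso_local global i64 0, align 8

    define i64 @load_g() nounwind {
    entry:
      %0 = load i64, ptr @g
      ret i64 %0
    }

    ; llc -mtriple=riscv32 -mattr=+zilsd now emits:
    ;   lui a0, %hi(g)
    ;   ld a0, %lo(g)(a0)
    ;   ret

When the access is under-aligned and +unaligned-scalar-mem is not enabled, the custom lowering bails out and the usual byte-wise expansion still applies (see the SLOW check lines in the test).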

File tree: 4 files changed (+231, -4 lines)

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 45 additions & 0 deletions
@@ -1626,6 +1626,51 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     }
     break;
   }
+  case RISCVISD::LD_RV32: {
+    assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
+
+    SDValue Base, Offset;
+    SDValue Chain = Node->getOperand(0);
+    SDValue Addr = Node->getOperand(1);
+    SelectAddrRegImm(Addr, Base, Offset);
+
+    SDValue Ops[] = {Base, Offset, Chain};
+    MachineSDNode *New = CurDAG->getMachineNode(
+        RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
+    SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
+                                                MVT::i32, SDValue(New, 0));
+    SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
+                                                MVT::i32, SDValue(New, 0));
+    CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
+    ReplaceUses(SDValue(Node, 0), Lo);
+    ReplaceUses(SDValue(Node, 1), Hi);
+    ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
+  case RISCVISD::SD_RV32: {
+    SDValue Base, Offset;
+    SDValue Chain = Node->getOperand(0);
+    SDValue Addr = Node->getOperand(3);
+    SelectAddrRegImm(Addr, Base, Offset);
+
+    SDValue Ops[] = {
+        CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
+        Node->getOperand(1),
+        CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
+        Node->getOperand(2),
+        CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
+
+    SDNode *RegPair = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+                                             MVT::Untyped, Ops);
+    MachineSDNode *New =
+        CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
+                               {SDValue(RegPair, 0), Base, Offset, Chain});
+    CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = Node->getConstantOperandVal(0);
     switch (IntNo) {
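A rough sketch (not part of the commit; virtual register names are illustrative) of what the SD_RV32 selection above produces, in MIR-like notation: REG_SEQUENCE packs the two i32 halves into an even/odd GPR pair, and the paired store consumes that Untyped value.

    %pair:gprpair = REG_SEQUENCE %lo:gpr, %subreg.sub_gpr_even, %hi:gpr, %subreg.sub_gpr_odd
    SD_RV32 %pair:gprpair, %base:gpr, 0 :: (store (s64))

The LD_RV32 case is the mirror image: the paired load defines a single Untyped result, and the even/odd i32 halves are peeled off with EXTRACT_SUBREG.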

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 51 additions & 4 deletions
@@ -318,6 +318,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
     setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
 
+  if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
+    setOperationAction(ISD::LOAD, MVT::i64, Custom);
+    setOperationAction(ISD::STORE, MVT::i64, Custom);
+  }
+
   if (Subtarget.is64Bit()) {
     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

@@ -7748,13 +7753,33 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::STORE: {
     auto *Store = cast<StoreSDNode>(Op);
     SDValue StoredVal = Store->getValue();
-    EVT VecTy = StoredVal.getValueType();
+    EVT VT = StoredVal.getValueType();
+    if (VT == MVT::i64) {
+      assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
+             "Unexpected custom legalisation");
+      if (Store->isTruncatingStore())
+        return SDValue();
+
+      if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
+        return SDValue();
+
+      SDLoc DL(Op);
+      SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
+                               DAG.getTargetConstant(0, DL, MVT::i32));
+      SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
+                               DAG.getTargetConstant(1, DL, MVT::i32));
+
+      return DAG.getMemIntrinsicNode(
+          RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
+          {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
+          Store->getMemOperand());
+    }
     // Handle normal vector tuple store.
-    if (VecTy.isRISCVVectorTuple()) {
+    if (VT.isRISCVVectorTuple()) {
       SDLoc DL(Op);
       MVT XLenVT = Subtarget.getXLenVT();
-      unsigned NF = VecTy.getRISCVVectorTupleNumFields();
-      unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
+      unsigned NF = VT.getRISCVVectorTupleNumFields();
+      unsigned Sz = VT.getSizeInBits().getKnownMinValue();
       unsigned NumElts = Sz / (NF * 8);
       int Log2LMUL = Log2_64(NumElts) - 3;

@@ -13714,6 +13739,28 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     // sext_inreg we emit for ADD/SUB/MUL/SLLI.
     LoadSDNode *Ld = cast<LoadSDNode>(N);
 
+    if (N->getValueType(0) == MVT::i64) {
+      assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
+             "Unexpected custom legalisation");
+
+      if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
+        return;
+
+      SDLoc DL(N);
+      SDValue Result = DAG.getMemIntrinsicNode(
+          RISCVISD::LD_RV32, DL,
+          DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
+          {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
+      SDValue Lo = Result.getValue(0);
+      SDValue Hi = Result.getValue(1);
+      SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+      Results.append({Pair, Result.getValue(2)});
+      return;
+    }
+
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+
     SDLoc dl(N);
     SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
                                  Ld->getBasePtr(), Ld->getMemoryVT(),
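A brief sketch of the node shapes this ReplaceNodeResults path creates for an aligned i64 load on riscv32 (node names are illustrative, not a real -debug dump): the memory intrinsic node carries two i32 results plus a chain, and BUILD_PAIR reassembles the i64 for existing users.

    tLd: i32,i32,ch = RISCVISD::LD_RV32 Chain, BasePtr
    tPair: i64 = BUILD_PAIR tLd:0, tLd:1

Results receives {tPair, tLd:2}, so both the value and the chain of the original LOAD are replaced.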

llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td

Lines changed: 14 additions & 0 deletions
@@ -11,6 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCV_LD_RV32
+    : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>;
+def SDT_RISCV_SD_RV32
+    : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>;
+
+def riscv_ld_rv32 : RVSDNode<"LD_RV32", SDT_RISCV_LD_RV32,
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def riscv_st_rv32 : RVSDNode<"SD_RV32", SDT_RISCV_SD_RV32,
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction Class Templates
 //===----------------------------------------------------------------------===//

llvm/test/CodeGen/RISCV/zilsd.ll

Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+zilsd -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CHECK,SLOW %s
+; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+unaligned-scalar-mem -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CHECK,FAST %s
+
+define i64 @load(ptr %a) nounwind {
+; CHECK-LABEL: load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a2, 80(a0)
+; CHECK-NEXT:    ld a0, 0(a0)
+; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:    ret
+  %1 = getelementptr i64, ptr %a, i32 10
+  %2 = load i64, ptr %1
+  %3 = load volatile i64, ptr %a
+  ret i64 %2
+}
+
+define void @store(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    sd a2, 0(a0)
+; CHECK-NEXT:    sd a2, 88(a0)
+; CHECK-NEXT:    ret
+  store i64 %b, ptr %a
+  %1 = getelementptr i64, ptr %a, i32 11
+  store i64 %b, ptr %1
+  ret void
+}
+
+define i64 @load_unaligned(ptr %p) {
+; SLOW-LABEL: load_unaligned:
+; SLOW:       # %bb.0:
+; SLOW-NEXT:    lbu a1, 1(a0)
+; SLOW-NEXT:    lbu a2, 2(a0)
+; SLOW-NEXT:    lbu a3, 3(a0)
+; SLOW-NEXT:    lbu a4, 0(a0)
+; SLOW-NEXT:    slli a1, a1, 8
+; SLOW-NEXT:    slli a2, a2, 16
+; SLOW-NEXT:    slli a3, a3, 24
+; SLOW-NEXT:    or a1, a1, a4
+; SLOW-NEXT:    lbu a4, 4(a0)
+; SLOW-NEXT:    lbu a5, 5(a0)
+; SLOW-NEXT:    or a2, a3, a2
+; SLOW-NEXT:    lbu a3, 6(a0)
+; SLOW-NEXT:    lbu a0, 7(a0)
+; SLOW-NEXT:    slli a5, a5, 8
+; SLOW-NEXT:    or a4, a5, a4
+; SLOW-NEXT:    slli a3, a3, 16
+; SLOW-NEXT:    slli a0, a0, 24
+; SLOW-NEXT:    or a3, a0, a3
+; SLOW-NEXT:    or a0, a2, a1
+; SLOW-NEXT:    or a1, a3, a4
+; SLOW-NEXT:    ret
+;
+; FAST-LABEL: load_unaligned:
+; FAST:       # %bb.0:
+; FAST-NEXT:    ld a0, 0(a0)
+; FAST-NEXT:    ret
+  %res = load i64, ptr %p, align 1
+  ret i64 %res
+}
+
+define void @store_unaligned(ptr %p, i64 %v) {
+; SLOW-LABEL: store_unaligned:
+; SLOW:       # %bb.0:
+; SLOW-NEXT:    srli a3, a2, 24
+; SLOW-NEXT:    srli a4, a2, 16
+; SLOW-NEXT:    srli a5, a2, 8
+; SLOW-NEXT:    srli a6, a1, 24
+; SLOW-NEXT:    srli a7, a1, 16
+; SLOW-NEXT:    sb a2, 4(a0)
+; SLOW-NEXT:    sb a5, 5(a0)
+; SLOW-NEXT:    sb a4, 6(a0)
+; SLOW-NEXT:    sb a3, 7(a0)
+; SLOW-NEXT:    srli a2, a1, 8
+; SLOW-NEXT:    sb a1, 0(a0)
+; SLOW-NEXT:    sb a2, 1(a0)
+; SLOW-NEXT:    sb a7, 2(a0)
+; SLOW-NEXT:    sb a6, 3(a0)
+; SLOW-NEXT:    ret
+;
+; FAST-LABEL: store_unaligned:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mv a3, a2
+; FAST-NEXT:    mv a2, a1
+; FAST-NEXT:    sd a2, 0(a0)
+; FAST-NEXT:    ret
+  store i64 %v, ptr %p, align 1
+  ret void
+}
+
+@g = dso_local global i64 0, align 8
+
+define i64 @load_g() nounwind {
+; CHECK-LABEL: load_g:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a0, %hi(g)
+; CHECK-NEXT:    ld a0, %lo(g)(a0)
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i64, ptr @g
+  ret i64 %0
+}
+
+define void @store_g() nounwind {
+; CHECK-LABEL: store_g:
+; CHECK:       # %bb.0: # %entyr
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    lui a2, %hi(g)
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    sd a0, %lo(g)(a2)
+; CHECK-NEXT:    ret
+entyr:
+  store i64 0, ptr @g
+  ret void
+}
