Skip to content

[RISCV] Lower i64 load/stores to ld/sd with Zilsd. #139808

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1626,6 +1626,51 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
case RISCVISD::LD_RV32: {
assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");

SDValue Base, Offset;
SDValue Chain = Node->getOperand(0);
SDValue Addr = Node->getOperand(1);
SelectAddrRegImm(Addr, Base, Offset);

SDValue Ops[] = {Base, Offset, Chain};
MachineSDNode *New = CurDAG->getMachineNode(
RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
MVT::i32, SDValue(New, 0));
SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
MVT::i32, SDValue(New, 0));
CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
ReplaceUses(SDValue(Node, 0), Lo);
ReplaceUses(SDValue(Node, 1), Hi);
ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
CurDAG->RemoveDeadNode(Node);
return;
}
case RISCVISD::SD_RV32: {
SDValue Base, Offset;
SDValue Chain = Node->getOperand(0);
SDValue Addr = Node->getOperand(3);
SelectAddrRegImm(Addr, Base, Offset);

SDValue Ops[] = {
CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
Node->getOperand(1),
CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
Node->getOperand(2),
CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};

SDNode *RegPair = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
MVT::Untyped, Ops);
MachineSDNode *New =
CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
{SDValue(RegPair, 0), Base, Offset, Chain});
CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
Expand Down
55 changes: 51 additions & 4 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
}

if (Subtarget.is64Bit()) {
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

Expand Down Expand Up @@ -7748,13 +7753,33 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(Op);
SDValue StoredVal = Store->getValue();
EVT VecTy = StoredVal.getValueType();
EVT VT = StoredVal.getValueType();
if (VT == MVT::i64) {
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (Store->isTruncatingStore())
return SDValue();

if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
return SDValue();

SDLoc DL(Op);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
DAG.getTargetConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
DAG.getTargetConstant(1, DL, MVT::i32));

return DAG.getMemIntrinsicNode(
RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
{Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
Store->getMemOperand());
}
// Handle normal vector tuple store.
if (VecTy.isRISCVVectorTuple()) {
if (VT.isRISCVVectorTuple()) {
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
unsigned NF = VecTy.getRISCVVectorTupleNumFields();
unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
unsigned NF = VT.getRISCVVectorTupleNumFields();
unsigned Sz = VT.getSizeInBits().getKnownMinValue();
unsigned NumElts = Sz / (NF * 8);
int Log2LMUL = Log2_64(NumElts) - 3;

Expand Down Expand Up @@ -13714,6 +13739,28 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// sext_inreg we emit for ADD/SUB/MUL/SLLI.
LoadSDNode *Ld = cast<LoadSDNode>(N);

if (N->getValueType(0) == MVT::i64) {
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
"Unexpected custom legalisation");

if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
return;

SDLoc DL(N);
SDValue Result = DAG.getMemIntrinsicNode(
RISCVISD::LD_RV32, DL,
DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
{Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
SDValue Lo = Result.getValue(0);
SDValue Hi = Result.getValue(1);
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
Results.append({Pair, Result.getValue(2)});
return;
}

assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");

SDLoc dl(N);
SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
Ld->getBasePtr(), Ld->getMemoryVT(),
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// RISC-V specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDT_RISCV_LD_RV32
: SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>;
def SDT_RISCV_SD_RV32
: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>;

def riscv_ld_rv32 : RVSDNode<"LD_RV32", SDT_RISCV_LD_RV32,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def riscv_st_rv32 : RVSDNode<"SD_RV32", SDT_RISCV_SD_RV32,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
Expand Down
121 changes: 121 additions & 0 deletions llvm/test/CodeGen/RISCV/zilsd.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+zilsd -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,SLOW %s
; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+unaligned-scalar-mem -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,FAST %s

define i64 @load(ptr %a) nounwind {
; CHECK-LABEL: load:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a2, 80(a0)
; CHECK-NEXT: ld a0, 0(a0)
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
Comment on lines +10 to +13
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is correct, but do we need to adjust the spill weights to get something like

mv a5, a0
ld a0, 80(a5)
ld a2, 0(a5)

; CHECK-NEXT: ret
%1 = getelementptr i64, ptr %a, i32 10
%2 = load i64, ptr %1
%3 = load volatile i64, ptr %a
ret i64 %2
}

define void @store(ptr %a, i64 %b) nounwind {
; CHECK-LABEL: store:
; CHECK: # %bb.0:
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: sd a2, 0(a0)
; CHECK-NEXT: sd a2, 88(a0)
Comment on lines +24 to +27
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is correct and the moves are necessary due to the order of the parameters (the i64 isn't passed in an even-odd pair)

; CHECK-NEXT: ret
store i64 %b, ptr %a
%1 = getelementptr i64, ptr %a, i32 11
store i64 %b, ptr %1
ret void
}

define i64 @load_unaligned(ptr %p) {
; SLOW-LABEL: load_unaligned:
; SLOW: # %bb.0:
; SLOW-NEXT: lbu a1, 1(a0)
; SLOW-NEXT: lbu a2, 2(a0)
; SLOW-NEXT: lbu a3, 3(a0)
; SLOW-NEXT: lbu a4, 0(a0)
; SLOW-NEXT: slli a1, a1, 8
; SLOW-NEXT: slli a2, a2, 16
; SLOW-NEXT: slli a3, a3, 24
; SLOW-NEXT: or a1, a1, a4
; SLOW-NEXT: lbu a4, 4(a0)
; SLOW-NEXT: lbu a5, 5(a0)
; SLOW-NEXT: or a2, a3, a2
; SLOW-NEXT: lbu a3, 6(a0)
; SLOW-NEXT: lbu a0, 7(a0)
; SLOW-NEXT: slli a5, a5, 8
; SLOW-NEXT: or a4, a5, a4
; SLOW-NEXT: slli a3, a3, 16
; SLOW-NEXT: slli a0, a0, 24
; SLOW-NEXT: or a3, a0, a3
; SLOW-NEXT: or a0, a2, a1
; SLOW-NEXT: or a1, a3, a4
; SLOW-NEXT: ret
;
; FAST-LABEL: load_unaligned:
; FAST: # %bb.0:
; FAST-NEXT: ld a0, 0(a0)
; FAST-NEXT: ret
%res = load i64, ptr %p, align 1
ret i64 %res
}

define void @store_unaligned(ptr %p, i64 %v) {
; SLOW-LABEL: store_unaligned:
; SLOW: # %bb.0:
; SLOW-NEXT: srli a3, a2, 24
; SLOW-NEXT: srli a4, a2, 16
; SLOW-NEXT: srli a5, a2, 8
; SLOW-NEXT: srli a6, a1, 24
; SLOW-NEXT: srli a7, a1, 16
; SLOW-NEXT: sb a2, 4(a0)
; SLOW-NEXT: sb a5, 5(a0)
; SLOW-NEXT: sb a4, 6(a0)
; SLOW-NEXT: sb a3, 7(a0)
; SLOW-NEXT: srli a2, a1, 8
; SLOW-NEXT: sb a1, 0(a0)
; SLOW-NEXT: sb a2, 1(a0)
; SLOW-NEXT: sb a7, 2(a0)
; SLOW-NEXT: sb a6, 3(a0)
; SLOW-NEXT: ret
;
; FAST-LABEL: store_unaligned:
; FAST: # %bb.0:
; FAST-NEXT: mv a3, a2
; FAST-NEXT: mv a2, a1
; FAST-NEXT: sd a2, 0(a0)
; FAST-NEXT: ret
store i64 %v, ptr %p, align 1
ret void
}

@g = dso_local global i64 0, align 8

define i64 @load_g() nounwind {
; CHECK-LABEL: load_g:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a0, %hi(g)
; CHECK-NEXT: ld a0, %lo(g)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @g
ret i64 %0
}

define void @store_g() nounwind {
; CHECK-LABEL: store_g:
; CHECK: # %bb.0: # %entyr
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: lui a2, %hi(g)
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: sd a0, %lo(g)(a2)
; CHECK-NEXT: ret
entyr:
store i64 0, ptr @g
ret void
}