-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Preserve MMO when expanding PseudoRV32ZdinxSD/PseudoRV32ZdinxLD. #85877
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
9b8feb6
to
5ff25a2
Compare
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) ChangesThis allows the asm printer to print the stack spill/reload messages. Stacked on #85871 Patch is 134.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85877.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 0a314fdd41cbe2..080a37c9a05ed9 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -312,26 +312,40 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even);
Register Hi =
TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
- BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
- .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill()))
- .addReg(MBBI->getOperand(1).getReg())
- .add(MBBI->getOperand(2));
+ auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+ .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill()))
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
+
+ MachineMemOperand *MMOHi = nullptr;
+ if (MBBI->hasOneMemOperand()) {
+ MachineMemOperand *OldMMO = MBBI->memoperands().front();
+ MachineFunction *MF = MBB.getParent();
+ MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4);
+ MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4);
+ MIBLo.setMemRefs(MMOLo);
+ }
+
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
// FIXME: Zdinx RV32 can not work on unaligned memory.
assert(!STI->hasFastUnalignedAccess());
assert(MBBI->getOperand(2).getOffset() % 8 == 0);
MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
- BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
- .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
- .add(MBBI->getOperand(1))
- .add(MBBI->getOperand(2));
+ auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+ .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
+ .add(MBBI->getOperand(1))
+ .add(MBBI->getOperand(2));
+ if (MMOHi)
+ MIBHi.setMemRefs(MMOHi);
} else {
assert(isInt<12>(MBBI->getOperand(2).getImm() + 4));
- BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
- .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
- .add(MBBI->getOperand(1))
- .addImm(MBBI->getOperand(2).getImm() + 4);
+ auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+ .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
+ .add(MBBI->getOperand(1))
+ .addImm(MBBI->getOperand(2).getImm() + 4);
+ if (MMOHi)
+ MIBHi.setMemRefs(MMOHi);
}
MBBI->eraseFromParent();
return true;
@@ -349,36 +363,53 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
Register Hi =
TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
+ MachineMemOperand *MMOLo = nullptr;
+ MachineMemOperand *MMOHi = nullptr;
+ if (MBBI->hasOneMemOperand()) {
+ MachineMemOperand *OldMMO = MBBI->memoperands().front();
+ MachineFunction *MF = MBB.getParent();
+ MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4);
+ MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4);
+ }
+
// If the register of operand 1 is equal to the Lo register, then swap the
// order of loading the Lo and Hi statements.
bool IsOp1EqualToLo = Lo == MBBI->getOperand(1).getReg();
// Order: Lo, Hi
if (!IsOp1EqualToLo) {
- BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
- .addReg(MBBI->getOperand(1).getReg())
- .add(MBBI->getOperand(2));
+ auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
+ if (MMOLo)
+ MIBLo.setMemRefs(MMOLo);
}
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
auto Offset = MBBI->getOperand(2).getOffset();
assert(MBBI->getOperand(2).getOffset() % 8 == 0);
MBBI->getOperand(2).setOffset(Offset + 4);
- BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
- .addReg(MBBI->getOperand(1).getReg())
- .add(MBBI->getOperand(2));
+ auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
MBBI->getOperand(2).setOffset(Offset);
+ if (MMOHi)
+ MIBHi.setMemRefs(MMOHi);
} else {
assert(isInt<12>(MBBI->getOperand(2).getImm() + 4));
- BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
- .addReg(MBBI->getOperand(1).getReg())
- .addImm(MBBI->getOperand(2).getImm() + 4);
+ auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
+ .addReg(MBBI->getOperand(1).getReg())
+ .addImm(MBBI->getOperand(2).getImm() + 4);
+ if (MMOHi)
+ MIBHi.setMemRefs(MMOHi);
}
// Order: Hi, Lo
if (IsOp1EqualToLo) {
- BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
- .addReg(MBBI->getOperand(1).getReg())
- .add(MBBI->getOperand(2));
+ auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
+ if (MMOLo)
+ MIBLo.setMemRefs(MMOLo);
}
MBBI->eraseFromParent();
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index a68674b221d38e..881aab955f7d0b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -431,29 +431,35 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
if (!IsRVVSpill) {
- if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) {
+ int64_t Val = Offset.getFixed();
+ int64_t Lo12 = SignExtend64<12>(Val);
+ unsigned Opc = MI.getOpcode();
+ if (Opc == RISCV::ADDI && !isInt<12>(Val)) {
// We chose to emit the canonical immediate sequence rather than folding
// the offset into the using add under the theory that doing so doesn't
// save dynamic instruction count and some target may fuse the canonical
// 32 bit immediate sequence. We still need to clear the portion of the
// offset encoded in the immediate.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+ } else if ((Opc == RISCV::PREFETCH_I || Opc == RISCV::PREFETCH_R ||
+ Opc == RISCV::PREFETCH_W) &&
+ (Lo12 & 0b11111) != 0) {
+ // Prefetch instructions require the offset to be 32 byte aligned.
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+ } else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
+ Opc == RISCV::PseudoRV32ZdinxSD) &&
+ Lo12 >= 2044) {
+ // This instruction will be split into 2 instructions. The second
+ // instruction will add 4 to the immediate. If that would overflow 12
+ // bits, we can't fold the offset.
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
} else {
// We can encode an add with 12 bit signed immediate in the immediate
// operand of our user instruction. As a result, the remaining
// offset can by construction, at worst, a LUI and a ADD.
- int64_t Val = Offset.getFixed();
- int64_t Lo12 = SignExtend64<12>(Val);
- if ((MI.getOpcode() == RISCV::PREFETCH_I ||
- MI.getOpcode() == RISCV::PREFETCH_R ||
- MI.getOpcode() == RISCV::PREFETCH_W) &&
- (Lo12 & 0b11111) != 0)
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
- else {
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
- Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
- Offset.getScalable());
- }
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
+ Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
+ Offset.getScalable());
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 225b57554c1dc0..9da1f73681c68c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -573,7 +573,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
}
let RegInfos = RegInfoByHwMode<[RV32, RV64],
- [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>,
+ [RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>,
DecoderMethod = "DecodeGPRPairRegisterClass" in
def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add
X10_X11, X12_X13, X14_X15, X16_X17,
diff --git a/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll b/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll
new file mode 100644
index 00000000000000..0abf49814db248
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll
@@ -0,0 +1,2873 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zdinx | FileCheck %s
+
+; Generate over 2048 bytes of spills by loading a bunch of values and then forcing
+; all GPRs to be spilled via inline assembly that clobbers all registers. We
+; want to make sure eliminateFrameIndex doesn't fold sp+2044 as an offset in a
+; GPR pair spill instruction. When we split the pair spill, we would be unable
+; to add 4 to the immediate without overflowing simm12.
+
+; 2040(sp) should be the largest offset we have.
+
+define void @foo(ptr nocapture noundef %0) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -2032
+; CHECK-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s1, 2020(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s2, 2016(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s3, 2012(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s4, 2008(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s5, 2004(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s6, 2000(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s7, 1996(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s8, 1992(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s9, 1988(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s10, 1984(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s11, 1980(sp) # 4-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -80
+; CHECK-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 0(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: sw a2, -2044(a1) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, -2040(a1) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 8(a0)
+; CHECK-NEXT: lw a3, 12(a0)
+; CHECK-NEXT: addi a1, sp, 2044
+; CHECK-NEXT: sw a2, 0(a1) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 4(a1) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 16(a0)
+; CHECK-NEXT: lw a3, 20(a0)
+; CHECK-NEXT: sw a2, 2036(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 2040(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 24(a0)
+; CHECK-NEXT: lw a3, 28(a0)
+; CHECK-NEXT: sw a2, 2028(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 2032(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 32(a0)
+; CHECK-NEXT: lw a3, 36(a0)
+; CHECK-NEXT: sw a2, 2020(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 2024(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 40(a0)
+; CHECK-NEXT: lw a3, 44(a0)
+; CHECK-NEXT: sw a2, 2012(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 2016(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 48(a0)
+; CHECK-NEXT: lw a3, 52(a0)
+; CHECK-NEXT: sw a2, 2004(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 2008(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 56(a0)
+; CHECK-NEXT: lw a3, 60(a0)
+; CHECK-NEXT: sw a2, 1996(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 2000(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 64(a0)
+; CHECK-NEXT: lw a3, 68(a0)
+; CHECK-NEXT: sw a2, 1988(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1992(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 72(a0)
+; CHECK-NEXT: lw a3, 76(a0)
+; CHECK-NEXT: sw a2, 1980(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1984(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 80(a0)
+; CHECK-NEXT: lw a3, 84(a0)
+; CHECK-NEXT: sw a2, 1972(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1976(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 88(a0)
+; CHECK-NEXT: lw a3, 92(a0)
+; CHECK-NEXT: sw a2, 1964(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1968(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 96(a0)
+; CHECK-NEXT: lw a3, 100(a0)
+; CHECK-NEXT: sw a2, 1956(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1960(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 104(a0)
+; CHECK-NEXT: lw a3, 108(a0)
+; CHECK-NEXT: sw a2, 1948(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1952(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 112(a0)
+; CHECK-NEXT: lw a3, 116(a0)
+; CHECK-NEXT: sw a2, 1940(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1944(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 120(a0)
+; CHECK-NEXT: lw a3, 124(a0)
+; CHECK-NEXT: sw a2, 1932(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1936(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 128(a0)
+; CHECK-NEXT: lw a3, 132(a0)
+; CHECK-NEXT: sw a2, 1924(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1928(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 136(a0)
+; CHECK-NEXT: lw a3, 140(a0)
+; CHECK-NEXT: sw a2, 1916(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1920(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 144(a0)
+; CHECK-NEXT: lw a3, 148(a0)
+; CHECK-NEXT: sw a2, 1908(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1912(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 152(a0)
+; CHECK-NEXT: lw a3, 156(a0)
+; CHECK-NEXT: sw a2, 1900(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1904(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 160(a0)
+; CHECK-NEXT: lw a3, 164(a0)
+; CHECK-NEXT: sw a2, 1892(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1896(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 168(a0)
+; CHECK-NEXT: lw a3, 172(a0)
+; CHECK-NEXT: sw a2, 1884(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1888(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 176(a0)
+; CHECK-NEXT: lw a3, 180(a0)
+; CHECK-NEXT: sw a2, 1876(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1880(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 184(a0)
+; CHECK-NEXT: lw a3, 188(a0)
+; CHECK-NEXT: sw a2, 1868(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1872(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 192(a0)
+; CHECK-NEXT: lw a3, 196(a0)
+; CHECK-NEXT: sw a2, 1860(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1864(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 200(a0)
+; CHECK-NEXT: lw a3, 204(a0)
+; CHECK-NEXT: sw a2, 1852(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1856(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 208(a0)
+; CHECK-NEXT: lw a3, 212(a0)
+; CHECK-NEXT: sw a2, 1844(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1848(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 216(a0)
+; CHECK-NEXT: lw a3, 220(a0)
+; CHECK-NEXT: sw a2, 1836(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1840(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 224(a0)
+; CHECK-NEXT: lw a3, 228(a0)
+; CHECK-NEXT: sw a2, 1828(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1832(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 232(a0)
+; CHECK-NEXT: lw a3, 236(a0)
+; CHECK-NEXT: sw a2, 1820(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1824(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 240(a0)
+; CHECK-NEXT: lw a3, 244(a0)
+; CHECK-NEXT: sw a2, 1812(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1816(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 248(a0)
+; CHECK-NEXT: lw a3, 252(a0)
+; CHECK-NEXT: sw a2, 1804(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1808(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 256(a0)
+; CHECK-NEXT: lw a3, 260(a0)
+; CHECK-NEXT: sw a2, 1796(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1800(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 264(a0)
+; CHECK-NEXT: lw a3, 268(a0)
+; CHECK-NEXT: sw a2, 1788(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1792(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 272(a0)
+; CHECK-NEXT: lw a3, 276(a0)
+; CHECK-NEXT: sw a2, 1780(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1784(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 280(a0)
+; CHECK-NEXT: lw a3, 284(a0)
+; CHECK-NEXT: sw a2, 1772(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1776(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 288(a0)
+; CHECK-NEXT: lw a3, 292(a0)
+; CHECK-NEXT: sw a2, 1764(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1768(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 296(a0)
+; CHECK-NEXT: lw a3, 300(a0)
+; CHECK-NEXT: sw a2, 1756(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1760(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 304(a0)
+; CHECK-NEXT: lw a3, 308(a0)
+; CHECK-NEXT: sw a2, 1748(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1752(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 312(a0)
+; CHECK-NEXT: lw a3, 316(a0)
+; CHECK-NEXT: sw a2, 1740(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1744(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 320(a0)
+; CHECK-NEXT: lw a3, 324(a0)
+; CHECK-NEXT: sw a2, 1732(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1736(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 328(a0)
+; CHECK-NEXT: lw a3, 332(a0)
+; CHECK-NEXT: sw a2, 1724(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1728(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 336(a0)
+; CHECK-NEXT: lw a3, 340(a0)
+; CHECK-NEXT: sw a2, 1716(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1720(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 344(a0)
+; CHECK-NEXT: lw a3, 348(a0)
+; CHECK-NEXT: sw a2, 1708(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1712(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 352(a0)
+; CHECK-NEXT: lw a3, 356(a0)
+; CHECK-NEXT: sw a2, 1700(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1704(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 360(a0)
+; CHECK-NEXT: lw a3, 364(a0)
+; CHECK-NEXT: sw a2, 1692(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1696(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 368(a0)
+; CHECK-NEXT: lw a3, 372(a0)
+; CHECK-NEXT: sw a2, 1684(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1688(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 376(a0)
+; CHECK-NEXT: lw a3, 380(a0)
+; CHECK-NEXT: sw a2, 1676(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1680(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 384(a0)
+; CHECK-NEXT: lw a3, 388(a0)
+; CHECK-NEXT: sw a2, 1668(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1672(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 392(a0)
+; CHECK-NEXT: lw a3, 396(a0)
+; CHECK-NEXT: sw a2, 1660(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1664(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 400(a0)
+; CHECK-NEXT: lw a3, 404(a0)
+; CHECK-NEXT: sw a2, 1652(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1656(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 408(a0)
+; CHECK-NEXT: lw a3, 412(a0)
+; CHECK-NEXT: sw a2, 1644(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1648(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 416(a0)
+; CHECK-NEXT: lw a3, 420(a0)
+; CHECK-NEXT: sw a2, 1636(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1640(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 424(a0)
+; CHECK-NEXT: lw a3, 428(a0)
+; CHECK-NEXT: sw a2, 1628(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1632(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 432(a0)
+; CHECK-NEXT: lw a3, 436(a0)
+; CHECK-NEXT: sw a2, 1620(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 1624(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 440(a0)
+; CHECK-NEXT: lw a3, 444(a0)
+; CHECK-NEXT: sw a2...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
.addReg(MBBI->getOperand(1).getReg()) | ||
.add(MBBI->getOperand(2)); | ||
|
||
MachineMemOperand *MMOHi = nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Make this a list (can be empty) so we don't need to know if MMOHi
is nullptr and chains setMemRefs
directly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess that would have to be SmallVector so we can push into it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes! Just a code style suggestion here.
auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo) | ||
.addReg(MBBI->getOperand(1).getReg()) | ||
.add(MBBI->getOperand(2)); | ||
MIBLo.setMemRefs(MMOLo); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove MIBLo
and chain the call setMemRefs
after BuildMI
?
This allows the asm printer to print the stack spill/reload messages.
It's a bug if it's not, so let's just assume it to simplify the code.
4c981dc
to
30cdcde
Compare
@wangpc-pp ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
We have issues with the assertion going off. Not sure, but it seems that the machine outliner pass will create instructions without mem operands
|
|
This allows the asm printer to print the stack spill/reload messages.
Stacked on #85871