[RISCV] Fix more boundary cases in immediate selection for Zdinx load/store on RV32. #105874
Conversation
[RISCV] Fix more boundary cases in immediate selection for Zdinx load/store on RV32.

In order to support -unaligned-scalar-mem properly, we need to be more careful with immediates of global variables. We need to guarantee that adding 4 in RISCVExpandPseudoInsts won't overflow simm12. Since we don't know what the simm12 will be until link time, the only way to guarantee this is to make sure the base address is at least 8-byte aligned.

There were also several corner-case bugs in immediate folding where we would fold an immediate in the range [2044,2047], for which adding 4 would overflow.

I have no interest in Zdinx. I only wanted to remove the assert for unaligned scalar memory. An assert was a bad way to handle that, since asserts aren't present in release builds.
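To make the boundary concrete, here is a minimal standalone sketch of the two checks described above (plain C++; `fitsSimm12`, `offsetFoldableForRV32Zdinx`, and `globalFoldableForRV32Zdinx` are hypothetical helpers mirroring the patch's `isInt<12>(Lo12 + 4)` and `commonAlignment(...) > 4` checks, not LLVM APIs):

```cpp
#include <cstdint>
#include <cstdio>

// Stand-in for llvm::isInt<12>: true iff V is encodable as a simm12,
// a signed 12-bit immediate in [-2048, 2047].
static bool fitsSimm12(int64_t V) { return V >= -2048 && V <= 2047; }

// An RV32 Zdinx FP64 load/store expands into two 4-byte accesses, the
// second at offset + 4, so an offset is only foldable if both fit.
static bool offsetFoldableForRV32Zdinx(int64_t Lo12) {
  return fitsSimm12(Lo12) && fitsSimm12(Lo12 + 4);
}

// For a folded %lo(sym), the simm12 value is unknown until link time.
// Requiring the base to be at least 8-byte aligned means its low 12 bits
// form a multiple of 8, whose largest signed value is 2040, so adding 4
// can never leave [-2048, 2047].
static bool globalFoldableForRV32Zdinx(uint64_t AlignInBytes) {
  return AlignInBytes > 4; // 8-byte or stricter alignment
}

int main() {
  for (int64_t Off : {2040, 2043, 2044, 2047})
    std::printf("offset %4lld: %s\n", (long long)Off,
                offsetFoldableForRV32Zdinx(Off) ? "foldable"
                                                : "would overflow simm12");
  // 2040 and 2043 are foldable; 2044..2047 would overflow, which is
  // exactly the corner-case range the patch stops folding.
  std::printf("8-byte aligned global: %s\n",
              globalFoldableForRV32Zdinx(8) ? "foldable" : "not foldable");
}
```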
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

In order to support -unaligned-scalar-mem properly, we need to be more careful with immediates of global variables. We need to guarantee that adding 4 in RISCVExpandPseudoInsts won't overflow simm12. Since we don't know what the simm12 will be until link time, the only way to guarantee this is to make sure the base address is at least 8-byte aligned.

There were also several corner-case bugs in immediate folding where we would fold an immediate in the range [2044,2047], for which adding 4 would overflow. These are not related to unaligned-scalar-mem.

I have no interest in Zdinx. I only wanted to remove the assert for unaligned scalar memory. An assert was a bad way to handle that, since asserts aren't present in release builds.

Patch is 22.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105874.diff

5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 9b9e870fb61d9c..72f96965ae9857 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -291,9 +291,6 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
.setMemRefs(MMOLo);
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
- // FIXME: Zdinx RV32 can not work on unaligned scalar memory.
- assert(!STI->enableUnalignedScalarMem());
-
assert(MBBI->getOperand(2).getOffset() % 8 == 0);
MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
@@ -344,7 +341,7 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
auto Offset = MBBI->getOperand(2).getOffset();
- assert(MBBI->getOperand(2).getOffset() % 8 == 0);
+ assert(Offset % 8 == 0);
MBBI->getOperand(2).setOffset(Offset + 4);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
.addReg(MBBI->getOperand(1).getReg())
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 58f8dc4970282c..87baa13188db70 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2522,7 +2522,8 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
const MVT VT, const RISCVSubtarget *Subtarget,
SDValue Addr, SDValue &Base, SDValue &Offset,
- bool IsPrefetch = false) {
+ bool IsPrefetch = false,
+ bool IsRV32Zdinx = false) {
if (!isa<ConstantSDNode>(Addr))
return false;
@@ -2536,6 +2537,8 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
if (IsPrefetch && (Lo12 & 0b11111) != 0)
return false;
+ if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
+ return false;
if (Hi) {
int64_t Hi20 = (Hi >> 12) & 0xfffff;
@@ -2560,6 +2563,8 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
Lo12 = Seq.back().getImm();
if (IsPrefetch && (Lo12 & 0b11111) != 0)
return false;
+ if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
+ return false;
// Drop the last instruction.
Seq.pop_back();
@@ -2649,7 +2654,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
}
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
- SDValue &Offset, bool IsINX) {
+ SDValue &Offset, bool IsRV32Zdinx) {
if (SelectAddrFrameIndex(Addr, Base, Offset))
return true;
@@ -2657,12 +2662,36 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
MVT VT = Addr.getSimpleValueType();
if (Addr.getOpcode() == RISCVISD::ADD_LO) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
+ // If this is not RV32Zdinx, we can always fold.
+ if (!IsRV32Zdinx) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ // For RV32Zdinx we need more than 4-byte alignment so we can add 4 to the
+ // offset when we expand in RISCVExpandPseudoInsts.
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = commonAlignment(
+ GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+ if (Alignment > 4) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ }
+ if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
+ Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
+ if (Alignment > 4) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ }
}
- int64_t RV32ZdinxRange = IsINX ? 4 : 0;
+ int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
if (CurDAG->isBaseWithConstantOffset(Addr)) {
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
@@ -2678,7 +2707,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
const DataLayout &DL = CurDAG->getDataLayout();
Align Alignment = commonAlignment(
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
- if (CVal == 0 || Alignment > CVal) {
+ if ((CVal == 0 || Alignment > CVal) &&
+ (!IsRV32Zdinx || Alignment > (CVal + 4))) {
int64_t CombinedOffset = CVal + GA->getOffset();
Base = Base.getOperand(0);
Offset = CurDAG->getTargetGlobalAddress(
@@ -2705,7 +2735,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
// Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
// an ADDI for part of the offset and fold the rest into the load/store.
// This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
- if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
+ if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
int64_t Adj = CVal < 0 ? -2048 : 2047;
Base = SDValue(
CurDAG->getMachineNode(
@@ -2724,7 +2754,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
// instructions.
if (isWorthFoldingAdd(Addr) &&
selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
- Offset)) {
+ Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
// Insert an ADD instruction with the materialized Hi52 bits.
Base = SDValue(
CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
@@ -2733,7 +2763,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
}
}
- if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
+ /*IsPrefetch=*/false, RV32ZdinxRange))
return true;
Base = Addr;
@@ -2791,7 +2822,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
}
if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
- Offset, true)) {
+ Offset, /*IsPrefetch=*/true)) {
// Insert an ADD instruction with the materialized Hi52 bits.
Base = SDValue(
CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
@@ -2800,7 +2831,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
}
}
- if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
+ /*IsPrefetch=*/true))
return true;
Base = Addr;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 1b3b00eeccce8b..6dfaee0bcf8d4e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -48,8 +48,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset,
- bool IsINX = false);
- bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ bool IsRV32Zdinx = false);
+ bool SelectAddrRegImmRV32Zdinx(SDValue Addr, SDValue &Base, SDValue &Offset) {
return SelectAddrRegImm(Addr, Base, Offset, true);
}
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 57c18791cc43b2..ed0ad27ac9d29f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -25,7 +25,7 @@ def SDT_RISCVSplitF64 : SDTypeProfile<2, 1, [SDTCisVT<0, i32>,
def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>;
def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>;
-def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">;
+def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmRV32Zdinx">;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
diff --git a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
index f56d47716bd781..01ecaee3d7e7b6 100644
--- a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
+++ b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs < %s \
; RUN: -target-abi=ilp32 | FileCheck -check-prefix=RV32ZDINX %s
+; RUN: llc -mtriple=riscv32 -mattr=+zdinx,+unaligned-scalar-mem -verify-machineinstrs < %s \
+; RUN: -target-abi=ilp32 | FileCheck -check-prefix=RV32ZDINXUALIGNED %s
; RUN: llc -mtriple=riscv64 -mattr=+zdinx -verify-machineinstrs < %s \
; RUN: -target-abi=lp64 | FileCheck -check-prefix=RV64ZDINX %s
@@ -14,6 +16,15 @@ define void @foo(ptr nocapture %p, double %d) nounwind {
; RV32ZDINX-NEXT: sw a3, 1(a0)
; RV32ZDINX-NEXT: ret
;
+; RV32ZDINXUALIGNED-LABEL: foo:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: mv a3, a2
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: mv a2, a1
+; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: ret
+;
; RV64ZDINX-LABEL: foo:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: sd a1, 2044(a0)
@@ -35,6 +46,16 @@ define void @foo2(ptr nocapture %p, double %d) nounwind {
; RV32ZDINX-NEXT: sw a3, 1(a0)
; RV32ZDINX-NEXT: ret
;
+; RV32ZDINXUALIGNED-LABEL: foo2:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: mv a3, a2
+; RV32ZDINXUALIGNED-NEXT: mv a2, a1
+; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a2
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: ret
+;
; RV64ZDINX-LABEL: foo2:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: fadd.d a1, a1, a1
@@ -60,6 +81,16 @@ define void @foo3(ptr nocapture %p) nounwind {
; RV32ZDINX-NEXT: sw a3, 1(a0)
; RV32ZDINX-NEXT: ret
;
+; RV32ZDINXUALIGNED-LABEL: foo3:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(d)
+; RV32ZDINXUALIGNED-NEXT: lw a2, %lo(d)(a1)
+; RV32ZDINXUALIGNED-NEXT: lw a3, %lo(d+4)(a1)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: ret
+;
; RV64ZDINX-LABEL: foo3:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a1, %hi(d)
@@ -87,6 +118,19 @@ define void @foo4(ptr %p) nounwind {
; RV32ZDINX-NEXT: addi sp, sp, 16
; RV32ZDINX-NEXT: ret
;
+; RV32ZDINXUALIGNED-LABEL: foo4:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16
+; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1)
+; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1)
+; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp)
+; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d)
+; RV32ZDINXUALIGNED-NEXT: sw a2, %lo(d)(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, %lo(d+4)(a0)
+; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16
+; RV32ZDINXUALIGNED-NEXT: ret
+;
; RV64ZDINX-LABEL: foo4:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: addi sp, sp, -16
@@ -116,6 +160,15 @@ define void @foo5(ptr nocapture %p, double %d) nounwind {
; RV32ZDINX-NEXT: sw a3, 3(a0)
; RV32ZDINX-NEXT: ret
;
+; RV32ZDINXUALIGNED-LABEL: foo5:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: mv a3, a2
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, -2048
+; RV32ZDINXUALIGNED-NEXT: mv a2, a1
+; RV32ZDINXUALIGNED-NEXT: sw a2, -1(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 3(a0)
+; RV32ZDINXUALIGNED-NEXT: ret
+;
; RV64ZDINX-LABEL: foo5:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: addi a0, a0, -2048
@@ -141,6 +194,19 @@ define void @foo6(ptr %p, double %d) nounwind {
; RV32ZDINX-NEXT: sw a3, 1(a0)
; RV32ZDINX-NEXT: ret
;
+; RV32ZDINXUALIGNED-LABEL: foo6:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: lui a3, %hi(.LCPI5_0)
+; RV32ZDINXUALIGNED-NEXT: lw a4, %lo(.LCPI5_0)(a3)
+; RV32ZDINXUALIGNED-NEXT: lw a5, %lo(.LCPI5_0+4)(a3)
+; RV32ZDINXUALIGNED-NEXT: mv a3, a2
+; RV32ZDINXUALIGNED-NEXT: mv a2, a1
+; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a4
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: ret
+;
; RV64ZDINX-LABEL: foo6:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a2, %hi(.LCPI5_0)
@@ -154,3 +220,276 @@ entry:
store double %add, ptr %add.ptr, align 8
ret void
}
+
+define void @foo7(ptr nocapture %p) nounwind {
+; RV32ZDINX-LABEL: foo7:
+; RV32ZDINX: # %bb.0: # %entry
+; RV32ZDINX-NEXT: addi sp, sp, -16
+; RV32ZDINX-NEXT: lui a1, %hi(d)
+; RV32ZDINX-NEXT: lw a2, %lo(d+4)(a1)
+; RV32ZDINX-NEXT: addi a1, a1, %lo(d)
+; RV32ZDINX-NEXT: sw a2, 8(sp)
+; RV32ZDINX-NEXT: lw a1, 8(a1)
+; RV32ZDINX-NEXT: sw a1, 12(sp)
+; RV32ZDINX-NEXT: lw a2, 8(sp)
+; RV32ZDINX-NEXT: lw a3, 12(sp)
+; RV32ZDINX-NEXT: addi a0, a0, 2047
+; RV32ZDINX-NEXT: sw a2, -3(a0)
+; RV32ZDINX-NEXT: sw a3, 1(a0)
+; RV32ZDINX-NEXT: addi sp, sp, 16
+; RV32ZDINX-NEXT: ret
+;
+; RV32ZDINXUALIGNED-LABEL: foo7:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(d)
+; RV32ZDINXUALIGNED-NEXT: addi a1, a1, %lo(d)
+; RV32ZDINXUALIGNED-NEXT: lw a2, 4(a1)
+; RV32ZDINXUALIGNED-NEXT: lw a3, 8(a1)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: ret
+;
+; RV64ZDINX-LABEL: foo7:
+; RV64ZDINX: # %bb.0: # %entry
+; RV64ZDINX-NEXT: lui a1, %hi(d)
+; RV64ZDINX-NEXT: addi a2, a1, %lo(d)
+; RV64ZDINX-NEXT: lwu a2, 8(a2)
+; RV64ZDINX-NEXT: lwu a1, %lo(d+4)(a1)
+; RV64ZDINX-NEXT: slli a2, a2, 32
+; RV64ZDINX-NEXT: or a1, a2, a1
+; RV64ZDINX-NEXT: sd a1, 2044(a0)
+; RV64ZDINX-NEXT: ret
+entry:
+ %p2 = getelementptr inbounds i8, ptr @d, i32 4
+ %0 = load double, ptr %p2, align 4
+ %add.ptr = getelementptr inbounds i8, ptr %p, i64 2044
+ store double %0, ptr %add.ptr, align 8
+ ret void
+}
+
+define void @foo8(ptr %p) nounwind {
+; RV32ZDINX-LABEL: foo8:
+; RV32ZDINX: # %bb.0: # %entry
+; RV32ZDINX-NEXT: addi sp, sp, -16
+; RV32ZDINX-NEXT: addi a1, a0, 2047
+; RV32ZDINX-NEXT: lw a2, -3(a1)
+; RV32ZDINX-NEXT: lw a3, 1(a1)
+; RV32ZDINX-NEXT: sw a0, 8(sp)
+; RV32ZDINX-NEXT: sw a2, 0(sp)
+; RV32ZDINX-NEXT: sw a3, 4(sp)
+; RV32ZDINX-NEXT: lw a0, 4(sp)
+; RV32ZDINX-NEXT: lui a1, %hi(d)
+; RV32ZDINX-NEXT: addi a2, a1, %lo(d)
+; RV32ZDINX-NEXT: sw a0, 8(a2)
+; RV32ZDINX-NEXT: lw a0, 0(sp)
+; RV32ZDINX-NEXT: sw a0, %lo(d+4)(a1)
+; RV32ZDINX-NEXT: addi sp, sp, 16
+; RV32ZDINX-NEXT: ret
+;
+; RV32ZDINXUALIGNED-LABEL: foo8:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16
+; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1)
+; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1)
+; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp)
+; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(d)
+; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 8(a0)
+; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16
+; RV32ZDINXUALIGNED-NEXT: ret
+;
+; RV64ZDINX-LABEL: foo8:
+; RV64ZDINX: # %bb.0: # %entry
+; RV64ZDINX-NEXT: addi sp, sp, -16
+; RV64ZDINX-NEXT: ld a1, 2044(a0)
+; RV64ZDINX-NEXT: sd a0, 8(sp)
+; RV64ZDINX-NEXT: lui a0, %hi(d)
+; RV64ZDINX-NEXT: addi a2, a0, %lo(d)
+; RV64ZDINX-NEXT: sw a1, %lo(d+4)(a0)
+; RV64ZDINX-NEXT: srli a1, a1, 32
+; RV64ZDINX-NEXT: sw a1, 8(a2)
+; RV64ZDINX-NEXT: addi sp, sp, 16
+; RV64ZDINX-NEXT: ret
+entry:
+ %p.addr = alloca ptr, align 8
+ store ptr %p, ptr %p.addr, align 8
+ %0 = load ptr, ptr %p.addr, align 8
+ %add.ptr = getelementptr inbounds i8, ptr %0, i64 2044
+ %1 = load double, ptr %add.ptr, align 8
+ %p2 = getelementptr inbounds i8, ptr @d, i32 4
+ store double %1, ptr %p2, align 4
+ ret void
+}
+
+@e = global double 4.2, align 4
+
+define void @foo9(ptr nocapture %p) nounwind {
+; RV32ZDINX-LABEL: foo9:
+; RV32ZDINX: # %bb.0: # %entry
+; RV32ZDINX-NEXT: addi sp, sp, -16
+; RV32ZDINX-NEXT: lui a1, %hi(e)
+; RV32ZDINX-NEXT: lw a2, %lo(e)(a1)
+; RV32ZDINX-NEXT: sw a2, 8(sp)
+; RV32ZDINX-NEXT: addi a1, a1, %lo(e)
+; RV32ZDINX-NEXT: lw a1, 4(a1)
+; RV32ZDINX-NEXT: sw a1, 12(sp)
+; RV32ZDINX-NEXT: lw a2, 8(sp)
+; RV32ZDINX-NEXT: lw a3, 12(sp)
+; RV32ZDINX-NEXT: addi a0, a0, 2047
+; RV32ZDINX-NEXT: sw a2, -3(a0)
+; RV32ZDINX-NEXT: sw a3, 1(a0)
+; RV32ZDINX-NEXT: addi sp, sp, 16
+; RV32ZDINX-NEXT: ret
+;
+; RV32ZDINXUALIGNED-LABEL: foo9:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(e)
+; RV32ZDINXUALIGNED-NEXT: addi a1, a1, %lo(e)
+; RV32ZDINXUALIGNED-NEXT: lw a2, 0(a1)
+; RV32ZDINXUALIGNED-NEXT: lw a3, 4(a1)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: ret
+;
+; RV64ZDINX-LABEL: foo9:
+; RV64ZDINX: # %bb.0: # %entry
+; RV64ZDINX-NEXT: lui a1, %hi(e)
+; RV64ZDINX-NEXT: addi a2, a1, %lo(e)
+; RV64ZDINX-NEXT: lwu a2, 4(a2)
+; RV64ZDINX-NEXT: lwu a1, %lo(e)(a1)
+; RV64ZDINX-NEXT: slli a2, a2, 32
+; RV64ZDINX-NEXT: or a1, a2, a1
+; RV64ZDINX-NEXT: sd a1, 2044(a0)
+; RV64ZDINX-NEXT: ret
+entry:
+ %0 = load double, ptr @e, align 4
+ %add.ptr = getelementptr inbounds i8, ptr %p, i64 2044
+ store double %0, ptr %add.ptr, align 8
+ ret void
+}
+
+define void @foo10(ptr %p) nounwind {
+; RV32ZDINX-LABEL: foo10:
+; RV32ZDINX: # %bb.0: # %entry
+; RV32ZDINX-NEXT: addi sp, sp, -16
+; RV32ZDINX-NEXT: addi a1, a0, 2047
+; RV32ZDINX-NEXT: lw a2, -3(a1)
+; RV32ZDINX-NEXT: lw a3, 1(a1)
+; RV32ZDINX-NEXT: sw a0, 8(sp)
+; RV32ZDINX-NEXT: sw a2, 0(sp)
+; RV32ZDINX-NEXT: sw a3, 4(sp)
+; RV32ZDINX-NEXT: lw a0, 4(sp)
+; RV32ZDINX-NEXT: lui a1, %hi(e)
+; RV32ZDINX-NEXT: addi a2, a1, %lo(e)
+; RV32ZDINX-NEXT: sw a0, 4(a2)
+; RV32ZDINX-NEXT: lw a0, 0(sp)
+; RV32ZDINX-NEXT: sw a0, %lo(e)(a1)
+; RV32ZDINX-NEXT: addi sp, sp, 16
+; RV32ZDINX-NEXT: ret
+;
+; RV32ZDINXUALIGNED-LABEL: foo10:
+; RV32ZDINXUALIGNED: # %bb.0: # %entry
+; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16
+; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1)
+; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1)
+; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp)
+; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(e)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(e)
+; RV32ZDINXUALIGNED-NEXT: sw a2, 0(a0)
+; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a0)
+; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16
+; RV32ZDINXUALIGNED-NEXT: ret
+;
+; RV64ZDINX-LABEL: foo10:
+; RV64ZDINX: # %bb.0: # %entry
+; RV64ZDINX-NEXT: addi sp, sp, -16
+; RV64ZDINX-NEXT: ...
[truncated]
Co-authored-by: Yingwei Zheng <[email protected]>
LG
Co-authored-by: Yingwei Zheng <[email protected]>
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/2925