Commit a99e055

[DAG] shouldReduceLoadWidth - add optional<unsigned> byte offset argument (#136723)
Based on feedback for #129695: we need to be able to determine the load offset of smaller loads when deciding whether a multiple-use load should be split (in particular for AVX subvector extractions). This patch adds a std::optional<unsigned> ByteOffset argument to shouldReduceLoadWidth calls where we know the constant offset, so that targets can make use of it in future patches.
1 parent 500cccc · commit a99e055
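
To make the AVX case concrete: narrowing an extract of the upper 128-bit half of a 256-bit load yields a 16-byte load at byte offset 16, and that offset is what the hook now receives. Below is a minimal standalone sketch of the arithmetic (illustrative values only, not LLVM API; it mirrors the VT.getStoreSize() * (Index / NumElts) computation in the DAGCombiner change further down):

#include <cstdio>
#include <optional>

// Illustrative sketch of the offset computed when narrowing an
// extract_subvector of a load; the concrete values are hypothetical.
int main() {
  unsigned NarrowStoreBytes = 16; // store size of the extracted v4f32
  unsigned NumElts = 4;           // element count of the extracted type
  unsigned Index = 4;             // constant extract_subvector index
  std::optional<unsigned> ByteOffset = NarrowStoreBytes * (Index / NumElts);
  std::printf("narrow load at byte offset %u\n", *ByteOffset); // prints 16
  return 0;
}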

File tree

12 files changed: +57 −42 lines
llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 6 additions & 2 deletions
@@ -1823,9 +1823,13 @@ class TargetLoweringBase {
   virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
 
   /// Return true if it is profitable to reduce a load to a smaller type.
+  /// \p ByteOffset is only set if we know the pointer offset at compile time
+  /// otherwise we should assume that additional pointer math is required.
   /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
-  virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                                     EVT NewVT) const {
+  /// Example: (i16 (trunc (srl (i32 (load x)), 16)) -> i16 load x+2
+  virtual bool shouldReduceLoadWidth(
+      SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+      std::optional<unsigned> ByteOffset = std::nullopt) const {
     // By default, assume that it is cheaper to extract a subvector from a wide
     // vector load rather than creating multiple narrow vector loads.
     if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse())
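
As a sketch of how a target might consume the new parameter (a hypothetical override, not part of this commit — the in-tree overrides below simply forward ByteOffset to the base class), a target could refuse to split a multi-use vector load unless the narrow load starts at a known 16-byte boundary:

// Hypothetical target override, shown only to illustrate the hook's intent;
// "MyTargetLowering" does not exist in tree.
bool MyTargetLowering::shouldReduceLoadWidth(
    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
    std::optional<unsigned> ByteOffset) const {
  if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse()) {
    // No compile-time offset means extra pointer math would be required.
    if (!ByteOffset)
      return false;
    // Only split when the narrow load lands on a 16-byte boundary.
    return *ByteOffset % 16 == 0;
  }
  return TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
                                                   ByteOffset);
}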

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 8 additions & 5 deletions
@@ -6693,7 +6693,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
     return false;
 
-  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
     return false;
 
   return true;
@@ -6704,9 +6704,11 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
                                     unsigned ShAmt) {
   if (!LDST)
     return false;
+
   // Only allow byte offsets.
   if (ShAmt % 8)
     return false;
+  const unsigned ByteShAmt = ShAmt / 8;
 
   // Do not generate loads of non-round integer types since these can
   // be expensive (and would be wrong if the type is not byte sized).
@@ -6730,8 +6732,6 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
 
   // Ensure that this isn't going to produce an unsupported memory access.
   if (ShAmt) {
-    assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
-    const unsigned ByteShAmt = ShAmt / 8;
     const Align LDSTAlign = LDST->getAlign();
     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
@@ -6771,7 +6771,7 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
       return false;
 
-    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
       return false;
   } else {
     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
@@ -25268,9 +25268,12 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
 
   // It's fine to use TypeSize here as we know the offset will not be negative.
   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
+  std::optional<unsigned> ByteOffset;
+  if (Offset.isFixed())
+    ByteOffset = Offset.getFixedValue();
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
+  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
     return SDValue();
 
   // The narrow load will be offset from the base address of the old load if
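
The ByteShAmt plumbing matches the header's second doc-comment example: for (i16 (trunc (srl (i32 (load x)), 16))), the shift strips 16 bits, so the narrowed i16 load sits at byte offset 16 / 8 = 2 on the little-endian layout the comment assumes. A tiny standalone sketch of that guard and division:

#include <cstdio>

// Standalone sketch of isLegalNarrowLdSt's byte-offset guard: a bit shift
// can only become a load offset when it is a whole number of bytes.
int main() {
  unsigned ShAmt = 16; // shift amount in bits, e.g. from the srl above
  if (ShAmt % 8 != 0) {
    std::puts("not byte-aligned: narrowing rejected");
    return 0;
  }
  const unsigned ByteShAmt = ShAmt / 8; // passed as ByteOffset
  std::printf("i16 load at byte offset %u\n", ByteShAmt); // prints 2
  return 0;
}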

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 12 additions & 8 deletions
@@ -4726,8 +4726,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         // for the narrowed load.
         for (unsigned width = 8; width < origWidth; width *= 2) {
           EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
-          if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
-            continue;
           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
           // Avoid accessing any padding here for now (we could use memWidth
           // instead of origWidth here otherwise).
@@ -4737,8 +4735,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
             unsigned ptrOffset =
                 Layout.isLittleEndian() ? offset : memWidth - width - offset;
             unsigned IsFast = 0;
+            assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
             Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
-            if (allowsMemoryAccess(
+            if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
+                                      ptrOffset / 8) &&
+                allowsMemoryAccess(
                     *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
                     NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
                 IsFast) {
@@ -12176,24 +12177,27 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
 
   ISD::LoadExtType ExtTy =
       ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
-  if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
-      !shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+  if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
     return SDValue();
 
+  std::optional<unsigned> ByteOffset;
   Align Alignment = OriginalLoad->getAlign();
   MachinePointerInfo MPI;
   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
     int Elt = ConstEltNo->getZExtValue();
-    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
-    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
-    Alignment = commonAlignment(Alignment, PtrOff);
+    ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
+    MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
+    Alignment = commonAlignment(Alignment, *ByteOffset);
   } else {
     // Discard the pointer info except the address space because the memory
     // operand can't represent this new access since the offset is variable.
     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
   }
 
+  if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
+    return SDValue();
+
   unsigned IsFast = 0;
   if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
                           OriginalLoad->getAddressSpace(), Alignment,
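
Note how SimplifySetCC derives the offset it now hands to the hook: on big-endian targets the byte offset is mirrored within the original load's width. A standalone sketch of that computation with hypothetical widths (not LLVM API):

#include <cassert>
#include <cstdio>

// Standalone sketch of the ptrOffset computation above, with made-up values:
// narrowing a 32-bit load to the 8-bit lane that starts at bit offset 8.
int main() {
  bool IsLittleEndian = false; // assume a big-endian target for illustration
  unsigned memWidth = 32;      // width of the original load, in bits
  unsigned width = 8;          // width of the narrowed load, in bits
  unsigned offset = 8;         // little-endian bit offset of the live lane
  unsigned ptrOffset =
      IsLittleEndian ? offset : memWidth - width - offset; // 16 bits
  assert(ptrOffset % 8 == 0 && "Non-Bytealigned pointer offset");
  std::printf("ByteOffset = %u\n", ptrOffset / 8); // prints 2
  return 0;
}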

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 4 deletions
@@ -16486,11 +16486,12 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   return false;
 }
 
-bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
-                                                  ISD::LoadExtType ExtTy,
-                                                  EVT NewVT) const {
+bool AArch64TargetLowering::shouldReduceLoadWidth(
+    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   // TODO: This may be worth removing. Check regression tests for diffs.
-  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
+                                                 ByteOffset))
     return false;
 
   // If we're reducing the load width in order to avoid having to use an extra

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 2 deletions
@@ -688,8 +688,8 @@ class AArch64TargetLowering : public TargetLowering {
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
-  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                             EVT NewVT) const override;
+  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                             std::optional<unsigned> ByteOffset) const override;
 
   bool shouldRemoveRedundantExtend(SDValue Op) const override;
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 4 additions & 4 deletions
@@ -819,11 +819,11 @@ bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
   return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
 }
 
-bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
-                                                 ISD::LoadExtType ExtTy,
-                                                 EVT NewVT) const {
+bool AMDGPUTargetLowering::shouldReduceLoadWidth(
+    SDNode *N, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   // TODO: This may be worth removing. Check regression tests for diffs.
-  if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
+  if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT, ByteOffset))
     return false;
 
   unsigned NewSize = NewVT.getStoreSizeInBits();

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 2 additions & 3 deletions
@@ -215,9 +215,8 @@ class AMDGPUTargetLowering : public TargetLowering {
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
   bool ShouldShrinkFPConstant(EVT VT) const override;
-  bool shouldReduceLoadWidth(SDNode *Load,
-                             ISD::LoadExtType ExtType,
-                             EVT ExtVT) const override;
+  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT,
+                             std::optional<unsigned> ByteOffset) const override;
 
   bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
                                const MachineMemOperand &MMO) const final;

llvm/lib/Target/BPF/BPFISelLowering.h

Lines changed: 3 additions & 2 deletions
@@ -135,8 +135,9 @@ class BPFTargetLowering : public TargetLowering {
   //   ctx = ctx + reloc_offset
   //   ... (*(u8 *)(ctx + 1)) & 0x80 ...
   // which will be rejected by the verifier.
-  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                             EVT NewVT) const override {
+  bool
+  shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                        std::optional<unsigned> ByteOffset) const override {
     return false;
   }
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 7 additions & 5 deletions
@@ -3821,19 +3821,21 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
   return TargetLowering::findRepresentativeClass(TRI, VT);
 }
 
-bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
-      ISD::LoadExtType ExtTy, EVT NewVT) const {
+bool HexagonTargetLowering::shouldReduceLoadWidth(
+    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   // TODO: This may be worth removing. Check regression tests for diffs.
-  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
+                                                 ByteOffset))
     return false;
 
   auto *L = cast<LoadSDNode>(Load);
-  std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
+  std::pair<SDValue, int> BO = getBaseAndOffset(L->getBasePtr());
   // Small-data object, do not shrink.
   if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
     return false;
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
-    auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
+    auto &HTM = static_cast<const HexagonTargetMachine &>(getTargetMachine());
     const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
     return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
   }

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 2 additions & 2 deletions
@@ -342,8 +342,8 @@ class HexagonTargetLowering : public TargetLowering {
   SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
       const override;
 
-  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                             EVT NewVT) const override;
+  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                             std::optional<unsigned> ByteOffset) const override;
 
   void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                      SDNode *Node) const override;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 3 deletions
@@ -3258,9 +3258,9 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   return false;
 }
 
-bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
-                                              ISD::LoadExtType ExtTy,
-                                              EVT NewVT) const {
+bool X86TargetLowering::shouldReduceLoadWidth(
+    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
 
   // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 3 additions & 2 deletions
@@ -1501,8 +1501,9 @@ namespace llvm {
 
     /// Return true if we believe it is correct and profitable to reduce the
     /// load node to a smaller type.
-    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                               EVT NewVT) const override;
+    bool
+    shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                          std::optional<unsigned> ByteOffset) const override;
 
     /// Return true if the specified scalar FP type is computed in an SSE
     /// register, not on the X87 floating point stack.
