Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 3afd566

Browse files
author
Marek Olsak
committed
AMDGPU: Add 32-bit constant address space
Note: This is a candidate for LLVM 6.0, because it was planned to be in that release but was delayed due to a long review period. Merge conflict in release_60 - resolution: Add "-p6:32:32" into the second (non-amdgiz) string. Only scalar loads support 32-bit pointers. An address in a VGPR will fail to compile. That's OK because the results of loads will only be used in places where VGPRs are forbidden. Updated AMDGPUAliasAnalysis and used SReg_64_XEXEC. The tests cover all use cases we need for Mesa. Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D41651 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324487 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ea06ecf commit 3afd566

14 files changed

+375
-19
lines changed

docs/AMDGPUUsage.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ LLVM Address Space number is used throughout LLVM (for example, in LLVM IR).
285285
3 Local (group/LDS) Local (group/LDS) Local (group/LDS) Local (group/LDS)
286286
4 Generic (Flat) Region (GDS) Region (GDS) Constant
287287
5 Region (GDS) Private (Scratch) Private (Scratch) Private (Scratch)
288+
6 Constant 32-bit Constant 32-bit Constant 32-bit Constant 32-bit
288289
================== ================= ================= ================= =================
289290

290291
Current Default

lib/Target/AMDGPU/AMDGPU.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,9 @@ struct AMDGPUAS {
224224
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
225225
CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
226226
LOCAL_ADDRESS = 3, ///< Address space for local memory.
227+
228+
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
229+
227230
/// Address space for direct addressable parameter memory (CONST0)
228231
PARAM_D_ADDRESS = 6,
229232
/// Address space for indirect addressable parameter memory (VTX1)

lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
115115
bool OrLocal) {
116116
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
117117

118-
if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
118+
if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS ||
119+
Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS_32BIT) {
119120
return true;
120121
}
121122

lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,8 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
466466
}
467467

468468
bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
469-
if (I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
469+
if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
470+
I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
470471
canWidenScalarExtLoad(I)) {
471472
IRBuilder<> Builder(&I);
472473
Builder.SetCurrentDebugLocation(I.getDebugLoc());

lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
162162

163163
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
164164
bool &Imm) const;
165+
SDValue Expand32BitAddress(SDValue Addr) const;
165166
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
166167
bool &Imm) const;
167168
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
@@ -636,7 +637,8 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
636637
if (!N->readMem())
637638
return false;
638639
if (CbId == -1)
639-
return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
640+
return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
641+
N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
640642

641643
return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
642644
}
@@ -1438,19 +1440,45 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
14381440
return true;
14391441
}
14401442

1443+
/// Widen a 32-bit address into a 64-bit value suitable as an SMRD base.
///
/// \param Addr the address node; returned unchanged when its value type is
///        not i32.
/// \returns an i64 REG_SEQUENCE whose sub0 is \p Addr and whose sub1 is an
///          S_MOV_B32 of the function's 32-bit-address high bits.
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1444+
if (Addr.getValueType() != MVT::i32)
1445+
return Addr;
1446+
1447+
// Extend a 32-bit address to 64 bits. The upper half is not hard-coded to
// zero: it is whatever SIMachineFunctionInfo::get32BitAddressHighBits()
// reports for the current function.
1448+
SDLoc SL(Addr);
1449+
1450+
const MachineFunction &MF = CurDAG->getMachineFunction();
1451+
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1452+
unsigned AddrHiVal = Info->get32BitAddressHighBits();
1453+
SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1454+
1455+
// REG_SEQUENCE operands: register class ID, then (value, subregister index)
// pairs for the low half (sub0) and the high half (sub1).
const SDValue Ops[] = {
1456+
CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1457+
Addr,
1458+
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1459+
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1460+
0),
1461+
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1462+
};
1463+
1464+
return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1465+
Ops), 0);
1466+
}
1467+
14411468
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
14421469
SDValue &Offset, bool &Imm) const {
14431470
SDLoc SL(Addr);
1471+
14441472
if (CurDAG->isBaseWithConstantOffset(Addr)) {
14451473
SDValue N0 = Addr.getOperand(0);
14461474
SDValue N1 = Addr.getOperand(1);
14471475

14481476
if (SelectSMRDOffset(N1, Offset, Imm)) {
1449-
SBase = N0;
1477+
SBase = Expand32BitAddress(N0);
14501478
return true;
14511479
}
14521480
}
1453-
SBase = Addr;
1481+
SBase = Expand32BitAddress(Addr);
14541482
Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
14551483
Imm = true;
14561484
return true;

lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,9 @@ static bool isInstrUniform(const MachineInstr &MI) {
229229
isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
230230
return true;
231231

232+
if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
233+
return true;
234+
232235
const Instruction *I = dyn_cast<Instruction>(Ptr);
233236
return I && I->getMetadata("amdgpu.uniform");
234237
}
@@ -293,7 +296,8 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
293296
if (!I.hasOneMemOperand())
294297
return false;
295298

296-
if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
299+
if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
300+
(*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
297301
return false;
298302

299303
if (!isInstrUniform(I))

lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ static StringRef computeDataLayout(const Triple &TT) {
266266

267267
// 32-bit private, local, and region pointers. 64-bit global, constant and
268268
// flat.
269-
return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
269+
return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32"
270270
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
271271
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
272272
}

lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
237237
AMDGPUAS AS = ST->getAMDGPUAS();
238238
if (AddrSpace == AS.GLOBAL_ADDRESS ||
239239
AddrSpace == AS.CONSTANT_ADDRESS ||
240+
AddrSpace == AS.CONSTANT_ADDRESS_32BIT ||
240241
AddrSpace == AS.FLAT_ADDRESS)
241242
return 128;
242243
if (AddrSpace == AS.LOCAL_ADDRESS ||

lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -900,7 +900,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
900900
if (AS == AMDGPUASI.GLOBAL_ADDRESS)
901901
return isLegalGlobalAddressingMode(AM);
902902

903-
if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
903+
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
904+
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
904905
// If the offset isn't a multiple of 4, it probably isn't going to be
905906
// correctly aligned.
906907
// FIXME: Can we get the real alignment here?
@@ -1023,7 +1024,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
10231024
// If we have a uniform constant load, it still requires using a slow
10241025
// buffer instruction if unaligned.
10251026
if (IsFast) {
1026-
*IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ?
1027+
*IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS ||
1028+
AddrSpace == AMDGPUASI.CONSTANT_ADDRESS_32BIT) ?
10271029
(Align % 4 == 0) : true;
10281030
}
10291031

@@ -1066,7 +1068,8 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
10661068
static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
10671069
return AS == AMDGPUASI.GLOBAL_ADDRESS ||
10681070
AS == AMDGPUASI.FLAT_ADDRESS ||
1069-
AS == AMDGPUASI.CONSTANT_ADDRESS;
1071+
AS == AMDGPUASI.CONSTANT_ADDRESS ||
1072+
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
10701073
}
10711074

10721075
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -4008,13 +4011,15 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
40084011

40094012
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
40104013
const Triple &TT = getTargetMachine().getTargetTriple();
4011-
return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
4014+
return (GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
4015+
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
40124016
AMDGPU::shouldEmitConstantsToTextSection(TT);
40134017
}
40144018

40154019
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
40164020
return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
4017-
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
4021+
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
4022+
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
40184023
!shouldEmitFixup(GV) &&
40194024
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
40204025
}
@@ -4391,7 +4396,8 @@ bool
43914396
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
43924397
// We can fold offsets for anything that doesn't require a GOT relocation.
43934398
return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
4394-
GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
4399+
GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
4400+
GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
43954401
!shouldEmitGOTReloc(GA->getGlobal());
43964402
}
43974403

@@ -4444,6 +4450,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
44444450
const GlobalValue *GV = GSD->getGlobal();
44454451

44464452
if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
4453+
GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
44474454
GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS &&
44484455
// FIXME: It isn't correct to rely on the type of the pointer. This should
44494456
// be removed when address space 0 is 64-bit.
@@ -5378,15 +5385,18 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
53785385
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
53795386

53805387
unsigned NumElements = MemVT.getVectorNumElements();
5381-
if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
5388+
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
5389+
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
53825390
if (isMemOpUniform(Load))
53835391
return SDValue();
53845392
// Non-uniform loads will be selected to MUBUF instructions, so they
53855393
// have the same legalization requirements as global and private
53865394
// loads.
53875395
//
53885396
}
5389-
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
5397+
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
5398+
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
5399+
AS == AMDGPUASI.GLOBAL_ADDRESS) {
53905400
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
53915401
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
53925402
return SDValue();
@@ -5395,7 +5405,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
53955405
// loads.
53965406
//
53975407
}
5398-
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
5408+
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
5409+
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
5410+
AS == AMDGPUASI.GLOBAL_ADDRESS ||
53995411
AS == AMDGPUASI.FLAT_ADDRESS) {
54005412
if (NumElements > 4)
54015413
return SplitVectorLoad(Op, DAG);

lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
4747
WorkItemIDZ(false),
4848
ImplicitBufferPtr(false),
4949
ImplicitArgPtr(false),
50-
GITPtrHigh(0xffffffff) {
50+
GITPtrHigh(0xffffffff),
51+
HighBitsOf32BitAddress(0) {
5152
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
5253
const Function &F = MF.getFunction();
5354
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
@@ -164,6 +165,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
164165
StringRef S = A.getValueAsString();
165166
if (!S.empty())
166167
S.consumeInteger(0, GITPtrHigh);
168+
169+
A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
170+
S = A.getValueAsString();
171+
if (!S.empty())
172+
S.consumeInteger(0, HighBitsOf32BitAddress);
167173
}
168174

169175
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(

lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
186186
// current hardware only allows a 16 bit value.
187187
unsigned GITPtrHigh;
188188

189+
unsigned HighBitsOf32BitAddress;
190+
189191
MCPhysReg getNextUserSGPR() const {
190192
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
191193
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -411,6 +413,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
411413
return GITPtrHigh;
412414
}
413415

416+
unsigned get32BitAddressHighBits() const {
417+
return HighBitsOf32BitAddress;
418+
}
419+
414420
unsigned getNumUserSGPRs() const {
415421
return NumUserSGPRs;
416422
}

lib/Target/AMDGPU/SMInstructions.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>
223223
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
224224
auto Ld = cast<LoadSDNode>(N);
225225
return Ld->getAlignment() >= 4 &&
226-
((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
226+
(((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
227+
Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
227228
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
228229
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
229230
!Ld->isVolatile() &&

lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,8 @@ bool isGlobalSegment(const GlobalValue *GV) {
447447
}
448448

449449
bool isReadOnlySegment(const GlobalValue *GV) {
450-
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
450+
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
451+
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
451452
}
452453

453454
bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -916,6 +917,9 @@ bool isUniformMMO(const MachineMemOperand *MMO) {
916917
isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
917918
return true;
918919

920+
if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
921+
return true;
922+
919923
if (const Argument *Arg = dyn_cast<Argument>(Ptr))
920924
return isArgPassedInSGPR(Arg);
921925

0 commit comments

Comments
 (0)