Skip to content

Commit 708484d

Browse files
jrbyrnes authored and
bcahoon committed
[AMDGPU] Add off-by-default flag to control LiveRegOpt
Change-Id: Id939bf74b48b47e5ee2b432956e476fac80e3375
1 parent 28634b9 commit 708484d

File tree

11 files changed

+2342
-283
lines changed

11 files changed

+2342
-283
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,8 @@ unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) {
321321
// queries (e.g. get*InstrCost) to decide the proper handling
322322
// of 8 bit vectors.
323323
if (FixedVectorType *VTy = dyn_cast<FixedVectorType>(Tp)) {
324-
if (DL.getTypeSizeInBits(VTy->getElementType()) == 8) {
324+
if (ST->shouldCoerceIllegalTypes() &&
325+
DL.getTypeSizeInBits(VTy->getElementType()) == 8) {
325326
unsigned ElCount = VTy->getElementCount().getFixedValue();
326327
return PowerOf2Ceil(ElCount / 4);
327328
}
@@ -362,10 +363,10 @@ unsigned GCNTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
362363
if (Opcode == Instruction::Load || Opcode == Instruction::Store)
363364
return 32 * 4 / ElemWidth;
364365

365-
return (ElemWidth == 8) ? 4
366-
: (ElemWidth == 16) ? 2
367-
: (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
368-
: 1;
366+
return (ST->shouldCoerceIllegalTypes() && ElemWidth == 8) ? 4
367+
: (ElemWidth == 16) ? 2
368+
: (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
369+
: 1;
369370
}
370371

371372
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@@ -1154,7 +1155,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11541155

11551156
unsigned ScalarSize = DL.getTypeSizeInBits(VT->getElementType());
11561157
if (ST->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
1157-
(ScalarSize == 16 || ScalarSize == 8)) {
1158+
(ScalarSize == 16 ||
1159+
(ScalarSize == 8 && ST->shouldCoerceIllegalTypes()))) {
11581160
// Larger vector widths may require additional instructions, but are
11591161
// typically cheaper than scalarized versions.
11601162
unsigned NumVectorElts = cast<FixedVectorType>(VT)->getNumElements();

llvm/lib/Target/AMDGPU/GCNSubtarget.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ static cl::opt<unsigned>
6060
cl::desc("Number of addresses from which to enable MIMG NSA."),
6161
cl::init(3), cl::Hidden);
6262

63+
static cl::opt<bool>
64+
CoerceIllegal("amdgpu-coerce-illegal-types",
65+
cl::desc("Whether or not to coerce illegal types"),
66+
cl::ReallyHidden, cl::init(false));
67+
6368
GCNSubtarget::~GCNSubtarget() = default;
6469

6570
GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
@@ -198,6 +203,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
198203
RegBankInfo = std::make_unique<AMDGPURegisterBankInfo>(*this);
199204
InstSelector =
200205
std::make_unique<AMDGPUInstructionSelector>(*this, *RegBankInfo, TM);
206+
207+
ShouldCoerceIllegalTypes = CoerceIllegal;
201208
}
202209

203210
unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
259259
bool FeatureDisable = false;
260260

261261
SelectionDAGTargetInfo TSInfo;
262+
bool ShouldCoerceIllegalTypes = false;
263+
262264
private:
263265
SIInstrInfo InstrInfo;
264266
SITargetLowering TLInfo;
@@ -1438,6 +1440,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
14381440
// of sign-extending.
14391441
bool hasGetPCZeroExtension() const { return GFX12Insts; }
14401442

1443+
/// \returns whether or not we should coerce illegal types into vectors of
1444+
/// legal types for values that span basic blocks.
1445+
bool shouldCoerceIllegalTypes() const { return ShouldCoerceIllegalTypes; }
1446+
14411447
/// \returns SGPR allocation granularity supported by the subtarget.
14421448
unsigned getSGPRAllocGranule() const {
14431449
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);

llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll

Lines changed: 208 additions & 208 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)