Skip to content

Commit 0c24767

Browse files
committed
[AMDGPU] Add off-by-default flag to control LiveRegOpt
Change-Id: Id939bf74b48b47e5ee2b432956e476fac80e3375
1 parent 8bb62e2 commit 0c24767

File tree

11 files changed

+2441
-290
lines changed

11 files changed

+2441
-290
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,8 @@ unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) {
321321
// queries (e.g. get*InstrCost) to decide the proper handling
322322
// of 8 bit vectors.
323323
if (FixedVectorType *VTy = dyn_cast<FixedVectorType>(Tp)) {
324-
if (DL.getTypeSizeInBits(VTy->getElementType()) == 8) {
324+
if (ST->shouldCoerceIllegalTypes() &&
325+
DL.getTypeSizeInBits(VTy->getElementType()) == 8) {
325326
unsigned ElCount = VTy->getElementCount().getFixedValue();
326327
return PowerOf2Ceil(ElCount / 4);
327328
}
@@ -362,10 +363,10 @@ unsigned GCNTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
362363
if (Opcode == Instruction::Load || Opcode == Instruction::Store)
363364
return 32 * 4 / ElemWidth;
364365

365-
return (ElemWidth == 8) ? 4
366-
: (ElemWidth == 16) ? 2
367-
: (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
368-
: 1;
366+
return (ST->shouldCoerceIllegalTypes() && ElemWidth == 8) ? 4
367+
: (ElemWidth == 16) ? 2
368+
: (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
369+
: 1;
369370
}
370371

371372
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@@ -1175,7 +1176,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11751176

11761177
unsigned ScalarSize = DL.getTypeSizeInBits(VT->getElementType());
11771178
if (ST->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
1178-
(ScalarSize == 16 || ScalarSize == 8)) {
1179+
(ScalarSize == 16 ||
1180+
(ScalarSize == 8 && ST->shouldCoerceIllegalTypes()))) {
11791181
// Larger vector widths may require additional instructions, but are
11801182
// typically cheaper than scalarized versions.
11811183
unsigned NumVectorElts = cast<FixedVectorType>(VT)->getNumElements();

llvm/lib/Target/AMDGPU/GCNSubtarget.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ static cl::opt<unsigned>
5252
cl::desc("Number of addresses from which to enable MIMG NSA."),
5353
cl::init(2), cl::Hidden);
5454

55+
static cl::opt<bool>
56+
CoerceIllegal("amdgpu-coerce-illegal-types",
57+
cl::desc("Whether or not to coerce illegal types"),
58+
cl::ReallyHidden, cl::init(false));
59+
5560
GCNSubtarget::~GCNSubtarget() = default;
5661

5762
GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
@@ -191,6 +196,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
191196
RegBankInfo = std::make_unique<AMDGPURegisterBankInfo>(*this);
192197
InstSelector =
193198
std::make_unique<AMDGPUInstructionSelector>(*this, *RegBankInfo, TM);
199+
200+
ShouldCoerceIllegalTypes = CoerceIllegal;
194201
}
195202

196203
const SelectionDAGTargetInfo *GCNSubtarget::getSelectionDAGInfo() const {

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
259259
// Dummy feature to use for assembler in tablegen.
260260
bool FeatureDisable = false;
261261

262+
bool ShouldCoerceIllegalTypes = false;
263+
262264
private:
263265
SIInstrInfo InstrInfo;
264266
SITargetLowering TLInfo;
@@ -1445,6 +1447,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
14451447
// of sign-extending.
14461448
bool hasGetPCZeroExtension() const { return GFX12Insts; }
14471449

1450+
/// \returns whether or not we should coerce illegal types into vectors of
1451+
/// legal types for values that span basic blocks.
1452+
bool shouldCoerceIllegalTypes() const { return ShouldCoerceIllegalTypes; }
1453+
14481454
/// \returns SGPR allocation granularity supported by the subtarget.
14491455
unsigned getSGPRAllocGranule() const {
14501456
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);

llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll

Lines changed: 208 additions & 208 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)