Skip to content

Commit d21182d

Browse files
committed
AMDGPU/GlobalISel: Only map VOP operands to VGPRs
This trivially avoids violating the constant bus restriction. Previously this was allowing one SGPR in the first source operand, which technically also avoided violating this for most operations (but not for special cases reading vcc). We do need to write some new, smarter operand folds to pick the optimal SGPR to use in some kind of post-isel fold, but that's purely an optimization. I was originally thinking we would pick which operands should be SGPRs in RegBankSelect, but I think this isn't really manageable. There would be additional complexity to handle every G_* instruction, and then any nontrivial instruction patterns would need to know when to avoid violating it, which is likely to be very error prone. I think having all inputs being canonically copies to VGPRs will simplify the operand folding logic. The current folding we do is backwards, and only considers one operand at a time, relative to operands it already has. It therefore poorly handles the case where there is already a constant bus operand user. If all operands are copies, it's somewhat simpler to consider all input operands at once to choose the optimal constant bus user. Since the failure mode for constant bus violations is now a verifier error and not an selection failure, this moves towards a place where we can turn on the fallback mode. The SGPR copy folding optimizations can be left for later.
1 parent dc141af commit d21182d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+847
-402
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 16 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -533,24 +533,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
533533
AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
534534
3); // Num Operands
535535
AltMappings.push_back(&VVMapping);
536-
537-
const InstructionMapping &SVMapping = getInstructionMapping(
538-
3, 3, getOperandsMapping(
539-
{AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
540-
AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
541-
AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
542-
3); // Num Operands
543-
AltMappings.push_back(&SVMapping);
544-
545-
// SGPR in LHS is slightly preferrable, so make it VS more expensive than
546-
// SV.
547-
const InstructionMapping &VSMapping = getInstructionMapping(
548-
3, 4, getOperandsMapping(
549-
{AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
550-
AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
551-
AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
552-
3); // Num Operands
553-
AltMappings.push_back(&VSMapping);
554536
break;
555537
}
556538
case TargetOpcode::G_LOAD:
@@ -600,22 +582,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
600582
4); // Num Operands
601583
AltMappings.push_back(&SSMapping);
602584

603-
const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
604-
getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
605-
nullptr, // Predicate operand.
606-
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
607-
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
608-
4); // Num Operands
609-
AltMappings.push_back(&SVMapping);
610-
611-
const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
612-
getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
613-
nullptr, // Predicate operand.
614-
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
615-
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
616-
4); // Num Operands
617-
AltMappings.push_back(&VSMapping);
618-
619585
const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
620586
getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
621587
nullptr, // Predicate operand.
@@ -650,10 +616,8 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
650616
case TargetOpcode::G_SMAX:
651617
case TargetOpcode::G_UMIN:
652618
case TargetOpcode::G_UMAX: {
653-
static const OpRegBankEntry<3> Table[4] = {
619+
static const OpRegBankEntry<3> Table[2] = {
654620
{ { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
655-
{ { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
656-
{ { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
657621

658622
// Scalar requires cmp+select, and extends if 16-bit.
659623
// FIXME: Should there be separate costs for 32 and 16-bit
@@ -2440,31 +2404,19 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
24402404
const MachineFunction &MF = *MI.getParent()->getParent();
24412405
const MachineRegisterInfo &MRI = MF.getRegInfo();
24422406
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
2443-
unsigned OpdIdx = 0;
2444-
2445-
unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
2446-
OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
2447-
2448-
if (MI.getOperand(OpdIdx).isIntrinsicID())
2449-
OpdsMapping[OpdIdx++] = nullptr;
24502407

2451-
Register Reg1 = MI.getOperand(OpdIdx).getReg();
2452-
unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
2453-
2454-
unsigned DefaultBankID = Size1 == 1 ?
2455-
AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
2456-
unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
2457-
2458-
OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
2459-
2460-
for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
2461-
const MachineOperand &MO = MI.getOperand(OpdIdx);
2462-
if (!MO.isReg())
2408+
// Even though we technically could use SGPRs, this would require knowledge of
2409+
// the constant bus restriction. Force all sources to VGPR (except for VCC).
2410+
//
2411+
// TODO: Unary ops are trivially OK, so accept SGPRs?
2412+
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2413+
const MachineOperand &Src = MI.getOperand(i);
2414+
if (!Src.isReg())
24632415
continue;
24642416

2465-
unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
2417+
unsigned Size = getSizeInBits(Src.getReg(), MRI, *TRI);
24662418
unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
2467-
OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
2419+
OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
24682420
}
24692421

24702422
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
@@ -3298,11 +3250,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
32983250
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
32993251

33003252
unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
3301-
OpdsMapping[3] = AMDGPU::getValueMapping(
3302-
getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
3303-
OpdsMapping[4] = AMDGPU::getValueMapping(
3304-
getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
3305-
3253+
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
3254+
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
33063255
break;
33073256
}
33083257
case Intrinsic::amdgcn_class: {
@@ -3312,10 +3261,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
33123261
unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
33133262
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
33143263
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
3315-
OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
3316-
Src0Size);
3317-
OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
3318-
Src1Size);
3264+
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
3265+
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
33193266
break;
33203267
}
33213268
case Intrinsic::amdgcn_icmp:
@@ -3324,10 +3271,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
33243271
// This is not VCCRegBank because this is not used in boolean contexts.
33253272
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
33263273
unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
3327-
unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
3328-
unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
3329-
OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
3330-
OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
3274+
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
3275+
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
33313276
break;
33323277
}
33333278
case Intrinsic::amdgcn_readlane: {

0 commit comments

Comments
 (0)