Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit da360ea

Browse files
author
Ryan Taylor
committed
[AMDGPU] Add support for a16 modifier for gfx9
Summary: Adds support for the a16 modifier on gfx9. On gfx9, the a16 bit replaces the r128 bit. Change-Id: Ie8b881e4e6d2f023fb5e0150420893513e5f4841 Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits Differential Revision: https://reviews.llvm.org/D50575 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@340831 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent b300798 commit da360ea

File tree

12 files changed

+691
-45
lines changed

12 files changed

+691
-45
lines changed

lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
242242
"Support DPP (Data Parallel Primitives) extension"
243243
>;
244244

245+
def FeatureR128A16 : SubtargetFeature<"r128-a16",
246+
"HasR128A16",
247+
"true",
248+
"Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
249+
>;
250+
245251
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
246252
"HasIntClamp",
247253
"true",
@@ -444,7 +450,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
444450
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
445451
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
446452
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
447-
FeatureAddNoCarryInsts, FeatureScalarAtomics
453+
FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
448454
]
449455
>;
450456

@@ -703,6 +709,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
703709
def HasDPP : Predicate<"Subtarget->hasDPP()">,
704710
AssemblerPredicate<"FeatureDPP">;
705711

712+
def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
713+
AssemblerPredicate<"FeatureR128A16">;
714+
706715
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
707716
AssemblerPredicate<"FeatureIntClamp">;
708717

lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
197197
HasSDWAMac(false),
198198
HasSDWAOutModsVOPC(false),
199199
HasDPP(false),
200+
HasR128A16(false),
200201
HasDLInsts(false),
201202
D16PreservesUnusedBits(false),
202203
FlatAddressSpace(false),

lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
358358
bool HasSDWAMac;
359359
bool HasSDWAOutModsVOPC;
360360
bool HasDPP;
361+
bool HasR128A16;
361362
bool HasDLInsts;
362363
bool D16PreservesUnusedBits;
363364
bool FlatAddressSpace;
@@ -791,6 +792,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
791792
return HasDPP;
792793
}
793794

795+
bool hasR128A16() const {
796+
return HasR128A16;
797+
}
798+
794799
bool enableSIScheduler() const {
795800
return EnableSIScheduler;
796801
}

lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
156156
ImmTyDMask,
157157
ImmTyUNorm,
158158
ImmTyDA,
159-
ImmTyR128,
159+
ImmTyR128A16,
160160
ImmTyLWE,
161161
ImmTyExpTgt,
162162
ImmTyExpCompr,
@@ -290,7 +290,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
290290
bool isDMask() const { return isImmTy(ImmTyDMask); }
291291
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
292292
bool isDA() const { return isImmTy(ImmTyDA); }
293-
bool isR128() const { return isImmTy(ImmTyR128); }
293+
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
294294
bool isLWE() const { return isImmTy(ImmTyLWE); }
295295
bool isOff() const { return isImmTy(ImmTyOff); }
296296
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -678,7 +678,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
678678
case ImmTyDMask: OS << "DMask"; break;
679679
case ImmTyUNorm: OS << "UNorm"; break;
680680
case ImmTyDA: OS << "DA"; break;
681-
case ImmTyR128: OS << "R128"; break;
681+
case ImmTyR128A16: OS << "R128A16"; break;
682682
case ImmTyLWE: OS << "LWE"; break;
683683
case ImmTyOff: OS << "Off"; break;
684684
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1090,7 +1090,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
10901090
bool validateMIMGAtomicDMask(const MCInst &Inst);
10911091
bool validateMIMGGatherDMask(const MCInst &Inst);
10921092
bool validateMIMGDataSize(const MCInst &Inst);
1093-
bool validateMIMGR128(const MCInst &Inst);
10941093
bool validateMIMGD16(const MCInst &Inst);
10951094
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
10961095
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -2445,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
24452444
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
24462445
}
24472446

2448-
bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
2449-
2450-
const unsigned Opc = Inst.getOpcode();
2451-
const MCInstrDesc &Desc = MII.get(Opc);
2452-
2453-
if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2454-
return true;
2455-
2456-
int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
2457-
assert(Idx != -1);
2458-
2459-
bool R128 = (Inst.getOperand(Idx).getImm() != 0);
2460-
2461-
return !R128 || hasMIMG_R128();
2462-
}
2463-
24642447
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
24652448

24662449
const unsigned Opc = Inst.getOpcode();
@@ -2495,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
24952478
"integer clamping is not supported on this GPU");
24962479
return false;
24972480
}
2498-
if (!validateMIMGR128(Inst)) {
2499-
Error(IDLoc,
2500-
"r128 modifier is not supported on this GPU");
2501-
return false;
2502-
}
25032481
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
25042482
if (!validateMIMGD16(Inst)) {
25052483
Error(IDLoc,
@@ -3463,6 +3441,10 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
34633441
case AsmToken::Identifier: {
34643442
StringRef Tok = Parser.getTok().getString();
34653443
if (Tok == Name) {
3444+
if (Tok == "r128" && isGFX9())
3445+
Error(S, "r128 modifier is not supported on this GPU");
3446+
if (Tok == "a16" && !isGFX9())
3447+
Error(S, "a16 modifier is not supported on this GPU");
34663448
Bit = 1;
34673449
Parser.Lex();
34683450
} else if (Tok.startswith("no") && Tok.endswith(Name)) {
@@ -4705,7 +4687,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
47054687
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
47064688
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
47074689
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4708-
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
4690+
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
47094691
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
47104692
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
47114693
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -4815,7 +4797,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
48154797
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
48164798
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
48174799
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
4818-
{"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
4800+
{"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
4801+
{"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
48194802
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
48204803
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
48214804
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},

lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
207207
printNamedBit(MI, OpNo, O, "da");
208208
}
209209

210-
void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
210+
void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
211211
const MCSubtargetInfo &STI, raw_ostream &O) {
212-
printNamedBit(MI, OpNo, O, "r128");
212+
if (STI.hasFeature(AMDGPU::FeatureR128A16))
213+
printNamedBit(MI, OpNo, O, "a16");
214+
else
215+
printNamedBit(MI, OpNo, O, "r128");
213216
}
214217

215218
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,

lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8080
raw_ostream &O);
8181
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
8282
raw_ostream &O);
83-
void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
83+
void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
8484
raw_ostream &O);
8585
void printLWE(const MCInst *MI, unsigned OpNo,
8686
const MCSubtargetInfo &STI, raw_ostream &O);

lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
141141

142142
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
143143
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
144-
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
144+
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
145145
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
146146
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
147147
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -199,7 +199,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
199199

200200
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
201201
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
202-
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
202+
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
203203
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
204204
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
205205
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -252,7 +252,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
252252

253253
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
254254
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
255-
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
255+
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
256256
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
257257
}
258258

@@ -316,7 +316,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
316316

317317
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
318318
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
319-
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
319+
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
320320
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
321321
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
322322
#!if(BaseOpcode.HasD16, "$d16", "");

lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4576,6 +4576,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
45764576
const AMDGPU::ImageDimIntrinsicInfo *Intr,
45774577
SelectionDAG &DAG) const {
45784578
SDLoc DL(Op);
4579+
MachineFunction &MF = DAG.getMachineFunction();
4580+
const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
45794581
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
45804582
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
45814583
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
@@ -4585,6 +4587,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
45854587

45864588
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
45874589
bool IsD16 = false;
4590+
bool IsA16 = false;
45884591
SDValue VData;
45894592
int NumVDataDwords;
45904593
unsigned AddrIdx; // Index of first address argument
@@ -4660,23 +4663,59 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
46604663
}
46614664
}
46624665

4663-
unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
4664-
(BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
4665-
(BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
4666-
(BaseOpcode->LodOrClampOrMip ? 1 : 0);
4666+
unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
4667+
unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
4668+
unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
4669+
unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
4670+
NumCoords + NumLCM;
4671+
unsigned NumMIVAddrs = NumVAddrs;
4672+
46674673
SmallVector<SDValue, 4> VAddrs;
4668-
for (unsigned i = 0; i < NumVAddrs; ++i)
4669-
VAddrs.push_back(Op.getOperand(AddrIdx + i));
46704674

46714675
// Optimize _L to _LZ when _L is zero
46724676
if (LZMappingInfo) {
46734677
if (auto ConstantLod =
4674-
dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
4678+
dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
46754679
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
46764680
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
4677-
VAddrs.pop_back(); // remove 'lod'
4681+
NumMIVAddrs--; // remove 'lod'
4682+
}
4683+
}
4684+
}
4685+
4686+
// Check for 16 bit addresses and pack if true.
4687+
unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
4688+
MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
4689+
if (VAddrVT.getScalarType() == MVT::f16 &&
4690+
ST->hasFeature(AMDGPU::FeatureR128A16)) {
4691+
IsA16 = true;
4692+
for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
4693+
SDValue AddrLo, AddrHi;
4694+
// Push back extra arguments.
4695+
if (i < DimIdx) {
4696+
AddrLo = Op.getOperand(i);
4697+
} else {
4698+
AddrLo = Op.getOperand(i);
4699+
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
4700+
// in 1D, derivatives dx/dh and dx/dv are packed with undef.
4701+
if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
4702+
((NumGradients / 2) % 2 == 1 &&
4703+
(i == DimIdx + (NumGradients / 2) - 1 ||
4704+
i == DimIdx + NumGradients - 1))) {
4705+
AddrHi = DAG.getUNDEF(MVT::f16);
4706+
} else {
4707+
AddrHi = Op.getOperand(i + 1);
4708+
i++;
4709+
}
4710+
AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f16,
4711+
{AddrLo, AddrHi});
4712+
AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
46784713
}
4714+
VAddrs.push_back(AddrLo);
46794715
}
4716+
} else {
4717+
for (unsigned i = 0; i < NumMIVAddrs; ++i)
4718+
VAddrs.push_back(Op.getOperand(AddrIdx + i));
46804719
}
46814720

46824721
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
@@ -4725,7 +4764,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
47254764
Ops.push_back(Unorm);
47264765
Ops.push_back(GLC);
47274766
Ops.push_back(SLC);
4728-
Ops.push_back(False); // r128
4767+
Ops.push_back(IsA16 && // a16 or r128
4768+
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
47294769
Ops.push_back(False); // tfe
47304770
Ops.push_back(False); // lwe
47314771
Ops.push_back(DimInfo->DA ? True : False);

lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,7 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
754754
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
755755
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
756756
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
757-
def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
757+
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
758758
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
759759
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
760760
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;

0 commit comments

Comments
 (0)