Skip to content

Commit 8b80277

Browse files
committed
[AMDGPU][True16][MC] VOP2 update instructions with fake16 format
1 parent 8951b51 commit 8b80277

File tree

10 files changed

+128
-87
lines changed

10 files changed

+128
-87
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ static unsigned macToMad(unsigned Opc) {
176176
return AMDGPU::V_FMA_F32_e64;
177177
case AMDGPU::V_FMAC_F16_e64:
178178
return AMDGPU::V_FMA_F16_gfx9_e64;
179-
case AMDGPU::V_FMAC_F16_t16_e64:
179+
case AMDGPU::V_FMAC_F16_fake16_e64:
180180
return AMDGPU::V_FMA_F16_gfx9_e64;
181181
case AMDGPU::V_FMAC_LEGACY_F32_e64:
182182
return AMDGPU::V_FMA_LEGACY_F32_e64;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3480,7 +3480,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34803480
Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
34813481
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
34823482
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3483-
Opc == AMDGPU::V_FMAC_F16_t16_e64) {
3483+
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
34843484
// Don't fold if we are using source or output modifiers. The new VOP2
34853485
// instructions don't have them.
34863486
if (hasAnyModifiersSet(UseMI))
@@ -3500,7 +3500,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35003500
bool IsFMA =
35013501
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
35023502
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3503-
Opc == AMDGPU::V_FMAC_F16_t16_e64;
3503+
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
35043504
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
35053505
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
35063506

@@ -3533,16 +3533,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35333533

35343534
unsigned NewOpc =
35353535
IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3536-
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
3536+
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
35373537
: AMDGPU::V_FMAMK_F16)
35383538
: (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
35393539
if (pseudoToMCOpcode(NewOpc) == -1)
35403540
return false;
35413541

3542-
// V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
3542+
// V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
35433543
// would also require restricting their register classes. For now
35443544
// just bail out.
3545-
if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
3545+
if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
35463546
return false;
35473547

35483548
const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3557,8 +3557,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35573557
Src0->setIsKill(RegSrc->isKill());
35583558

35593559
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3560-
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3561-
Opc == AMDGPU::V_FMAC_F16_e64)
3560+
Opc == AMDGPU::V_FMAC_F32_e64 ||
3561+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
35623562
UseMI.untieRegOperand(
35633563
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
35643564

@@ -3612,24 +3612,24 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36123612

36133613
unsigned NewOpc =
36143614
IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3615-
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
3615+
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
36163616
: AMDGPU::V_FMAAK_F16)
36173617
: (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
36183618
if (pseudoToMCOpcode(NewOpc) == -1)
36193619
return false;
36203620

3621-
// V_FMAAK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
3621+
// V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
36223622
// would also require restricting their register classes. For now
36233623
// just bail out.
3624-
if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
3624+
if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
36253625
return false;
36263626

36273627
// FIXME: This would be a lot easier if we could return a new instruction
36283628
// instead of having to modify in place.
36293629

36303630
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3631-
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3632-
Opc == AMDGPU::V_FMAC_F16_e64)
3631+
Opc == AMDGPU::V_FMAC_F32_e64 ||
3632+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
36333633
UseMI.untieRegOperand(
36343634
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
36353635

@@ -3852,19 +3852,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38523852
return MIB;
38533853
}
38543854

3855-
assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
3856-
"V_FMAC_F16_t16_e32 is not supported and not expected to be present "
3855+
assert(Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3856+
"V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
38573857
"pre-RA");
38583858

38593859
// Handle MAC/FMAC.
38603860
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
38613861
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3862-
Opc == AMDGPU::V_FMAC_F16_t16_e64;
3862+
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
38633863
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
38643864
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
38653865
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
38663866
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3867-
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3867+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
38683868
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
38693869
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
38703870
bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
@@ -3878,7 +3878,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38783878
return nullptr;
38793879
case AMDGPU::V_MAC_F16_e64:
38803880
case AMDGPU::V_FMAC_F16_e64:
3881-
case AMDGPU::V_FMAC_F16_t16_e64:
3881+
case AMDGPU::V_FMAC_F16_fake16_e64:
38823882
case AMDGPU::V_MAC_F32_e64:
38833883
case AMDGPU::V_MAC_LEGACY_F32_e64:
38843884
case AMDGPU::V_FMAC_F32_e64:
@@ -3963,7 +3963,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39633963
int64_t Imm;
39643964
if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
39653965
unsigned NewOpc =
3966-
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
3966+
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
39673967
: AMDGPU::V_FMAAK_F16)
39683968
: AMDGPU::V_FMAAK_F32)
39693969
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3982,7 +3982,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39823982
}
39833983
}
39843984
unsigned NewOpc =
3985-
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
3985+
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
39863986
: AMDGPU::V_FMAMK_F16)
39873987
: AMDGPU::V_FMAMK_F32)
39883988
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
@@ -4437,7 +4437,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
44374437
case AMDGPU::V_MAC_F32_e64:
44384438
case AMDGPU::V_MAC_LEGACY_F32_e64:
44394439
case AMDGPU::V_FMAC_F16_e64:
4440-
case AMDGPU::V_FMAC_F16_t16_e64:
4440+
case AMDGPU::V_FMAC_F16_fake16_e64:
44414441
case AMDGPU::V_FMAC_F32_e64:
44424442
case AMDGPU::V_FMAC_F64_e64:
44434443
case AMDGPU::V_FMAC_LEGACY_F32_e64:
@@ -5484,7 +5484,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54845484
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
54855485
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
54865486
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5487-
case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
5487+
case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
54885488
case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
54895489
case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
54905490
case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,6 +1755,21 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
17551755
1 : VSrc_b32);
17561756
}
17571757

1758+
// Returns the VGPR-only register operand to use for sources of VOP3
1759+
// instructions for the given VT.
1760+
class getVOP3VRegSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
1761+
RegisterOperand ret =
1762+
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
1763+
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
1764+
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
1765+
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
1766+
!eq(VT.Size, 16) : !if(IsTrue16,
1767+
!if(IsFake16, RegisterOperand<VGPR_32>,
1768+
RegisterOperand<VGPR_16>),
1769+
RegisterOperand<VGPR_32>),
1770+
1 : RegisterOperand<VGPR_32>);
1771+
}
1772+
17581773
// Src2 of VOP3 DPP instructions cannot be a literal
17591774
class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
17601775
RegisterOperand ret =

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3200,20 +3200,20 @@ def : GCNPat <
32003200
let SubtargetPredicate = isGFX10Plus in {
32013201
// Don't allow source modifiers. If there are any source modifiers then it's
32023202
// better to select fma instead of fmac.
3203-
let OtherPredicates = [NotHasTrue16BitInsts] in
3203+
let True16Predicate = NotHasTrue16BitInsts in
32043204
def : GCNPat <
32053205
(fma (f16 (VOP3NoMods f32:$src0)),
32063206
(f16 (VOP3NoMods f32:$src1)),
32073207
(f16 (VOP3NoMods f32:$src2))),
32083208
(V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
32093209
SRCMODS.NONE, $src2)
32103210
>;
3211-
let OtherPredicates = [HasTrue16BitInsts] in
3211+
let True16Predicate = UseFakeTrue16Insts in
32123212
def : GCNPat <
32133213
(fma (f16 (VOP3NoMods f32:$src0)),
32143214
(f16 (VOP3NoMods f32:$src1)),
32153215
(f16 (VOP3NoMods f32:$src2))),
3216-
(V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
3216+
(V_FMAC_F16_fake16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
32173217
SRCMODS.NONE, $src2)
32183218
>;
32193219
}

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
455455
break;
456456
case AMDGPU::V_FMA_F16_e64:
457457
case AMDGPU::V_FMA_F16_gfx9_e64:
458-
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
458+
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
459459
: AMDGPU::V_FMAAK_F16;
460460
break;
461461
}
@@ -484,7 +484,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
484484
break;
485485
case AMDGPU::V_FMA_F16_e64:
486486
case AMDGPU::V_FMA_F16_gfx9_e64:
487-
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
487+
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
488488
: AMDGPU::V_FMAMK_F16;
489489
break;
490490
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,8 +563,8 @@ bool isMAC(unsigned Opc) {
563563
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
564564
Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
565565
Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
566-
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
567-
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
566+
Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
567+
Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
568568
Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
569569
Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
570570
Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||

0 commit comments

Comments
 (0)