Skip to content

Commit e8644e3

Browse files
authored
[AMDGPU][True16][MC] VOP2 update instructions with fake16 format (#114436)
Some old "t16" VOP2 instructions are actually in fake16 format. Correct them and update the test files.
1 parent 0428f2c commit e8644e3

File tree

10 files changed

+133
-91
lines changed

10 files changed

+133
-91
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ static unsigned macToMad(unsigned Opc) {
176176
return AMDGPU::V_FMA_F32_e64;
177177
case AMDGPU::V_FMAC_F16_e64:
178178
return AMDGPU::V_FMA_F16_gfx9_e64;
179-
case AMDGPU::V_FMAC_F16_t16_e64:
179+
case AMDGPU::V_FMAC_F16_fake16_e64:
180180
return AMDGPU::V_FMA_F16_gfx9_e64;
181181
case AMDGPU::V_FMAC_LEGACY_F32_e64:
182182
return AMDGPU::V_FMA_LEGACY_F32_e64;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3479,7 +3479,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34793479
Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
34803480
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
34813481
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3482-
Opc == AMDGPU::V_FMAC_F16_t16_e64) {
3482+
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
34833483
// Don't fold if we are using source or output modifiers. The new VOP2
34843484
// instructions don't have them.
34853485
if (hasAnyModifiersSet(UseMI))
@@ -3499,7 +3499,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34993499
bool IsFMA =
35003500
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
35013501
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3502-
Opc == AMDGPU::V_FMAC_F16_t16_e64;
3502+
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
35033503
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
35043504
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
35053505

@@ -3532,16 +3532,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35323532

35333533
unsigned NewOpc =
35343534
IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3535-
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
3535+
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
35363536
: AMDGPU::V_FMAMK_F16)
35373537
: (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
35383538
if (pseudoToMCOpcode(NewOpc) == -1)
35393539
return false;
35403540

3541-
// V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
3541+
// V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
35423542
// would also require restricting their register classes. For now
35433543
// just bail out.
3544-
if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
3544+
if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
35453545
return false;
35463546

35473547
const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3556,8 +3556,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35563556
Src0->setIsKill(RegSrc->isKill());
35573557

35583558
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3559-
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3560-
Opc == AMDGPU::V_FMAC_F16_e64)
3559+
Opc == AMDGPU::V_FMAC_F32_e64 ||
3560+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
35613561
UseMI.untieRegOperand(
35623562
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
35633563

@@ -3611,24 +3611,24 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36113611

36123612
unsigned NewOpc =
36133613
IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3614-
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
3614+
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
36153615
: AMDGPU::V_FMAAK_F16)
36163616
: (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
36173617
if (pseudoToMCOpcode(NewOpc) == -1)
36183618
return false;
36193619

3620-
// V_FMAAK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
3620+
// V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
36213621
// would also require restricting their register classes. For now
36223622
// just bail out.
3623-
if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
3623+
if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
36243624
return false;
36253625

36263626
// FIXME: This would be a lot easier if we could return a new instruction
36273627
// instead of having to modify in place.
36283628

36293629
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3630-
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3631-
Opc == AMDGPU::V_FMAC_F16_e64)
3630+
Opc == AMDGPU::V_FMAC_F32_e64 ||
3631+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
36323632
UseMI.untieRegOperand(
36333633
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
36343634

@@ -3851,19 +3851,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38513851
return MIB;
38523852
}
38533853

3854-
assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
3855-
"V_FMAC_F16_t16_e32 is not supported and not expected to be present "
3856-
"pre-RA");
3854+
assert(
3855+
Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3856+
"V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
3857+
"pre-RA");
38573858

38583859
// Handle MAC/FMAC.
38593860
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
38603861
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3861-
Opc == AMDGPU::V_FMAC_F16_t16_e64;
3862+
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
38623863
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
38633864
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
38643865
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
38653866
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3866-
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3867+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
38673868
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
38683869
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
38693870
bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
@@ -3877,7 +3878,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38773878
return nullptr;
38783879
case AMDGPU::V_MAC_F16_e64:
38793880
case AMDGPU::V_FMAC_F16_e64:
3880-
case AMDGPU::V_FMAC_F16_t16_e64:
3881+
case AMDGPU::V_FMAC_F16_fake16_e64:
38813882
case AMDGPU::V_MAC_F32_e64:
38823883
case AMDGPU::V_MAC_LEGACY_F32_e64:
38833884
case AMDGPU::V_FMAC_F32_e64:
@@ -3962,7 +3963,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39623963
int64_t Imm;
39633964
if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
39643965
unsigned NewOpc =
3965-
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
3966+
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
39663967
: AMDGPU::V_FMAAK_F16)
39673968
: AMDGPU::V_FMAAK_F32)
39683969
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3981,7 +3982,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39813982
}
39823983
}
39833984
unsigned NewOpc =
3984-
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
3985+
IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
39853986
: AMDGPU::V_FMAMK_F16)
39863987
: AMDGPU::V_FMAMK_F32)
39873988
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
@@ -4436,7 +4437,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
44364437
case AMDGPU::V_MAC_F32_e64:
44374438
case AMDGPU::V_MAC_LEGACY_F32_e64:
44384439
case AMDGPU::V_FMAC_F16_e64:
4439-
case AMDGPU::V_FMAC_F16_t16_e64:
4440+
case AMDGPU::V_FMAC_F16_fake16_e64:
44404441
case AMDGPU::V_FMAC_F32_e64:
44414442
case AMDGPU::V_FMAC_F64_e64:
44424443
case AMDGPU::V_FMAC_LEGACY_F32_e64:
@@ -5483,7 +5484,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54835484
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
54845485
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
54855486
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5486-
case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
5487+
case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
54875488
case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
54885489
case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
54895490
case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,6 +1755,21 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
17551755
1 : VSrc_b32);
17561756
}
17571757

1758+
// Returns the vreg register class to use for sources of VOP3 instructions for the
1759+
// given VT.
1760+
class getVOP3VRegSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
1761+
RegisterOperand ret =
1762+
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
1763+
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
1764+
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
1765+
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
1766+
!eq(VT.Size, 16) : !if(IsTrue16,
1767+
!if(IsFake16, RegisterOperand<VGPR_32>,
1768+
RegisterOperand<VGPR_16>),
1769+
RegisterOperand<VGPR_32>),
1770+
1 : RegisterOperand<VGPR_32>);
1771+
}
1772+
17581773
// Src2 of VOP3 DPP instructions cannot be a literal
17591774
class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
17601775
RegisterOperand ret =

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3200,20 +3200,20 @@ def : GCNPat <
32003200
let SubtargetPredicate = isGFX10Plus in {
32013201
// Don't allow source modifiers. If there are any source modifiers then it's
32023202
// better to select fma instead of fmac.
3203-
let OtherPredicates = [NotHasTrue16BitInsts] in
3203+
let True16Predicate = NotHasTrue16BitInsts in
32043204
def : GCNPat <
32053205
(fma (f16 (VOP3NoMods f32:$src0)),
32063206
(f16 (VOP3NoMods f32:$src1)),
32073207
(f16 (VOP3NoMods f32:$src2))),
32083208
(V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
32093209
SRCMODS.NONE, $src2)
32103210
>;
3211-
let OtherPredicates = [HasTrue16BitInsts] in
3211+
let True16Predicate = UseFakeTrue16Insts in
32123212
def : GCNPat <
3213-
(fma (f16 (VOP3NoMods f32:$src0)),
3214-
(f16 (VOP3NoMods f32:$src1)),
3215-
(f16 (VOP3NoMods f32:$src2))),
3216-
(V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
3213+
(fma (f16 (VOP3NoMods f16:$src0)),
3214+
(f16 (VOP3NoMods f16:$src1)),
3215+
(f16 (VOP3NoMods f16:$src2))),
3216+
(V_FMAC_F16_fake16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
32173217
SRCMODS.NONE, $src2)
32183218
>;
32193219
}

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
455455
break;
456456
case AMDGPU::V_FMA_F16_e64:
457457
case AMDGPU::V_FMA_F16_gfx9_e64:
458-
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
458+
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
459459
: AMDGPU::V_FMAAK_F16;
460460
break;
461461
}
@@ -484,7 +484,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
484484
break;
485485
case AMDGPU::V_FMA_F16_e64:
486486
case AMDGPU::V_FMA_F16_gfx9_e64:
487-
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
487+
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
488488
: AMDGPU::V_FMAMK_F16;
489489
break;
490490
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,8 +563,8 @@ bool isMAC(unsigned Opc) {
563563
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
564564
Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
565565
Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
566-
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
567-
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
566+
Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
567+
Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
568568
Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
569569
Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
570570
Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||

0 commit comments

Comments
 (0)