Skip to content

Commit c6a7c4d

Browse files
authored
[AMDGPU] Add 256-bit vdst and 96-bit src to profile switches. NFC. (#81801)
I need these operands for a future patch. Also simplify the conditions there: if nothing else, using !cond instead of nested !if's means the code does not need to be realigned every time a new type is added.
1 parent ba27993 commit c6a7c4d

File tree

2 files changed

+28
-39
lines changed

2 files changed

+28
-39
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 23 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,11 +1468,12 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
14681468
defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
14691469
VOPDstOperand_t16Lo128),
14701470
VOPDstOperand<VGPR_32>);
1471-
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1472-
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1473-
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1474-
!if(!eq(VT.Size, 16), op16,
1475-
VOPDstS64orS32)))); // else VT == i1
1471+
RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
1472+
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
1473+
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
1474+
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
1475+
!eq(VT.Size, 16) : op16,
1476+
1 : VOPDstS64orS32); // else VT == i1
14761477
}
14771478

14781479
class getVALUDstForVT_fake16<ValueType VT> {
@@ -1556,40 +1557,23 @@ class getSDWASrcForVT <ValueType VT> {
15561557
// given VT.
15571558
class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
15581559
RegisterOperand ret =
1559-
!if(!eq(VT.Size, 128),
1560-
VRegSrc_128,
1561-
!if(!eq(VT.Size, 64),
1562-
!if(VT.isFP,
1563-
!if(!eq(VT.Value, v2f32.Value),
1564-
VSrc_v2f32,
1565-
VSrc_f64),
1566-
!if(!eq(VT.Value, v2i32.Value),
1567-
VSrc_v2b32,
1568-
VSrc_b64)),
1569-
!if(!eq(VT.Value, i1.Value),
1570-
SSrc_i1,
1571-
!if(VT.isFP,
1572-
!if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1573-
!if(IsTrue16, VSrcT_f16, VSrc_f16),
1574-
!if(!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)),
1575-
VSrc_v2f16,
1576-
!if(!or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value)),
1577-
AVSrc_64,
1578-
VSrc_f32
1579-
)
1580-
)
1581-
),
1582-
!if(!eq(VT.Value, i16.Value),
1583-
!if(IsTrue16, VSrcT_b16, VSrc_b16),
1584-
!if(!eq(VT.Value, v2i16.Value),
1585-
VSrc_v2b16,
1586-
VSrc_b32
1587-
)
1588-
)
1589-
)
1590-
)
1591-
)
1592-
);
1560+
!cond(!eq(VT, f64) : VSrc_f64,
1561+
!eq(VT, f32) : VSrc_f32,
1562+
!eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16),
1563+
!eq(VT, bf16) : !if(IsTrue16, VSrcT_f16, VSrc_f16),
1564+
!eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16),
1565+
!eq(VT, i1) : SSrc_i1,
1566+
!eq(VT, v2f32) : VSrc_v2f32,
1567+
!eq(VT, v2i32) : VSrc_v2b32,
1568+
!eq(VT, v2f16) : VSrc_v2f16,
1569+
!eq(VT, v2bf16) : VSrc_v2f16,
1570+
!eq(VT, v2i16) : VSrc_v2b16,
1571+
!eq(VT, v4f16) : AVSrc_64,
1572+
!eq(VT, v4bf16) : AVSrc_64,
1573+
!eq(VT.Size, 128) : VRegSrc_128,
1574+
!eq(VT.Size, 96) : VRegSrc_96,
1575+
!eq(VT.Size, 64) : VSrc_b64,
1576+
1 : VSrc_b32);
15931577
}
15941578

15951579
// Src2 of VOP3 DPP instructions cannot be a literal

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,7 @@ class SrcReg9<RegisterClass regClass, string width> : RegisterOperand<regClass>
12141214

12151215
def VRegSrc_32 : SrcReg9<VGPR_32, "OPW32">;
12161216
def VRegSrc_64 : SrcReg9<VReg_64, "OPW64">;
1217+
def VRegSrc_96 : SrcReg9<VReg_96, "OPW96">;
12171218
def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
12181219
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
12191220
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
@@ -1230,6 +1231,10 @@ def VGPRSrc_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
12301231
let DecoderMethod = "DecodeVGPR_32RegisterClass";
12311232
}
12321233

1234+
def VGPRSrc_96 : RegisterOperand<VReg_96> {
1235+
let DecoderMethod = "DecodeVReg_96RegisterClass";
1236+
}
1237+
12331238
def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
12341239
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
12351240
let EncoderMethod = "getMachineOpValueT16Lo128";

0 commit comments

Comments
 (0)