Skip to content

Commit c9e217c

Browse files
committed
VOPCX true16
1 parent 5e26ff3 commit c9e217c

32 files changed

+993
-454
lines changed

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 105 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -153,8 +153,7 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
153153
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
154154
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
155155
src0_sel:$src0_sel, src1_sel:$src1_sel);
156-
let AsmVOP3Base = !if(Src0VT.isFP, "$src0_modifiers, $src1_modifiers$clamp",
157-
"$src0, $src1");
156+
let HasDst = 0;
158157
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
159158
let EmitDst = 0;
160159
}
@@ -164,23 +163,53 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
164163
def _t16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
165164
let IsTrue16 = 1;
166165
let IsRealTrue16 = 1;
167-
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
168-
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
169-
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
170-
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
171-
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
172-
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
173-
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
166+
let HasOpSel = 1;
167+
let HasModifiers = 1; // All instructions at least have OpSel
168+
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
169+
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
170+
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
171+
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
172+
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
173+
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
174+
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
175+
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
176+
let Src0VOP3DPP = VGPRSrc_16;
177+
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
178+
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
179+
180+
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
181+
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
182+
let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
183+
let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
184+
let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
185+
let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
186+
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
187+
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
188+
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
174189
}
175190
def _fake16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
176191
let IsTrue16 = 1;
192+
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
177193
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
178194
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
179195
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
180196
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
181-
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
182-
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
183-
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
197+
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
198+
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
199+
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
200+
let Src0VOP3DPP = VGPRSrc_32;
201+
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
202+
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
203+
204+
let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
205+
let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
206+
let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
207+
let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
208+
let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
209+
let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
210+
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
211+
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
212+
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
184213
}
185214
}
186215

@@ -1408,7 +1437,7 @@ class VOPC64_DPP16_Dst<bits<10> op, VOP_DPP_Pseudo ps,
14081437
class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps,
14091438
string opName = ps.OpName>
14101439
: VOPC64_DPP<ps, opName>, VOP3_DPP_Enc<op, ps.Pfl, 1> {
1411-
let Inst{7-0} = ? ;
1440+
let Inst{7-0} = ?;
14121441
}
14131442

14141443
class VOPC64_DPP16_Dst_t16<bits<10> op, VOP_DPP_Pseudo ps,
@@ -1419,6 +1448,13 @@ class VOPC64_DPP16_Dst_t16<bits<10> op, VOP_DPP_Pseudo ps,
14191448
let Inst{14} = 0;
14201449
}
14211450

1451+
class VOPC64_DPP16_NoDst_t16<bits<10> op, VOP_DPP_Pseudo ps,
1452+
string opName = ps.OpName>
1453+
: VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
1454+
let Inst{7-0} = ?;
1455+
let Inst{14} = 0;
1456+
}
1457+
14221458
class VOPC64_DPP8<VOP_Pseudo ps, string opName = ps.OpName>
14231459
: VOP3_DPP8_Base<opName, ps.Pfl> {
14241460
Instruction Opcode = !cast<Instruction>(NAME);
@@ -1440,7 +1476,7 @@ class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
14401476

14411477
class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
14421478
: VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl> {
1443-
let Inst{7-0} = ? ;
1479+
let Inst{7-0} = ?;
14441480
let Constraints = "";
14451481
}
14461482

@@ -1452,6 +1488,13 @@ class VOPC64_DPP8_Dst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
14521488
let Constraints = "";
14531489
}
14541490

1491+
class VOPC64_DPP8_NoDst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
1492+
: VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
1493+
let Inst{7-0} = ?;
1494+
let Inst{14} = 0;
1495+
let Constraints = "";
1496+
}
1497+
14551498
//===----------------------------------------------------------------------===//
14561499
// Target-specific instruction encodings.
14571500
//===----------------------------------------------------------------------===//
@@ -1619,7 +1662,7 @@ multiclass VOPCX_Real<GFXGen Gen, bits<9> op> {
16191662
# " " # ps32.AsmOperands;
16201663
}
16211664
def _e64#Gen.Suffix :
1622-
VOP3_Real<ps64, Gen.Subtarget>,
1665+
VOP3_Real_Gen<ps64, Gen>,
16231666
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
16241667
let Inst{7-0} = ?; // sdst
16251668
let AsmString = !subst("_nosdst", "", ps64.Mnemonic)
@@ -1677,11 +1720,22 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
16771720
VOPCe<op{7-0}> {
16781721
let AsmString = asm_name # "{_e32} " # ps32.AsmOperands;
16791722
}
1680-
def _e64#Gen.Suffix
1681-
: VOP3_Real_Gen<ps64, Gen, asm_name>,
1682-
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
1683-
let Inst{7-0} = ? ; // sdst
1684-
let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
1723+
1724+
if ps64.Pfl.IsRealTrue16 then {
1725+
def _e64#Gen.Suffix
1726+
: VOP3_Real_Gen<ps64, Gen, asm_name>,
1727+
VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> {
1728+
let Inst{7-0} = ?; // sdst
1729+
let Inst{14} = 0;
1730+
let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
1731+
}
1732+
} else {
1733+
def _e64#Gen.Suffix
1734+
: VOP3_Real_Gen<ps64, Gen, asm_name>,
1735+
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
1736+
let Inst{7-0} = ?; // sdst
1737+
let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
1738+
}
16851739
}
16861740

16871741
defm : VOPCXInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
@@ -1695,14 +1749,25 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
16951749
if ps64.Pfl.HasExtVOP3DPP then {
16961750
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e64"#"_dpp");
16971751
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
1698-
def _e64_dpp#Gen.Suffix
1699-
: VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
1700-
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
1701-
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
1702-
}
17031752
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
1704-
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
1705-
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
1753+
if ps64.Pfl.IsRealTrue16 then {
1754+
def _e64_dpp#Gen.Suffix
1755+
: VOPC64_DPP16_NoDst_t16<{0, op}, psDPP, asm_name>,
1756+
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
1757+
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
1758+
}
1759+
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst_t16<{0, op}, ps64, asm_name> {
1760+
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
1761+
}
1762+
} else {
1763+
def _e64_dpp#Gen.Suffix
1764+
: VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
1765+
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
1766+
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
1767+
}
1768+
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
1769+
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
1770+
}
17061771
}
17071772
} // End if ps64.Pfl.HasExtVOP3DPP
17081773
} // End DecoderNamespace
@@ -1756,11 +1821,23 @@ multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
17561821
string OpName = NAME, string pseudo_mnemonic = ""> :
17571822
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
17581823

1824+
multiclass VOPCX_Real_t16_and_fake16_gfx11<bits<9> op, string asm_name,
1825+
string OpName = NAME, string pseudo_mnemonic = ""> {
1826+
defm _t16: VOPCX_Real_t16_gfx11<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
1827+
defm _fake16: VOPCX_Real_t16_gfx11<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
1828+
}
1829+
17591830
multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
17601831
string OpName = NAME, string pseudo_mnemonic = ""> :
17611832
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
17621833
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
17631834

1835+
multiclass VOPCX_Real_t16_and_fake16_gfx11_gfx12<bits<9> op, string asm_name,
1836+
string OpName = NAME, string pseudo_mnemonic = ""> {
1837+
defm _t16: VOPCX_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
1838+
defm _fake16: VOPCX_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
1839+
}
1840+
17641841
defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
17651842
defm V_CMP_LT_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
17661843
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
@@ -1848,7 +1925,7 @@ defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
18481925
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
18491926

18501927
defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
1851-
defm V_CMPX_LT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
1928+
defm V_CMPX_LT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
18521929
defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
18531930
defm V_CMPX_LE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
18541931
defm V_CMPX_GT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s

Lines changed: 37 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1347,47 +1347,56 @@ v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
13471347
v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
13481348
// GFX11: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
13491349

1350-
v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
1351-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
1350+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
1351+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
13521352

1353-
v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
1354-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
1353+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
1354+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
13551355

1356-
v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror
1357-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
1356+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror
1357+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
13581358

1359-
v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror
1360-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
1359+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror
1360+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
13611361

1362-
v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1
1363-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
1362+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1
1363+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
13641364

1365-
v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15
1366-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
1365+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15
1366+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
13671367

1368-
v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1
1369-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
1368+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1
1369+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
13701370

1371-
v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15
1372-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
1371+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15
1372+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
13731373

1374-
v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1
1375-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
1374+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1
1375+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
13761376

1377-
v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15
1378-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
1377+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15
1378+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
13791379

1380-
v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
1381-
// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
1380+
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
1381+
// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
13821382

1383-
v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
1384-
// GFX11: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
1383+
v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
1384+
// GFX11: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
13851385

1386-
v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
1387-
// GFX11: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
1386+
v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
1387+
// GFX11: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
13881388

1389-
v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1390-
// GFX11: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
1389+
v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1390+
// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
1391+
1392+
v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1
1393+
// GFX11: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
1394+
1395+
v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
1396+
// GFX11: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
1397+
1398+
v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1399+
// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
13911400

13921401
v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
13931402
// GFX11: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]

0 commit comments

Comments
 (0)