@@ -89,23 +89,57 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
89
89
// The `_t16` member of this profile multiclass. It sets both IsTrue16 and
// IsRealTrue16, turns on HasOpSel/HasModifiers, and instantiates every
// operand-class and modifier helper below with IsFake16 = 0 (the removed '-'
// lines passed IsFake16 = 1). Src0VOP3DPP is VGPRSrc_16, and the *RC64
// operand classes are requested with IsTrue16 = 1.
def _t16 : VOPC_Profile<sched, vt0, vt1> {
90
90
let IsTrue16 = 1;
91
91
let IsRealTrue16 = 1;
92
- let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
93
- let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
94
- let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
95
- let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
96
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
97
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
98
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
92
+ let HasOpSel = 1;
93
+ let HasModifiers = 1; // All instructions at least have OpSel
94
+ let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
95
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
96
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
97
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
98
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
99
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
100
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
101
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
102
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
103
+ let Src0VOP3DPP = VGPRSrc_16;
104
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
105
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
106
+
107
+ let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
108
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
109
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
110
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
111
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
112
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
113
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
114
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
115
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
116
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
99
117
}
100
118
// The `_fake16` member of this profile multiclass. It sets IsTrue16 only
// (IsRealTrue16 and HasOpSel are not assigned here) and instantiates the
// operand-class and modifier helpers with IsFake16 = 1. DstRC comes from
// getVALUDstForVT_fake16, Src0VOP3DPP is VGPRSrc_32, and the *RC64 source
// classes are requested with IsTrue16 = 0.
// NOTE(review): Src0Mod passes IsTrue16 = 1 while Src1Mod/Src2Mod pass
// IsTrue16 = 0 -- confirm the asymmetry is intentional.
def _fake16: VOPC_Profile<sched, vt0, vt1> {
101
119
let IsTrue16 = 1;
120
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
121
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
102
122
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
103
123
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
104
124
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
105
125
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
106
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
107
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
108
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
126
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
127
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
128
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
129
+ let Src0VOP3DPP = VGPRSrc_32;
130
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
131
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
132
+
133
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
134
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
135
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
136
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
137
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
138
+ let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
139
+ let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
140
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
141
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
142
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
109
143
}
110
144
}
111
145
@@ -283,7 +317,9 @@ class getVOPCPat64 <SDPatternOperator cond, VOPProfile P> : LetDummies {
283
317
(setcc (P.Src0VT
284
318
!if(P.HasOMod,
285
319
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
286
- (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
320
+ !if(P.HasClamp,
321
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
322
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)))),
287
323
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
288
324
cond))],
289
325
[(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
@@ -324,6 +360,10 @@ multiclass VOPC_Pseudos <string opName,
324
360
let SchedRW = P.Schedule;
325
361
let isCompare = 1;
326
362
let isCommutable = 1;
363
+ let AsmMatchConverter =
364
+ !if (P.HasOpSel, "cvtVOP3OpSel",
365
+ !if (!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), "cvtVOP3",
366
+ ""));
327
367
}
328
368
329
369
if P.HasExtSDWA then
@@ -1344,29 +1384,9 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
1344
1384
1345
1385
// VOPC64
1346
1386
1347
- class VOPC64_DPP_Base<bits<10> op , string OpName, VOPProfile P >
1348
- : VOP3_DPP_Base<OpName, P , 1>, VOP3_DPPe_Common<op, P > {
1387
+ class VOPC64_DPP<VOP_DPP_Pseudo ps , string opName = ps.OpName >
1388
+ : VOP3_DPP_Base<opName, ps.Pfl , 1> {
1349
1389
Instruction Opcode = !cast<Instruction>(NAME);
1350
-
1351
- bits<8> src0;
1352
- bits<9> dpp_ctrl;
1353
- bits<1> bound_ctrl;
1354
- bits<4> bank_mask;
1355
- bits<4> row_mask;
1356
- bit fi;
1357
-
1358
- let Inst{40-32} = 0xfa;
1359
- let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
1360
- let Inst{80-72} = dpp_ctrl;
1361
- let Inst{82} = fi;
1362
- let Inst{83} = bound_ctrl;
1363
- // Inst{87-84} ignored by hw
1364
- let Inst{91-88} = bank_mask;
1365
- let Inst{95-92} = row_mask;
1366
- }
1367
-
1368
- class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
1369
- : VOPC64_DPP_Base<op, opName, ps.Pfl> {
1370
1390
let AssemblerPredicate = HasDPP16;
1371
1391
let SubtargetPredicate = HasDPP16;
1372
1392
let True16Predicate = ps.True16Predicate;
@@ -1380,32 +1400,28 @@ class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
1380
1400
1381
1401
// VOPC64 DPP16 class with an explicit `sdst` destination field. After this
// change it combines VOPC64_DPP (built from the DPP pseudo `ps`) with
// VOP3_DPP_Enc (which receives the opcode `op`), then places the 8-bit `sdst`
// in Inst{7-0}.
class VOPC64_DPP16_Dst<bits<10> op, VOP_DPP_Pseudo ps,
1382
1402
string opName = ps.OpName>
1383
- : VOPC64_DPP16< op, ps, opName > {
1403
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc< op, ps.Pfl, 1 > {
1384
1404
bits<8> sdst;
1385
1405
let Inst{7-0} = sdst;
1386
1406
}
1387
1407
1388
1408
// VOPC64 DPP16 class without a destination field: same VOPC64_DPP +
// VOP3_DPP_Enc bases as the destination-carrying form, but Inst{7-0} is left
// unset (`?`) instead of being bound to an operand.
class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps,
1389
1409
string opName = ps.OpName>
1390
- : VOPC64_DPP16< op, ps, opName > {
1410
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc< op, ps.Pfl, 1 > {
1391
1411
let Inst{7-0} = ? ;
1392
1412
}
1393
1413
1394
- class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P>
1395
- : VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P> {
1396
- Instruction Opcode = !cast<Instruction>(NAME);
1397
-
1398
- bits<8> src0;
1399
- bits<24> dpp8;
1400
- bits<9> fi;
1401
-
1402
- let Inst{40-32} = fi;
1403
- let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
1404
- let Inst{95-72} = dpp8{23-0};
1414
// t16 flavour of the destination-carrying VOPC64 DPP16 class: uses
// VOP3_DPP_Enc_t16 as the encoding base, binds the 8-bit `sdst` to Inst{7-0},
// and forces Inst{14} to 0.
// NOTE(review): Inst{14} is presumably an op_sel bit of the t16 encoding that
// has no meaning for an sdst result -- confirm against VOP3_DPP_Enc_t16.
+ class VOPC64_DPP16_Dst_t16<bits<10> op, VOP_DPP_Pseudo ps,
1415
+ string opName = ps.OpName>
1416
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
1417
+ bits<8> sdst;
1418
+ let Inst{7-0} = sdst;
1419
+ let Inst{14} = 0;
1405
1420
}
1406
1421
1407
- class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
1408
- : VOPC64_DPP8_Base<op, opName, ps.Pfl> {
1422
+ class VOPC64_DPP8<VOP_Pseudo ps, string opName = ps.OpName>
1423
+ : VOP3_DPP8_Base<opName, ps.Pfl> {
1424
+ Instruction Opcode = !cast<Instruction>(NAME);
1409
1425
// Note ps is the non-dpp pseudo
1410
1426
let hasSideEffects = ps.hasSideEffects;
1411
1427
let Defs = ps.Defs;
@@ -1416,18 +1432,26 @@ class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
1416
1432
}
1417
1433
1418
1434
// VOPC64 DPP8 class with an explicit `sdst` destination field. After this
// change the opcode `op` is handed to VOP3_DPP8_Enc rather than to
// VOPC64_DPP8. The 8-bit `sdst` is placed in Inst{7-0} and Constraints is
// reset to the empty string.
class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
1419
- : VOPC64_DPP8<op, ps, opName> {
1435
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl > {
1420
1436
bits<8> sdst;
1421
1437
let Inst{7-0} = sdst;
1422
1438
let Constraints = "";
1423
1439
}
1424
1440
1425
1441
// VOPC64 DPP8 class without a destination field: VOPC64_DPP8 plus
// VOP3_DPP8_Enc, with Inst{7-0} left unset (`?`) and Constraints reset to the
// empty string.
class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
1426
- : VOPC64_DPP8<op, ps, opName> {
1442
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl > {
1427
1443
let Inst{7-0} = ? ;
1428
1444
let Constraints = "";
1429
1445
}
1430
1446
1447
// t16 flavour of the destination-carrying VOPC64 DPP8 class: uses
// VOP3_DPP8_Enc_t16 as the encoding base, binds the 8-bit `sdst` to
// Inst{7-0}, forces Inst{14} to 0, and resets Constraints to the empty string.
// NOTE(review): Inst{14} is presumably an op_sel bit of the t16 encoding that
// has no meaning for an sdst result -- confirm against VOP3_DPP8_Enc_t16.
+ class VOPC64_DPP8_Dst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
1448
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
1449
+ bits<8> sdst;
1450
+ let Inst{7-0} = sdst;
1451
+ let Inst{14} = 0;
1452
+ let Constraints = "";
1453
+ }
1454
+
1431
1455
//===----------------------------------------------------------------------===//
1432
1456
// Target-specific instruction encodings.
1433
1457
//===----------------------------------------------------------------------===//
@@ -1442,7 +1466,7 @@ multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
1442
1466
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
1443
1467
def _e32#Gen.Suffix : VOPC_Real<ps32, Gen.Subtarget>,
1444
1468
VOPCe<op{7-0}>;
1445
- def _e64#Gen.Suffix : VOP3_Real <ps64, Gen.Subtarget >,
1469
+ def _e64#Gen.Suffix : VOP3_Real_Gen <ps64, Gen>,
1446
1470
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
1447
1471
// Encoding used for VOPC instructions encoded as VOP3 differs from
1448
1472
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
@@ -1508,13 +1532,25 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
1508
1532
// the destination-less 32bit forms add it to the asmString here.
1509
1533
VOPC_Real<ps32, Gen.Subtarget, asm_name#"_e32">,
1510
1534
VOPCe<op{7-0}>;
1511
- def _e64#Gen.Suffix :
1512
- VOP3_Real_Gen<ps64, Gen, asm_name>,
1513
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
1514
- // Encoding used for VOPC instructions encoded as VOP3 differs from
1515
- // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
1516
- bits<8> sdst;
1517
- let Inst{7-0} = sdst;
1535
// The _e64 real is now defined in one of two ways, chosen by the profile's
// IsRealTrue16 flag. The true16 branch uses VOP3e_t16_gfx11_gfx12 and
// additionally forces Inst{14} to 0; the other branch keeps the previous
// VOP3a_gfx11_gfx12 definition. Both bind `sdst` to Inst{7-0}.
+ if ps64.Pfl.IsRealTrue16 then {
1536
+ def _e64#Gen.Suffix :
1537
+ VOP3_Real_Gen<ps64, Gen, asm_name>,
1538
+ VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> {
1539
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
1540
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
1541
+ bits<8> sdst;
1542
+ let Inst{7-0} = sdst;
1543
+ let Inst{14} = 0;
1544
+ }
1545
+ } else {
1546
+ def _e64#Gen.Suffix :
1547
+ VOP3_Real_Gen<ps64, Gen, asm_name>,
1548
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
1549
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
1550
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
1551
+ bits<8> sdst;
1552
+ let Inst{7-0} = sdst;
1553
+ }
1518
1554
}
1519
1555
1520
1556
defm : VOPCInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
@@ -1554,9 +1590,15 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
1554
1590
1555
1591
if ps64.Pfl.HasExtVOP3DPP then {
1556
1592
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
1557
- def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
1558
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
1559
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
1593
// The _e64_dpp and _e64_dpp8 reals pick their class by the profile's
// IsRealTrue16 flag: the *_Dst_t16 classes when it is set, the plain *_Dst
// classes otherwise. The DPP16 real is built from psDPP, the DPP8 real from
// the non-DPP ps64.
+ if ps64.Pfl.IsRealTrue16 then {
1594
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst_t16<{0, op}, psDPP, asm_name>,
1595
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
1596
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst_t16<{0, op}, ps64, asm_name>;
1597
+ } else {
1598
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
1599
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
1600
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
1601
+ }
1560
1602
} // end if ps64.Pfl.HasExtVOP3DPP
1561
1603
} // End DecoderNamespace
1562
1604
} // End AssemblerPredicate
@@ -1693,11 +1735,23 @@ multiclass VOPC_Real_t16_gfx11<bits <9> op, string asm_name,
1693
1735
string OpName = NAME, string pseudo_mnemonic = ""> :
1694
1736
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
1695
1737
1738
// Instantiates VOPC_Real_t16_gfx11 twice for one opcode/asm name: once under
// the `_t16` suffix with pseudo name OpName#"_t16", and once under `_fake16`
// with pseudo name OpName#"_fake16".
+ multiclass VOPC_Real_t16_and_fake16_gfx11<bits <9> op, string asm_name,
1739
+ string OpName = NAME, string pseudo_mnemonic = ""> {
1740
+ defm _t16: VOPC_Real_t16_gfx11<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
1741
+ defm _fake16: VOPC_Real_t16_gfx11<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
1742
+ }
1743
+
1696
1744
// Convenience multiclass that inherits VOPC_Real_t16 for both GFX11Gen and
// GFX12Gen with identical opcode, asm name, pseudo name and mnemonic.
multiclass VOPC_Real_t16_gfx11_gfx12<bits <9> op, string asm_name,
1697
1745
string OpName = NAME, string pseudo_mnemonic = ""> :
1698
1746
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
1699
1747
VOPC_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
1700
1748
1749
// Instantiates VOPC_Real_t16_gfx11_gfx12 twice for one opcode/asm name: once
// under the `_t16` suffix with pseudo name OpName#"_t16", and once under
// `_fake16` with pseudo name OpName#"_fake16".
+ multiclass VOPC_Real_t16_and_fake16_gfx11_gfx12<bits <9> op, string asm_name,
1750
+ string OpName = NAME, string pseudo_mnemonic = ""> {
1751
+ defm _t16: VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
1752
+ defm _fake16: VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
1753
+ }
1754
+
1701
1755
// Convenience multiclass that inherits VOPCX_Real_t16 for GFX11Gen only,
// forwarding opcode, asm name, pseudo name and mnemonic unchanged.
multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
1702
1756
string OpName = NAME, string pseudo_mnemonic = ""> :
1703
1757
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
@@ -1708,7 +1762,7 @@ multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
1708
1762
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
1709
1763
1710
1764
// Real-instruction instantiations for the f16 compares. After this change
// V_CMP_LT_F16 goes through the *_t16_and_fake16_* multiclass and is no longer
// named with a `_fake16` suffix at the defm site; the neighbouring compares
// shown here still instantiate only their `_fake16` name.
defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
1711
- defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12 <0x001, "v_cmp_lt_f16">;
1765
+ defm V_CMP_LT_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12 <0x001, "v_cmp_lt_f16">;
1712
1766
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
1713
1767
defm V_CMP_LE_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
1714
1768
defm V_CMP_GT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
0 commit comments