@@ -14,6 +14,12 @@ def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot
14
14
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
15
15
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
16
16
17
// Mixin class that attaches three Instruction references to a record:
// the record itself (looked up by its own NAME) plus two instructions
// looked up from the name strings passed as hiOp and loOp.
// Records deriving from this class are collected by the GenericTable
// whose FilterClass is "True16D16Table".
+ class True16D16Table <string hiOp, string loOp> {
18
// NAME is the name of the record being defined with this mixin.
+ Instruction T16Op = !cast<Instruction>(NAME);
19
+ Instruction HiOp = !cast<Instruction>(hiOp);
20
+ Instruction LoOp = !cast<Instruction>(loOp);
21
+ }
22
+
17
23
//===----------------------------------------------------------------------===//
18
24
// FLAT classes
19
25
//===----------------------------------------------------------------------===//
@@ -225,6 +231,12 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
225
231
let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
226
232
}
227
233
234
// Defines two load pseudos per defm:
//   <NAME>      - FLAT_Load_Pseudo over VGPR_32 with third argument 1
//                 (NOTE(review): presumably the tied-output flag; confirm
//                 against FLAT_Load_Pseudo's parameter list).
//   <NAME>_t16  - FLAT_Load_Pseudo over VGPR_16 with "_t16" appended to the
//                 mnemonic, predicated on UseRealTrue16Insts.
+ multiclass FLAT_Load_Pseudo_t16<string opName> {
235
+ def "" : FLAT_Load_Pseudo<opName, VGPR_32, 1>;
236
+ let True16Predicate = UseRealTrue16Insts in
237
// The table entry pairs the _t16 record with <NAME>_HI as HiOp and <NAME> as LoOp.
+ def _t16 : FLAT_Load_Pseudo<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_HI", NAME>;
238
+ }
239
+
228
240
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
229
241
bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
230
242
opName,
@@ -242,6 +254,12 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
242
254
let enabled_saddr = EnableSaddr;
243
255
}
244
256
257
// Defines two store pseudos per defm:
//   <NAME>      - FLAT_Store_Pseudo over VGPR_32.
//   <NAME>_t16  - FLAT_Store_Pseudo over VGPR_16 with "_t16" appended to the
//                 mnemonic, predicated on UseRealTrue16Insts.
+ multiclass FLAT_Store_Pseudo_t16<string opName> {
258
+ def "" : FLAT_Store_Pseudo<opName, VGPR_32>;
259
+ let True16Predicate = UseRealTrue16Insts in
260
// The table entry pairs the _t16 record with <NAME>_D16_HI as HiOp and <NAME> as LoOp.
+ def _t16 : FLAT_Store_Pseudo<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_D16_HI", NAME>;
261
+ }
262
+
245
263
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
246
264
let is_flat_global = 1 in {
247
265
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
@@ -653,27 +671,28 @@ def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
653
671
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
654
672
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
655
673
656
- def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
657
- def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
658
674
def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
659
675
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
660
676
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
661
677
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
662
678
663
679
let SubtargetPredicate = HasD16LoadStore in {
664
680
let TiedSourceNotRead = 1 in {
665
- def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
666
681
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
667
- def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1 >;
682
+ defm FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_ubyte_d16" >;
668
683
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
669
- def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1 >;
684
+ defm FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_sbyte_d16" >;
670
685
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;
686
+ defm FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo_t16 <"flat_load_short_d16">;
671
687
}
672
688
673
689
def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
674
690
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
675
691
}
676
692
693
+ defm FLAT_STORE_BYTE : FLAT_Store_Pseudo_t16 <"flat_store_byte">;
694
+ defm FLAT_STORE_SHORT : FLAT_Store_Pseudo_t16 <"flat_store_short">;
695
+
677
696
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
678
697
VGPR_32, i32, v2i32, VReg_64>;
679
698
@@ -1044,6 +1063,11 @@ class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
1044
1063
(inst $vaddr, $offset, 0, $in)
1045
1064
>;
1046
1065
1066
// Selection pattern mapping a `vt`-typed result of `node`, addressed through
// FlatOffset (64-bit VGPR address plus i32 offset), onto `inst`.
// Unlike FlatLoadPat_D16, the source pattern has no `$in` value operand and
// the output passes no `$in` operand to the instruction.
+ class FlatLoadPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1067
+ (vt (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset))),
1068
// NOTE(review): the trailing (i32 0) is presumably the cache-policy operand; confirm.
+ (inst $vaddr, $offset, (i32 0))
1069
+ >;
1070
+
1047
1071
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1048
1072
(node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
1049
1073
(inst $vaddr, $offset, 0, $in)
@@ -1366,16 +1390,31 @@ def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1366
1390
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1367
1391
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>;
1368
1392
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;
1369
- def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1370
- def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1371
- def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1372
1393
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1373
1394
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1374
- def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1375
1395
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1376
1396
def : FlatLoadPat <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
1377
1397
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1378
1398
1399
// i16 flat load/store patterns using the 32-bit-register pseudos.
// The group is instantiated once per predicate in the list, so it applies
// under both NotHasTrue16BitInsts and UseFakeTrue16Insts.
+ foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1400
+ let True16Predicate = p in {
1401
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1402
+ def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1403
+ def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1404
+ def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1405
+ def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1406
+ def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1407
+ }
1408
+
1409
// i16 flat load/store patterns using the _t16 pseudos.
// Applies only under UseRealTrue16Insts, with D16PreservesUnusedBits and
// HasFlatAddressSpace as additional predicates. Loads select the D16 _t16
// pseudos through FlatLoadPat_D16_t16; stores select the _t16 store pseudos.
+ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
1410
+ def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, extloadi8_flat, i16>;
1411
+ def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
1412
+ def : FlatLoadPat_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
1413
+ def : FlatLoadPat_D16_t16<FLAT_LOAD_SHORT_D16_t16, load_flat, i16>;
1414
+ def : FlatStorePat <FLAT_STORE_BYTE_t16, truncstorei8_flat, i16>;
1415
+ def : FlatStorePat <FLAT_STORE_SHORT_t16, store_flat, i16>;
1416
+ } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
1417
+
1379
1418
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1380
1419
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1381
1420
@@ -1454,9 +1493,6 @@ let SubtargetPredicate = isGFX12Plus in {
1454
1493
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1455
1494
}
1456
1495
1457
- def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1458
- def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1459
-
1460
1496
let OtherPredicates = [HasD16LoadStore] in {
1461
1497
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1462
1498
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
@@ -2752,3 +2788,11 @@ defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_
2752
2788
2753
2789
defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>;
2754
2790
defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>;
2791
+
2792
// Generic table built from every record that derives from the
// True16D16Table class. Each row carries the T16Op, HiOp and LoOp fields and
// is keyed by T16Op; the row type is named True16D16Info and the primary-key
// lookup is named getT16D16Helper.
+ def True16D16Table : GenericTable {
2793
+ let FilterClass = "True16D16Table";
2794
+ let CppTypeName = "True16D16Info";
2795
+ let Fields = ["T16Op", "HiOp", "LoOp"];
2796
+ let PrimaryKey = ["T16Op"];
2797
+ let PrimaryKeyName = "getT16D16Helper";
2798
+ }
0 commit comments