Skip to content

Commit fecac70

Browse files
committed
[AMDGPU][True16][MC] FLAT load/store supporting True16 format
1 parent 5c1752e commit fecac70

File tree

2 files changed

+47
-13
lines changed

2 files changed

+47
-13
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot
1414
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
1515
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
1616

17+
// Helper class that records three related instructions on a definition:
// the definition itself and two other instructions looked up by name.
//   hiOp - record name of the instruction stored in HiOp.
//   loOp - record name of the instruction stored in LoOp.
class True16D16Table <string hiOp, string loOp> {
18+
// The record being defined, looked up through its own NAME.
Instruction T16Op = !cast<Instruction>(NAME);
19+
// Instruction named by the hiOp string.
Instruction HiOp = !cast<Instruction>(hiOp);
20+
// Instruction named by the loOp string.
Instruction LoOp = !cast<Instruction>(loOp);
21+
}
22+
1723
//===----------------------------------------------------------------------===//
1824
// FLAT classes
1925
//===----------------------------------------------------------------------===//
@@ -225,6 +231,12 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
225231
let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
226232
}
227233

234+
// Defines a pair of FLAT load pseudos for one mnemonic:
//   <NAME>      - VGPR_32 destination, mnemonic opName.
//   <NAME>_t16  - VGPR_16 destination, mnemonic opName#"_t16".
multiclass FLAT_Load_Pseudo_t16<string opName> {
235+
// Unsuffixed variant. NOTE(review): the trailing 1 is presumably the
// tied-output flag of FLAT_Load_Pseudo - confirm against its parameter list.
def "" : FLAT_Load_Pseudo<opName, VGPR_32, 1>;
236+
// The _t16 variant carries True16Predicate = UseRealTrue16Insts.
let True16Predicate = UseRealTrue16Insts in
237+
// True16D16Table links this def to NAME#"_HI" (hi op) and NAME (lo op,
// i.e. the unsuffixed def above). The NAME#"_HI" record is not created
// by this multiclass and has to be defined elsewhere.
def _t16 : FLAT_Load_Pseudo<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_HI", NAME>;
238+
}
239+
228240
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
229241
bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
230242
opName,
@@ -242,6 +254,12 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
242254
let enabled_saddr = EnableSaddr;
243255
}
244256

257+
// Defines a pair of FLAT store pseudos for one mnemonic:
//   <NAME>      - VGPR_32 data operand, mnemonic opName.
//   <NAME>_t16  - VGPR_16 data operand, mnemonic opName#"_t16".
multiclass FLAT_Store_Pseudo_t16<string opName> {
258+
// Unsuffixed variant.
def "" : FLAT_Store_Pseudo<opName, VGPR_32>;
259+
// The _t16 variant carries True16Predicate = UseRealTrue16Insts.
let True16Predicate = UseRealTrue16Insts in
260+
// True16D16Table links this def to NAME#"_D16_HI" (hi op) and NAME (lo op,
// i.e. the unsuffixed def above). The NAME#"_D16_HI" record is not created
// by this multiclass and has to be defined elsewhere.
def _t16 : FLAT_Store_Pseudo<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_D16_HI", NAME>;
261+
}
262+
245263
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
246264
let is_flat_global = 1 in {
247265
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
@@ -653,27 +671,28 @@ def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
653671
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
654672
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
655673

656-
def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
657-
def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
658674
def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
659675
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
660676
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
661677
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
662678

663679
let SubtargetPredicate = HasD16LoadStore in {
664680
let TiedSourceNotRead = 1 in {
665-
def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
666681
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
667-
def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
682+
defm FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_ubyte_d16">;
668683
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
669-
def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
684+
defm FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_sbyte_d16">;
670685
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;
686+
defm FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo_t16 <"flat_load_short_d16">;
671687
}
672688

673689
def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
674690
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
675691
}
676692

693+
defm FLAT_STORE_BYTE : FLAT_Store_Pseudo_t16 <"flat_store_byte">;
694+
defm FLAT_STORE_SHORT : FLAT_Store_Pseudo_t16 <"flat_store_short">;
695+
677696
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
678697
VGPR_32, i32, v2i32, VReg_64>;
679698

@@ -1044,6 +1063,11 @@ class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
10441063
(inst $vaddr, $offset, 0, $in)
10451064
>;
10461065

1066+
// Selection pattern for a load through `node` that yields a value of type vt.
//   inst - FLAT pseudo emitted for the match.
//   node - pattern operator applied to the matched address.
//   vt   - result value type of the matched node.
// The output takes no $in operand, unlike the (inst $vaddr, $offset, 0, $in)
// form used by FlatLoadPat_D16.
class FlatLoadPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1067+
// Source: node applied to a FlatOffset address (64-bit $vaddr plus i32 $offset).
(vt (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset))),
1068+
// Result: inst with the address, the offset and a constant i32 0 operand.
// NOTE(review): the 0 is presumably the cache-policy operand - confirm.
(inst $vaddr, $offset, (i32 0))
1069+
>;
1070+
10471071
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
10481072
(node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
10491073
(inst $vaddr, $offset, 0, $in)
@@ -1366,16 +1390,22 @@ def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
13661390
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
13671391
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>;
13681392
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;
1369-
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1370-
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1371-
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
13721393
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
13731394
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1374-
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
13751395
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
13761396
def : FlatLoadPat <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
13771397
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
13781398

1399+
// Instantiate the i16 flat load/store patterns below once for each predicate
// in the list, with True16Predicate set to that predicate for every copy.
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1400+
let True16Predicate = p in {
1401+
// i16 byte loads (any-, zero- and sign-extending) select FLAT_LOAD_UBYTE/SBYTE.
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1402+
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1403+
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1404+
// Plain i16 load selects FLAT_LOAD_USHORT.
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1405+
// i16 stores: truncating byte store and plain 16-bit store.
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1406+
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1407+
}
1408+
13791409
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
13801410
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
13811411

@@ -1454,9 +1484,6 @@ let SubtargetPredicate = isGFX12Plus in {
14541484
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
14551485
}
14561486

1457-
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1458-
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1459-
14601487
let OtherPredicates = [HasD16LoadStore] in {
14611488
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
14621489
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2419,8 +2419,15 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
24192419

24202420
// Return an AGPR+VGPR operand class for the given VGPR register class.
24212421
class getLdStRegisterOperand<RegisterClass RC> {
2422+
// This type of operand is only used in pseudo instructions helping
2423+
// code generation and thus doesn't need encoding and decoding methods.
2424+
// It also doesn't need to support AGPRs, because GFX908/GFX90A/GFX940 do not
2425+
// support True16.
2426+
defvar VLdSt_16 = RegisterOperand<VGPR_16>;
2427+
24222428
RegisterOperand ret =
2423-
!cond(!eq(RC.Size, 32) : AVLdSt_32,
2429+
!cond(!eq(RC.Size, 16) : VLdSt_16,
2430+
!eq(RC.Size, 32) : AVLdSt_32,
24242431
!eq(RC.Size, 64) : AVLdSt_64,
24252432
!eq(RC.Size, 96) : AVLdSt_96,
24262433
!eq(RC.Size, 128) : AVLdSt_128,

0 commit comments

Comments
 (0)