[AMDGPU][True16][CodeGen] flat/global/scratch load/store pseudo for true16 #127945
Conversation
Force-pushed from 0cf4635 to 78d71f7
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen)
Changes: added load/store pseudos for flat_store (flat_load is already in place) in true16 mode and updated the codegen test file.
Patch is 132.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127945.diff
11 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7988a9ac0ce55..f48d1d8c011da 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -255,6 +255,12 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
let enabled_saddr = EnableSaddr;
}
+multiclass FLAT_Store_Pseudo_t16<string opName> {
+ def "" : FLAT_Store_Pseudo<opName, VGPR_32>;
+ let OtherPredicates = [HasTrue16BitInsts] in
+ def _t16 : FLAT_Store_Pseudo<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_D16_HI", NAME>;
+}
+
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
let is_flat_global = 1 in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
@@ -264,6 +270,21 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}
}
+multiclass FLAT_Global_Load_Pseudo_t16<string opName> {
+ defm "" : FLAT_Global_Load_Pseudo<opName, VGPR_32, 1>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts],
+ SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
+ def _t16 : FLAT_Load_Pseudo<Name16, VGPR_16, 0, 1>,
+ GlobalSaddrTable<0, Name16>,
+ True16D16Table<NAME#"_HI", NAME>;
+ def _SADDR_t16 : FLAT_Load_Pseudo<Name16, VGPR_16, 0, 1, 1>,
+ GlobalSaddrTable<1, Name16>,
+ True16D16Table<NAME#"_HI_SADDR", NAME#"_SADDR">;
+ }
+}
+
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
@@ -300,6 +321,21 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
}
+multiclass FLAT_Global_Store_Pseudo_t16<string opName> {
+ defm "" : FLAT_Global_Store_Pseudo<opName, VGPR_32>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts],
+ SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
+ def _t16 : FLAT_Store_Pseudo<Name16, VGPR_16, 1>,
+ GlobalSaddrTable<0, Name16>,
+ True16D16Table<NAME#"_D16_HI", NAME>;
+ def _SADDR_t16 : FLAT_Store_Pseudo<Name16, VGPR_16, 1, 1>,
+ GlobalSaddrTable<1, Name16>,
+ True16D16Table<NAME#"_D16_HI_SADDR", NAME#"_SADDR">;
+ }
+}
+
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs ),
@@ -456,6 +492,29 @@ multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit H
FlatScratchInst<opName, "ST">;
}
+multiclass FLAT_Scratch_Load_Pseudo_t16<string opName> {
+ defm "" : FLAT_Scratch_Load_Pseudo<opName, VGPR_32, 1>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts], is_flat_scratch = 1 in {
+ def _t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0>,
+ FlatScratchInst<Name16, "SV">,
+ True16D16Table<NAME#"_HI", NAME>;
+ def _SADDR_t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0, 1>,
+ FlatScratchInst<Name16, "SS">,
+ True16D16Table<NAME#"_HI_SADDR", NAME#"_SADDR">;
+ let SubtargetPredicate = HasFlatScratchSVSMode in
+ def _SVS_t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0, 1, 1>,
+ FlatScratchInst<Name16, "SVS">,
+ True16D16Table<NAME#"_HI_SVS", NAME#"_SVS">;
+
+ let SubtargetPredicate = HasFlatScratchSTMode in
+ def _ST_t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0, 0, 0, 0>,
+ FlatScratchInst<Name16, "ST">,
+ True16D16Table<NAME#"_HI_ST", NAME#"_ST">;
+ }
+}
+
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>,
FlatScratchInst<opName, "SV">;
@@ -471,6 +530,31 @@ multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
FlatScratchInst<opName, "ST">;
}
+multiclass FLAT_Scratch_Store_Pseudo_t16<string opName> {
+ defm "" : FLAT_Scratch_Store_Pseudo<opName, VGPR_32>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts], is_flat_scratch = 1 in {
+ def _t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16>,
+ FlatScratchInst<Name16, "SV">,
+ True16D16Table<NAME#"_D16_HI", NAME>;
+ def _SADDR_t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16, 1>,
+ FlatScratchInst<Name16, "SS">,
+ True16D16Table<NAME#"_D16_HI_SADDR", NAME#"_SADDR">;
+
+ let SubtargetPredicate = HasFlatScratchSVSMode in
+ def _SVS_t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16, 1, 1>,
+ FlatScratchInst<Name16, "SVS">,
+ True16D16Table<NAME#"_D16_HI_SVS", NAME#"_SVS">;
+
+ let SubtargetPredicate = HasFlatScratchSTMode in
+ def _ST_t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16, 0, 0, 0>,
+ FlatScratchInst<Name16, "ST">,
+ True16D16Table<NAME#"_D16_HI_ST", NAME#"_ST">;
+ }
+}
+
+
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
bit EnableSVE = 0,
bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo<
@@ -665,8 +749,6 @@ def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
-def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
-def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
@@ -686,6 +768,9 @@ def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}
+defm FLAT_STORE_BYTE : FLAT_Store_Pseudo_t16 <"flat_store_byte">;
+defm FLAT_STORE_SHORT : FLAT_Store_Pseudo_t16 <"flat_store_short">;
+
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
VGPR_32, i32, v2i32, VReg_64>;
@@ -834,19 +919,22 @@ defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
let TiedSourceNotRead = 1 in {
-defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
-defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
-defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
-defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
+defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
+defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo_t16 <"global_load_sbyte_d16">;
+defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo_t16 <"global_load_short_d16">;
+defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo_t16 <"global_load_ubyte_d16">;
}
+defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
+defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
+
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
-defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
-defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
+defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo_t16 <"global_store_byte">;
+defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo_t16 <"global_store_short">;
defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
@@ -854,9 +942,6 @@ defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VR
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
-defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
-defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
-
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
VGPR_32, i32, v2i32, VReg_64>;
@@ -970,24 +1055,24 @@ defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", V
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
let TiedSourceNotRead = 1 in {
-defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
-defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
-defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo_t16 <"scratch_load_ubyte_d16">;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo_t16 <"scratch_load_sbyte_d16">;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo_t16 <"scratch_load_short_d16">;
}
-defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
-defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
+defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
+
+defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo_t16 <"scratch_store_byte">;
+defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo_t16 <"scratch_store_short">;
defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;
-defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
-defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
-
defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
@@ -1071,11 +1156,21 @@ class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType
(inst $vaddr, $offset, 0, $in)
>;
+class FlatSignedLoadPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
+ (inst $vaddr, $offset, (i32 0))
+>;
+
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
(inst $saddr, $voffset, $offset, 0, $in)
>;
+class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
+ (inst $saddr, $voffset, $offset, (i32 0))
+>;
+
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
(inst $vaddr, $offset)
@@ -1208,6 +1303,11 @@ class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
(inst $vaddr, $offset, 0, $in)
>;
+class ScratchLoadSignedPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
+ (inst $vaddr, $offset, 0)
+>;
+
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
@@ -1223,6 +1323,11 @@ class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueTy
(inst $saddr, $offset, 0, $in)
>;
+class ScratchLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
+ (inst $saddr, $offset, 0)
+>;
+
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
(node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
@@ -1245,6 +1350,11 @@ class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
(inst $vaddr, $saddr, $offset, 0, $in)
>;
+class ScratchLoadSVaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
+ (inst $vaddr, $saddr, $offset, 0)
+>;
+
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat <inst, node, vt> {
let AddedComplexity = 10;
@@ -1265,6 +1375,16 @@ multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Valu
}
}
+multiclass GlobalFLATLoadPats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
+ def : FlatSignedLoadPat_D16_t16<!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalLoadSaddrPat_D16_t16<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> {
def : FlatStoreSignedPat <inst, node, vt> {
@@ -1276,6 +1396,16 @@ multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
}
}
+multiclass GlobalFLATStorePats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
+ def : FlatStoreSignedPat<!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
ValueType data_vt = vt> {
let AddedComplexity = 11 in
@@ -1358,6 +1488,22 @@ multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
}
}
+multiclass ScratchFLATStorePats_t16<string inst, SDPatternOperator node,
+ ValueType vt> {
+ def : ScratchStoreSignedPat <!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 25;
+ }
+
+ def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 26;
+ }
+
+ def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(inst#"_SVS_t16"), node, vt> {
+ let SubtargetPredicate = HasFlatScratchSVSMode;
+ let AddedComplexity = 27;
+ }
+}
+
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : ScratchLoadSignedPat_D16 <inst, node, vt> {
let AddedComplexity = 25;
@@ -1373,6 +1519,21 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
}
}
+multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
+ def : ScratchLoadSignedPat_D16_t16 <!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 25;
+ }
+
+ def : ScratchLoadSaddrPat_D16_t16<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 26;
+ }
+
+ def : ScratchLoadSVaddrPat_D16_t16 <!cast<FLAT_Pseudo>(inst#"_SVS_t16"), node, vt> {
+ let SubtargetPredicate = HasFlatScratchSVSMode;
+ let AddedComplexity = 27;
+ }
+}
+
let OtherPredicates = [HasFlatAddressSpace] in {
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
@@ -1409,6 +1570,8 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
def : FlatLoadPat_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
def : FlatLoadPat_D16_t16<FLAT_LOAD_SHORT_D16_t16, load_flat, i16>;
+ def : FlatStorePat <FLAT_STORE_BYTE_t16, truncstorei8_flat, i16>;
+ def : FlatStorePat <FLAT_STORE_SHORT_t16, store_flat, i16>;
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
@@ -1489,9 +1652,6 @@ let SubtargetPredicate = isGFX12Plus in {
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
}
-def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
-def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
-
let OtherPredicates = [HasD16LoadStore] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
@@ -1531,15 +1691,28 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
-defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
-defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
-defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
+
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in {
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, atomic_load_sext_16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;
+}
+
+let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
+defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
+defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
+} // end OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts
foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
@@ -1565,11 +1738,15 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_globa...
[truncated]
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch,+real-true16 < %s | FileCheck --check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch,-real-true16 < %s | FileCheck --check-prefixes=GFX12,GFX12-FAKE16 %s
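These two RUN lines compile the same test once with +real-true16 and once with -real-true16, so the checks can diverge between the GFX12-TRUE16 and GFX12-FAKE16 prefixes. Below is a minimal, hand-written sketch (not taken from the patch) of the kind of case they exercise; the CHECK operands (v0, v1.l, off) are assumptions about the emitted assembly, not copied from the real test:

define void @store_i16_scratch(ptr addrspace(5) %p, i16 %v) {
; Assumed true16 form: the store data is a 16-bit VGPR half (v1.l).
; GFX12-TRUE16: scratch_store_b16 v0, v1.l, off
; Assumed fake16 form: the store data is a full 32-bit VGPR (v1).
; GFX12-FAKE16: scratch_store_b16 v0, v1, off
  ; i16 store to the private (scratch) address space, addrspace(5)
  store i16 %v, ptr addrspace(5) %p
  ret void
}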
Thanks.
LGTM modulo predicate type change if possible.
Force-pushed from a918d72 to 78d71f7
Force-pushed from 78d71f7 to 1f5bd32
rebased and resolved conflicts
…e16 selection (#128784): This is an NFC change that updates the test file and fixes the build. llvm/llvm-project#128233 is causing a build issue because PR llvm/llvm-project#127945 was merged while #128233 was still pending review.
The T16D16 table is implemented in #127673.
This is a follow-up patch that adds load/store pseudos for:
flat_store
global_load/global_store
scratch_load/scratch_store
in true16 mode, and updates the codegen test file. A sketch of the kind of IR these pseudos target is shown below.
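The following is a minimal sketch, not taken from the patch, of the kind of i16 global-memory IR the new true16 global load/store pseudos are meant to select; the function and value names are made up for illustration:

define void @copy_i16_global(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
  ; an i16 load followed by an i16 store in the global address space (addrspace 1)
  %v = load i16, ptr addrspace(1) %src
  store i16 %v, ptr addrspace(1) %dst
  ret void
}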