Skip to content

Commit 8bc0f87

Browse files
[AMDGPU][True16][CodeGen] D16 LDS load/store pseudo instructions in true16 (#131427)
Implement new pseudos with the suffix _t16, which have VGPR_16 as the store source or load destination. This affects 8-bit and 16-bit LDS loads and stores. The pseudos are lowered in the MC inst layer to the existing real Hi/Lo instructions, which take a VGPR_32 source or destination. --------- Co-authored-by: Abhinav <[email protected]>
1 parent 2ff370f commit 8bc0f87

File tree

6 files changed

+2182
-844
lines changed

6 files changed

+2182
-844
lines changed

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,15 @@ multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
127127
}
128128
}
129129

130+
// Store-side true16 multiclass. Inherits every record of
// DS_1A1D_NORET_mc<opName, rc> and adds one extra "_t16" pseudo: a
// DS_1A1D_NORET named opName#"_t16" whose register class is VGPR_16 instead of
// rc. The extra record is defined with has_m0_read = 0 and is gated on
// True16Predicate = UseRealTrue16Insts.
// True16D16Table ties the pseudo to NAME#"_D16_HI" and NAME -- presumably the
// hi-half and lo-half instructions it is lowered to; confirm against the
// True16D16Table definition (not visible here).
multiclass DS_1A1D_NORET_t16<string opName, RegisterClass rc = VGPR_32>
131+
: DS_1A1D_NORET_mc<opName, rc> {
132+
let has_m0_read = 0 in {
133+
let True16Predicate = UseRealTrue16Insts in {
134+
def "_t16" : DS_1A1D_NORET<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_D16_HI", NAME>;
135+
}
136+
}
137+
}
138+
130139
multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
131140
let has_m0_read = 0 in {
132141
def "" : DS_1A1D_NORET<opName, rc>;
@@ -297,6 +306,15 @@ multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOu
297306
}
298307
}
299308

309+
// Load-side true16 multiclass. Inherits every record of
// DS_1A_RET_mc<opName, rc, HasTiedOutput, ofs> and adds one extra "_t16"
// pseudo: a DS_1A_RET named opName#"_t16" whose register class is VGPR_16
// instead of rc (HasTiedOutput and ofs are forwarded unchanged). The extra
// record is defined with has_m0_read = 0 and is gated on
// True16Predicate = UseRealTrue16Insts.
// True16D16Table ties the pseudo to NAME#"_D16_HI" and NAME#"_D16" --
// presumably the hi-half and lo-half instructions it is lowered to; confirm
// against the True16D16Table definition (not visible here).
multiclass DS_1A_RET_t16<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset>
310+
: DS_1A_RET_mc<opName, rc, HasTiedOutput, ofs> {
311+
let has_m0_read = 0 in {
312+
let True16Predicate = UseRealTrue16Insts in {
313+
def "_t16" : DS_1A_RET<opName#"_t16", VGPR_16, HasTiedOutput, ofs>, True16D16Table<NAME#"_D16_HI", NAME#"_D16">;
314+
}
315+
}
316+
}
317+
300318
multiclass DS_1A_RET_NoM0<string opName, RegisterClass rc = VGPR_32> {
301319
let has_m0_read = 0 in {
302320
def "" : DS_1A_RET<opName, rc>;
@@ -460,8 +478,6 @@ defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">;
460478
defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">;
461479

462480
let mayLoad = 0 in {
463-
defm DS_WRITE_B8 : DS_1A1D_NORET_mc<"ds_write_b8">;
464-
defm DS_WRITE_B16 : DS_1A1D_NORET_mc<"ds_write_b16">;
465481
defm DS_WRITE_B32 : DS_1A1D_NORET_mc<"ds_write_b32">;
466482
defm DS_WRITE2_B32 : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
467483
defm DS_WRITE2ST64_B32: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;
@@ -476,6 +492,9 @@ def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
476492

477493
} // End has_m0_read = 0
478494

495+
defm DS_WRITE_B8 : DS_1A1D_NORET_t16<"ds_write_b8">;
496+
defm DS_WRITE_B16 : DS_1A1D_NORET_t16<"ds_write_b16">;
497+
479498
let SubtargetPredicate = HasDSAddTid in {
480499
def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
481500
}
@@ -628,10 +647,7 @@ def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
628647
}
629648

630649
let mayStore = 0 in {
631-
defm DS_READ_I8 : DS_1A_RET_mc<"ds_read_i8">;
632-
defm DS_READ_U8 : DS_1A_RET_mc<"ds_read_u8">;
633650
defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">;
634-
defm DS_READ_U16 : DS_1A_RET_mc<"ds_read_u16">;
635651
defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">;
636652
defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>;
637653

@@ -652,6 +668,10 @@ def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
652668
}
653669
} // End has_m0_read = 0
654670

671+
defm DS_READ_I8 : DS_1A_RET_t16<"ds_read_i8">;
672+
defm DS_READ_U8 : DS_1A_RET_t16<"ds_read_u8">;
673+
defm DS_READ_U16 : DS_1A_RET_t16<"ds_read_u16">;
674+
655675
let SubtargetPredicate = HasDSAddTid in {
656676
def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
657677
}
@@ -795,34 +815,51 @@ multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
795815
}
796816
}
797817

818+
// Emits the DS load selection patterns for `vt` / `frag`, choosing the
// instruction variant from the active predicates:
//   * LDSRequiresM0Init: `inst` itself, matched with the frag#"_m0" fragment.
//   * NotLDSRequiresM0Init, True16Predicate in {NotHasTrue16BitInsts,
//     UseFakeTrue16Insts}: the inst#"_gfx9" variant, matched with `frag`.
//   * NotLDSRequiresM0Init, True16Predicate = UseRealTrue16Insts: the
//     inst#"_t16" variant, matched with `frag`.
// `inst` must therefore name a pseudo that also has "_gfx9" and "_t16"
// siblings, otherwise the !cast<DS_Pseudo> lookups below fail.
multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
819+
820+
let OtherPredicates = [LDSRequiresM0Init] in {
821+
def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
822+
}
823+
824+
let OtherPredicates = [NotLDSRequiresM0Init] in {
825+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
826+
let True16Predicate = p in {
827+
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
828+
}
829+
let True16Predicate = UseRealTrue16Insts in {
830+
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
831+
}
832+
}
833+
}
834+
798835
// Selection pattern for loads that take an extra input value: matches `frag`
// applied to a DS1Addr1Offset address (binding $ptr and $offset) together with
// a vt-typed operand $in, and emits `inst` with $ptr, $offset, a constant
// (i1 0), and $in.
// NOTE(review): (i1 0) is presumably the gds bit and $in the tied previous
// register contents -- confirm against the operand list of the instruction
// class passed as `inst`.
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat <
799836
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
800837
(inst $ptr, Offset:$offset, (i1 0), $in)
801838
>;
802839

803840
defm : DSReadPat_mc <DS_READ_I8, i32, "sextloadi8_local">;
804-
defm : DSReadPat_mc <DS_READ_I8, i16, "sextloadi8_local">;
805841
defm : DSReadPat_mc <DS_READ_U8, i32, "extloadi8_local">;
806842
defm : DSReadPat_mc <DS_READ_U8, i32, "zextloadi8_local">;
807-
defm : DSReadPat_mc <DS_READ_U8, i16, "extloadi8_local">;
808-
defm : DSReadPat_mc <DS_READ_U8, i16, "zextloadi8_local">;
809843
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
810844
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">; // NOTE(review): identical to the preceding defm (same inst, type, and fragment) -- looks redundant; confirm and drop one.
811845
defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
812846
defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
813-
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
847+
defm : DSReadPat_t16 <DS_READ_I8, i16, "sextloadi8_local">;
848+
defm : DSReadPat_t16 <DS_READ_U8, i16, "extloadi8_local">;
849+
defm : DSReadPat_t16 <DS_READ_U8, i16, "zextloadi8_local">;
850+
defm : DSReadPat_t16 <DS_READ_U16, i16, "load_local">;
814851

815852
foreach vt = Reg32Types.types in {
816853
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
817854
}
818855

819-
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
856+
defm : DSReadPat_t16 <DS_READ_U8, i16, "atomic_load_8_local">;
820857
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
821-
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_zext_8_local">;
858+
defm : DSReadPat_t16 <DS_READ_U8, i16, "atomic_load_zext_8_local">;
822859
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_zext_8_local">;
823-
defm : DSReadPat_mc <DS_READ_I8, i16, "atomic_load_sext_8_local">;
860+
defm : DSReadPat_t16 <DS_READ_I8, i16, "atomic_load_sext_8_local">;
824861
defm : DSReadPat_mc <DS_READ_I8, i32, "atomic_load_sext_8_local">;
825-
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
862+
defm : DSReadPat_t16 <DS_READ_U16, i16, "atomic_load_16_local">;
826863
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
827864
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_zext_16_local">;
828865
defm : DSReadPat_mc <DS_READ_I16, i32, "atomic_load_sext_16_local">;
@@ -861,18 +898,34 @@ multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
861898
}
862899
}
863900

901+
// Emits the DS store selection patterns for `vt` / `frag`, choosing the
// instruction variant from the active predicates:
//   * LDSRequiresM0Init: `inst` itself, matched with the frag#"_m0" fragment.
//   * NotLDSRequiresM0Init, True16Predicate in {NotHasTrue16BitInsts,
//     UseFakeTrue16Insts}: the inst#"_gfx9" variant, matched with `frag`.
//   * NotLDSRequiresM0Init, True16Predicate = UseRealTrue16Insts: the
//     inst#"_t16" variant, matched with `frag`.
// `inst` must therefore name a pseudo that also has "_gfx9" and "_t16"
// siblings, otherwise the !cast<DS_Pseudo> lookups below fail.
multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> {
902+
let OtherPredicates = [LDSRequiresM0Init] in {
903+
def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
904+
}
905+
906+
let OtherPredicates = [NotLDSRequiresM0Init] in {
907+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
908+
let True16Predicate = p in {
909+
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
910+
}
911+
let True16Predicate = UseRealTrue16Insts in {
912+
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
913+
}
914+
}
915+
}
916+
864917
defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
865918
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
866-
defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
867-
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
919+
defm : DSWritePat_t16 <DS_WRITE_B8, i16, "truncstorei8_local">;
920+
defm : DSWritePat_t16 <DS_WRITE_B16, i16, "store_local">;
868921

869922
foreach vt = Reg32Types.types in {
870923
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
871924
}
872925

873-
defm : DSWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
926+
defm : DSWritePat_t16 <DS_WRITE_B8, i16, "atomic_store_8_local">;
874927
defm : DSWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
875-
defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
928+
defm : DSWritePat_t16 <DS_WRITE_B16, i16, "atomic_store_16_local">;
876929
defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
877930
defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
878931
defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;

0 commit comments

Comments
 (0)