Skip to content

Commit 409a2f0

Browse files
committed
[AMDGPU] Allow no saddr for global addtid insts
The global_load_dword_addtid and global_store_dword_addtid instructions appear to support disabling the scalar address operand (written as `off` in place of an SGPR pair). Add assembler and disassembler support for this form. Differential Revision: https://reviews.llvm.org/D93288
1 parent 741978d commit 409a2f0

File tree

3 files changed

+50
-14
lines changed

3 files changed

+50
-14
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -192,24 +192,34 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
192192
}
193193

194194
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
195-
bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
195+
bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
196196
opName,
197197
(outs regClass:$vdst),
198-
!con((ins SReg_64:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
198+
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
199+
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
199200
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
200-
" $vdst, $saddr$offset$glc$slc$dlc"> {
201+
" $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
201202
let is_flat_global = 1;
202203
let has_data = 0;
203204
let mayLoad = 1;
204205
let has_vaddr = 0;
205206
let has_saddr = 1;
206-
let enabled_saddr = 1;
207+
let enabled_saddr = EnableSaddr;
207208
let maybeAtomic = 1;
209+
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
208210

209211
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
210212
let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
211213
}
212214

215+
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
216+
bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
217+
def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
218+
GlobalSaddrTable<0, opName>;
219+
def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
220+
GlobalSaddrTable<1, opName>;
221+
}
222+
213223
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
214224
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
215225
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
@@ -220,21 +230,29 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
220230
}
221231

222232
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
223-
bit HasSignedOffset = 0> : FLAT_Pseudo<
233+
bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
224234
opName,
225235
(outs),
226-
!con(
227-
(ins vdataClass:$vdata, SReg_64:$saddr),
228-
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
229-
" $vdata, $saddr$offset$glc$slc$dlc"> {
236+
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
237+
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
238+
" $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
230239
let is_flat_global = 1;
231240
let mayLoad = 0;
232241
let mayStore = 1;
233242
let has_vdst = 0;
234243
let has_vaddr = 0;
235244
let has_saddr = 1;
236-
let enabled_saddr = 1;
245+
let enabled_saddr = EnableSaddr;
237246
let maybeAtomic = 1;
247+
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
248+
}
249+
250+
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
251+
bit HasSignedOffset = 0> {
252+
def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
253+
GlobalSaddrTable<0, opName>;
254+
def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
255+
GlobalSaddrTable<1, opName>;
238256
}
239257

240258
class FlatScratchInst <string sv_op, string mode> {
@@ -603,7 +621,7 @@ defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_
603621
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
604622
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
605623
let OtherPredicates = [HasGFX10_BEncoding] in
606-
def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
624+
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
607625

608626
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
609627
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
@@ -612,7 +630,7 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VR
612630
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
613631
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
614632
let OtherPredicates = [HasGFX10_BEncoding] in
615-
def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
633+
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
616634

617635
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
618636
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
@@ -1651,8 +1669,8 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
16511669
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
16521670
defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
16531671
defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
1654-
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x016>;
1655-
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;
1672+
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>;
1673+
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;
16561674

16571675
// ENC_FLAT_SCRATCH.
16581676
defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>;

llvm/test/MC/AMDGPU/gfx1030_new.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@ global_load_dword_addtid v1, s[2:3] offset:16 glc slc dlc
1212
global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
1313
// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00]
1414

15+
global_load_dword_addtid v1, off offset:16
16+
// GFX10: encoding: [0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01]
17+
18+
global_load_dword_addtid v1, off offset:16 glc slc dlc
19+
// GFX10: encoding: [0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01]
20+
21+
global_store_dword_addtid v1, off offset:16 glc slc dlc
22+
// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00]
23+
1524
global_store_dword v254, v1, s[2:3] offset:16
1625
// GFX10: encoding: [0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00]
1726

llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@
1212
# GFX10: global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
1313
0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00
1414

15+
# GFX10: global_load_dword_addtid v1, off offset:16
16+
0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01
17+
18+
# GFX10: global_load_dword_addtid v1, off offset:16 glc slc dlc
19+
0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01
20+
21+
# GFX10: global_store_dword_addtid v1, off offset:16 glc slc dlc
22+
0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00
23+
1524
# GFX10: global_store_dword v254, v1, s[2:3] offset:16
1625
0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00
1726

0 commit comments

Comments
 (0)