Skip to content

Commit 1240902

Browse files
authored
AMDGPU/GlobalISel: Handle atomic sextload and zextload (#111721)
In GlobalISel, atomic loads are handled differently than in SelectionDAG: they have separate opcodes and explicit control over the extension kind, like ordinary loads. Add new patterns for these. There's room for cleanup and improvement. d16 cases aren't handled. Fixes #111645
1 parent eeee5a4 commit 1240902

File tree

9 files changed

+1595
-4
lines changed

9 files changed

+1595
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ def : GINodeEquiv<G_STORE, AMDGPUst_glue> {
207207

208208
// GlobalISel equivalence for the glued atomic load node: G_LOAD stands in for
// AMDGPUatomic_ld_glue, with CheckMMOIsAtomic set so that only atomic memory
// operands qualify. IfSignExtend / IfZeroExtend name the extending opcodes
// (G_SEXTLOAD / G_ZEXTLOAD) to use in place of G_LOAD -- presumably selected
// when the imported pattern is a sign-/zero-extending load; confirm against
// the GINodeEquiv class definition.
def : GINodeEquiv<G_LOAD, AMDGPUatomic_ld_glue> {
209209
bit CheckMMOIsAtomic = 1;
210+
let IfSignExtend = G_SEXTLOAD;
211+
let IfZeroExtend = G_ZEXTLOAD;
210212
}
211213

212214
def : GINodeEquiv<G_STORE, AMDGPUatomic_st_glue> {

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -504,23 +504,36 @@ def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
504504

505505
def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
506506
let IsAtomic = 1;
507-
let MemoryVT = i8;
508507
}
509508

510509
def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
511510
let IsAtomic = 1;
512-
let MemoryVT = i16;
513511
}
514512

515513
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
516514
let IsAtomic = 1;
517-
let MemoryVT = i32;
518515
}
519516

520517
def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
521518
let IsAtomic = 1;
522-
let MemoryVT = i64;
523519
}
520+
521+
// Per-address-space wrappers around the generic extending atomic load
// fragments atomic_load_{zext,sext}_{8,16}. Each wrapper only sets IsAtomic;
// no MemoryVT is set here -- presumably the wrapped fragment already carries
// the memory width (TODO confirm). The #as suffix and the address-space
// restriction come from an enclosing foreach / let AddressSpaces whose
// headers are outside this excerpt.
def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext_8 node:$ptr)> {
522+
let IsAtomic = 1;
523+
}
524+
525+
def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext_8 node:$ptr)> {
526+
let IsAtomic = 1;
527+
}
528+
529+
def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext_16 node:$ptr)> {
530+
let IsAtomic = 1;
531+
}
532+
533+
def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext_16 node:$ptr)> {
534+
let IsAtomic = 1;
535+
}
536+
524537
} // End let AddressSpaces
525538
} // End foreach as
526539

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,15 +983,20 @@ defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc <
983983
>;
984984

985985
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>;
986+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_zext_8_global>;
986987
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_16_global>;
988+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_zext_16_global>;
987989
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>;
988990
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, atomic_load_16_global>;
989991
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
990992
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
991993
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
994+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, atomic_load_sext_8_global>;
995+
// Intentionally no BUFFER_LOAD_SBYTE pattern for atomic_load_sext_16_global:
// a byte load would read only 8 of the 16 bits being sign-extended. The
// 16-bit sign-extending atomic load is selected to BUFFER_LOAD_SSHORT by the
// pattern that follows the sextloadi16_global entry below.
992996
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
993997
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
994998
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
999+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, atomic_load_sext_16_global>;
9951000

9961001
foreach vt = Reg32Types.types in {
9971002
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", vt, load_global>;

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,12 +795,19 @@ defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
795795

796796
// Atomic loads from the local address space.
//  * Plain and zero-extending 8-/16-bit atomic loads use the unsigned reads
//    (DS_READ_U8 / DS_READ_U16).
//  * Sign-extending 8-/16-bit atomic loads use the signed reads
//    (DS_READ_I8 / DS_READ_I16).
//  * 32-/64-bit atomic loads use DS_READ_B32 / DS_READ_B64.
// The 16-bit extending forms are only given an i32 result type here.
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
797797
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
798+
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_zext_8_local">;
799+
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_zext_8_local">;
800+
defm : DSReadPat_mc <DS_READ_I8, i16, "atomic_load_sext_8_local">;
801+
defm : DSReadPat_mc <DS_READ_I8, i32, "atomic_load_sext_8_local">;
798802
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
799803
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
804+
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_zext_16_local">;
805+
defm : DSReadPat_mc <DS_READ_I16, i32, "atomic_load_sext_16_local">;
800806
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
801807
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
802808

803809
let OtherPredicates = [D16PreservesUnusedBits] in {
810+
// TODO: Atomic loads
804811
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
805812
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
806813
def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,18 +1355,25 @@ let OtherPredicates = [HasFlatAddressSpace] in {
13551355

13561356
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
13571357
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1358+
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>;
1359+
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16>;
13581360
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
13591361
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1362+
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i32>;
1363+
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i16>;
13601364
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
13611365
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
13621366
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1367+
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>;
1368+
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;
13631369
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
13641370
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
13651371
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
13661372
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
13671373
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
13681374
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
13691375
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1376+
def : FlatLoadPat <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
13701377
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
13711378

13721379
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
@@ -1456,6 +1463,7 @@ def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
14561463
}
14571464

14581465
let OtherPredicates = [D16PreservesUnusedBits] in {
1466+
// TODO: Handle atomic loads
14591467
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
14601468
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
14611469
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
@@ -1477,8 +1485,14 @@ let OtherPredicates = [HasFlatGlobalInsts] in {
14771485

14781486
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;
14791487
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>;
1488+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i32>;
1489+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16>;
14801490
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>;
14811491
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>;
1492+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>;
1493+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
1494+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i32>;
1495+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16>;
14821496
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
14831497
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
14841498
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
@@ -1488,6 +1502,8 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
14881502
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
14891503
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
14901504
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
1505+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, atomic_load_sext_16_global, i32>;
1506+
// GLOBAL_LOAD_USHORT for atomic_load_zext_16_global (i32) is already declared
// alongside the other atomic zext patterns earlier in this list; the
// duplicate entry that used to sit here was redundant and has been dropped.
14911507
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;
14921508

14931509
foreach vt = Reg32Types.types in {
@@ -1525,6 +1541,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global,
15251541
}
15261542

15271543
let OtherPredicates = [D16PreservesUnusedBits] in {
1544+
// TODO: Handle atomic loads
15281545
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
15291546
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
15301547
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,18 @@ def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
348348
let IsNonExtLoad = 1;
349349
}
350350

351+
// Zero-extending atomic load on the glued AMDGPU atomic load node
// (AMDGPUatomic_ld_glue). Marked IsAtomic and IsZeroExtLoad; no memory width
// is fixed at this level.
def atomic_load_zext_glue :
352+
PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
353+
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
354+
let IsZeroExtLoad = true;
355+
}
356+
357+
// Sign-extending atomic load on the glued AMDGPU atomic load node
// (AMDGPUatomic_ld_glue). Marked IsAtomic and IsSignExtLoad; no memory width
// is fixed at this level.
def atomic_load_sext_glue :
358+
PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
359+
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
360+
let IsSignExtLoad = true;
361+
}
362+
351363
def atomic_load_8_glue : PatFrag<(ops node:$ptr),
352364
(AMDGPUatomic_ld_glue node:$ptr)> {
353365
let IsAtomic = 1;
@@ -372,6 +384,30 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr),
372384
let MemoryVT = i64;
373385
}
374386

387+
// Width-specific glued extending atomic loads. Each fragment wraps
// atomic_load_zext_glue or atomic_load_sext_glue and pins the memory type via
// MemoryVT (i8 or i16), mirroring the non-extending atomic_load_N_glue
// fragments defined just before these.
def atomic_load_zext_8_glue : PatFrag<(ops node:$ptr),
388+
(atomic_load_zext_glue node:$ptr)> {
389+
let IsAtomic = 1;
390+
let MemoryVT = i8;
391+
}
392+
393+
def atomic_load_sext_8_glue : PatFrag<(ops node:$ptr),
394+
(atomic_load_sext_glue node:$ptr)> {
395+
let IsAtomic = 1;
396+
let MemoryVT = i8;
397+
}
398+
399+
def atomic_load_zext_16_glue : PatFrag<(ops node:$ptr),
400+
(atomic_load_zext_glue node:$ptr)> {
401+
let IsAtomic = 1;
402+
let MemoryVT = i16;
403+
}
404+
405+
def atomic_load_sext_16_glue : PatFrag<(ops node:$ptr),
406+
(atomic_load_sext_glue node:$ptr)> {
407+
let IsAtomic = 1;
408+
let MemoryVT = i16;
409+
}
410+
375411
def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
376412
let IsLoad = 1;
377413
let IsAnyExtLoad = 1;
@@ -453,6 +489,15 @@ def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
453489
(atomic_load_32_glue node:$ptr)>;
454490
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
455491
(atomic_load_64_glue node:$ptr)>;
492+
493+
// _local_m0 variants of the extending atomic loads: thin wrappers around the
// width-specific *_glue fragments. They add no predicates of their own; the
// local address-space restriction comes from the enclosing
// `let AddressSpaces = LoadAddress_local.AddrSpaces` that is closed right
// after these definitions.
def atomic_load_zext_8_local_m0 : PatFrag<(ops node:$ptr),
494+
(atomic_load_zext_8_glue node:$ptr)>;
495+
def atomic_load_sext_8_local_m0 : PatFrag<(ops node:$ptr),
496+
(atomic_load_sext_8_glue node:$ptr)>;
497+
def atomic_load_zext_16_local_m0 : PatFrag<(ops node:$ptr),
498+
(atomic_load_zext_16_glue node:$ptr)>;
499+
def atomic_load_sext_16_local_m0 : PatFrag<(ops node:$ptr),
500+
(atomic_load_sext_16_glue node:$ptr)>;
456501
} // End let AddressSpaces = LoadAddress_local.AddrSpaces
457502

458503

0 commit comments

Comments
 (0)