Skip to content

Commit 961e438

Browse files
committed
[AMDGPU] Support SCC on buffer atomics
Differential Revision: https://reviews.llvm.org/D98731
1 parent 14756b7 commit 961e438

File tree

4 files changed

+35
-10
lines changed

4 files changed

+35
-10
lines changed

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -679,7 +679,7 @@ class MUBUF_Atomic_Pseudo<string opName,
679679
let has_glc = 0;
680680
let has_dlc = 0;
681681
let has_tfe = 0;
682-
let has_sccb = 0;
682+
let has_sccb = 1;
683683
let maybeAtomic = 1;
684684
let AsmMatchConverter = "cvtMubufAtomic";
685685
}
@@ -2259,7 +2259,8 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
22592259
// GFX8, GFX9 (VI).
22602260
//===----------------------------------------------------------------------===//
22612261

2262-
class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
2262+
class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc,
2263+
bit has_sccb = ps.has_sccb> :
22632264
MUBUF_Real<ps>,
22642265
Enc64,
22652266
SIMCInstr<ps.PseudoInstr, Enc>,
@@ -2270,7 +2271,7 @@ class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
22702271
let Inst{12} = ps.offen;
22712272
let Inst{13} = ps.idxen;
22722273
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
2273-
let Inst{15} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
2274+
let Inst{15} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
22742275
let Inst{16} = ps.lds;
22752276
let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
22762277
let Inst{24-18} = op;
@@ -2281,26 +2282,28 @@ class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
22812282
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
22822283
}
22832284

2284-
class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
2285-
MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.VI> {
2285+
class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps, bit has_sccb = ps.has_sccb> :
2286+
MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.VI, has_sccb> {
22862287
let AssemblerPredicate = isGFX8GFX9NotGFX90A;
22872288
let DecoderNamespace = "GFX8";
22882289

22892290
let Inst{55} = !if(ps.has_tfe, tfe, ?);
22902291
}
22912292

2292-
class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps> :
2293-
MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX90A> {
2293+
class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps,
2294+
bit has_sccb = ps.has_sccb> :
2295+
MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX90A, has_sccb> {
22942296
let AssemblerPredicate = isGFX90APlus;
22952297
let DecoderNamespace = "GFX90A";
2296-
let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands);
2298+
let AsmString = ps.Mnemonic # !subst("$sccb", !if(has_sccb, "$sccb",""),
2299+
!subst("$tfe", "", ps.AsmOperands));
22972300

22982301
let Inst{55} = acc;
22992302
}
23002303

23012304
multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
23022305
def _vi : MUBUF_Real_vi<op, ps>;
2303-
def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
2306+
def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>;
23042307
}
23052308

23062309
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
@@ -2483,7 +2486,7 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
24832486

24842487
} // End SubtargetPredicate = HasAtomicFaddInsts
24852488

2486-
let SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus in {
2489+
let SubtargetPredicate = isGFX90APlus in {
24872490
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;
24882491
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_vi<0x50>;
24892492
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_vi<0x51>;

llvm/test/MC/AMDGPU/gfx90a_asm_features.s

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1046,3 +1046,7 @@ global_atomic_add_f32 v1, v0, v2, s[0:1] glc ; encoding: [0x00,0x80,0x35,0xdd,0x
10461046
// GFX1010: error: instruction not supported on this GPU
10471047
// GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x39,0xdd,0x00,0x02,0x7f,0x00]
10481048
global_atomic_pk_add_f16 v0, v[0:1], v2, off glc
1049+
1050+
// NOT-GFX90A: error: scc modifier is not supported on this GPU
1051+
// GFX90A: buffer_atomic_add v4, off, s[8:11], s3 scc ; encoding: [0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03]
1052+
buffer_atomic_add v4, off, s[8:11], s3 scc

llvm/test/MC/AMDGPU/gfx90a_err.s

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,21 @@ global_atomic_min_f64 v[0:1], v[2:3], off scc
231231
global_atomic_max_f64 v[0:1], v[2:3], off scc
232232
// GFX90A: error: instruction must not use scc
233233

234+
buffer_atomic_add_f32 v4, off, s[8:11], s3 scc
235+
// GFX90A: error: instruction must not use scc
236+
237+
buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 scc
238+
// GFX90A: error: instruction must not use scc
239+
240+
buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 scc
241+
// GFX90A: error: instruction must not use scc
242+
243+
buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 scc
244+
// GFX90A: error: instruction must not use scc
245+
246+
buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 scc
247+
// GFX90A: error: instruction must not use scc
248+
234249
v_mov_b32_sdwa v1, src_lds_direct dst_sel:DWORD
235250
// GFX90A: error: lds_direct is not supported on this GPU
236251

llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -793,3 +793,6 @@
793793

794794
# GFX90A: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc ; encoding: [0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00]
795795
0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00
796+
797+
# GFX90A: buffer_atomic_add v4, off, s[8:11], s3 scc ; encoding: [0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03]
798+
0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03

0 commit comments

Comments
 (0)