Skip to content

Commit 6181458

Browse files
committed
[AMDGPU] gfx940 MUBUF format changes
Differential Revision: https://reviews.llvm.org/D121234
1 parent 530bc61 commit 6181458

File tree

5 files changed

+138
-2
lines changed

5 files changed

+138
-2
lines changed

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1175,15 +1175,28 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
11751175

11761176
let SubtargetPredicate = isGFX90APlus in {
11771177
// buffer_wbl2 pseudo. The lines marked '+' give it a single input operand,
// $cpol (CPol_0), which is also its only assembly operand.
// NOTE(review): has_glc / has_sccb presumably enable the cache-policy bits in
// the encoding for this instruction -- confirm against the MUBUF pseudo class.
def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> {
1178+
let has_glc = 1;
1179+
let has_sccb = 1;
1180+
let InOperandList = (ins CPol_0:$cpol);
1181+
let AsmOperands = "$cpol";
11781182
}
11791183
// buffer_invl2 pseudo. The enclosing 'let' sets SubtargetPredicate to
// isGFX90APlus; the override below narrows this one definition to
// isGFX90AOnly.
def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> {
1184+
let SubtargetPredicate = isGFX90AOnly;
11801185
}
11811186

11821187
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>;
11831188
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
11841189
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
11851190
} // End SubtargetPredicate = isGFX90APlus
11861191

1192+
// buffer_inv pseudo, predicated on isGFX940Plus. Like BUFFER_WBL2 it takes a
// single $cpol (CPol_0) input operand, which is also its only assembly operand.
// NOTE(review): has_glc / has_sccb presumably enable the cache-policy bits in
// the encoding for this instruction -- confirm against the MUBUF pseudo class.
def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
1193+
let SubtargetPredicate = isGFX940Plus;
1194+
let has_glc = 1;
1195+
let has_sccb = 1;
1196+
let InOperandList = (ins CPol_0:$cpol);
1197+
let AsmOperands = "$cpol";
1198+
}
1199+
11871200
let SubtargetPredicate = isGFX10Plus in {
11881201
def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
11891202
def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
@@ -2366,9 +2379,28 @@ class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps,
23662379
let Inst{55} = acc;
23672380
}
23682381

2382+
// Real (encoded) MUBUF instruction for the GFX940 encoding family.
//   op - 7-bit opcode forwarded to MUBUF_Real_Base_vi.
//   ps - the MUBUF pseudo this real instruction is built from.
// Assembled only when isGFX940Plus holds; decoded through the "GFX9"
// decoder namespace.
class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> :
2383+
MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> {
2384+
let AssemblerPredicate = isGFX940Plus;
2385+
let DecoderNamespace = "GFX9";
2386+
// Assembly string is the pseudo's mnemonic followed by its operand string
// with every "$tfe" occurrence removed.
let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands);
2387+
2388+
// NOTE(review): 'acc' is declared outside this hunk (presumably in a base
// class) -- confirm what bit 55 selects.
let Inst{55} = acc;
2389+
}
2390+
23692391
// Emits the real encodings for one MUBUF pseudo 'ps' with opcode 'op'.
// This is a diff view: the line after the '-' marker is the old _gfx90a
// definition that the commit removes; the lines after '+' markers replace it.
//   - _vi is always emitted.
//   - ps.FPAtomic false: a single _gfx90a real.
//   - ps.FPAtomic true:  a _gfx90a real limited to isGFX90AOnly, plus a
//                        separate _gfx940 real.
// NOTE(review): BoolToList<cond>.ret is presumably a one-element list when
// cond is true and empty otherwise, making each 'foreach' a conditional
// definition -- confirm against its declaration.
multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
23702392
def _vi : MUBUF_Real_vi<op, ps>;
2371-
def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>;
2393+
2394+
// Non-FP-atomic pseudos: third template argument left at its default.
foreach _ = BoolToList<!not(ps.FPAtomic)>.ret in
2395+
def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
2396+
2397+
// FP-atomic pseudos: third template argument forced to 0 for gfx90a.
foreach _ = BoolToList<ps.FPAtomic>.ret in {
2398+
def _gfx90a : MUBUF_Real_gfx90a<op, ps, 0> {
2399+
let SubtargetPredicate = isGFX90AOnly;
2400+
let AssemblerPredicate = isGFX90AOnly;
2401+
}
2402+
def _gfx940 : MUBUF_Real_gfx940<op, ps>;
2403+
}
23722404
}
23732405

23742406
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
@@ -2558,9 +2590,17 @@ let SubtargetPredicate = isGFX90APlus in {
25582590
} // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus
25592591

25602592
// gfx90a real encoding of BUFFER_WBL2 (opcode 0x28), limited to isGFX90AOnly
// for both assembler and subtarget predicates.
def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> {
2593+
let AsmString = BUFFER_WBL2.Mnemonic; // drop flags: bare mnemonic only, the pseudo's "$cpol" operand is not printed
2594+
let AssemblerPredicate = isGFX90AOnly;
2595+
let SubtargetPredicate = isGFX90AOnly;
25612596
}
25622597
def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>;
25632598

2599+
// GFX940 real encodings of the two invalidate/writeback pseudos.
// BUFFER_WBL2 keeps opcode 0x28; BUFFER_INV uses 0x29, the same opcode value
// that BUFFER_INVL2_gfx90a uses in the gfx90a encoding.
let SubtargetPredicate = isGFX940Plus in {
2600+
def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>;
2601+
def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>;
2602+
}
2603+
25642604
class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
25652605
MTBUF_Real<ps>,
25662606
Enc64,

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1360,7 +1360,9 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
13601360
// to initiate writeback of any dirty cache lines of earlier writes by the
13611361
// same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
13621362
// writeback has completed.
1363-
BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
1363+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
1364+
// Set SC bits to indicate system scope.
1365+
.addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
13641366
// Followed by same as GFX7, which will ensure the necessary "S_WAITCNT
13651367
// vmcnt(0)" needed by the "BUFFER_WBL2".
13661368
Changed = true;

llvm/test/MC/AMDGPU/gfx940_asm_features.s

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,33 @@ v_mov_b64 v[2:3], 1
149149
// GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
150150
v_mov_b64 v[2:3], 0x64
151151

152+
// GFX90A: error: invalid operand for instruction
153+
// GFX10: error: instruction not supported on this GPU
154+
// GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
155+
buffer_wbl2 sc1
156+
157+
// GFX90A: error: invalid operand for instruction
158+
// GFX10: error: instruction not supported on this GPU
159+
// GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
160+
buffer_wbl2 sc0
161+
162+
// GFX90A: error: invalid operand for instruction
163+
// GFX10: error: instruction not supported on this GPU
164+
// GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
165+
buffer_wbl2 sc0 sc1
166+
167+
// NOT-GFX940: error: instruction not supported on this GPU
168+
// GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
169+
buffer_inv sc0
170+
171+
// NOT-GFX940: error: instruction not supported on this GPU
172+
// GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
173+
buffer_inv sc1
174+
175+
// NOT-GFX940: error: instruction not supported on this GPU
176+
// GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
177+
buffer_inv sc0 sc1
178+
152179
// NOT-GFX940: error: invalid operand for instruction
153180
// GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
154181
buffer_atomic_swap v5, off, s[8:11], s3 sc0
@@ -224,3 +251,28 @@ global_atomic_min_f64 v[0:1], v[2:3], off sc1
224251
// GFX10: error: instruction not supported on this GPU
225252
// GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
226253
global_atomic_max_f64 v[0:1], v[2:3], off sc1
254+
255+
// GFX90A: error: invalid operand for instruction
256+
// GFX10: error: instruction not supported on this GPU
257+
// GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
258+
buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1
259+
260+
// GFX90A: error: invalid operand for instruction
261+
// GFX10: error: instruction not supported on this GPU
262+
// GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
263+
buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1
264+
265+
// GFX90A: error: invalid operand for instruction
266+
// GFX10: error: instruction not supported on this GPU
267+
// GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
268+
buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1
269+
270+
// GFX90A: error: invalid operand for instruction
271+
// GFX10: error: instruction not supported on this GPU
272+
// GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
273+
buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1
274+
275+
// GFX90A: error: invalid operand for instruction
276+
// GFX10: error: instruction not supported on this GPU
277+
// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
278+
buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1

llvm/test/MC/AMDGPU/gfx940_err.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ v_mov_b64 v[2:3], v[4:5] dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWOR
3131
v_mov_b64_sdwa v[2:3], v[4:5]
3232
// GFX940: error: sdwa variant of this instruction is not supported
3333

34+
buffer_invl2
35+
// GFX940: error: instruction not supported on this GPU
36+
3437
global_load_dword v2, v[2:3], off glc
3538
// GFX940: error: invalid operand for instruction
3639

@@ -48,3 +51,9 @@ buffer_atomic_swap v5, off, s[8:11], s3 glc
4851

4952
buffer_atomic_swap v5, off, s[8:11], s3 slc
5053
// GFX940: error: invalid operand for instruction
54+
55+
buffer_wbl2 glc
56+
// GFX940: error: invalid operand for instruction
57+
58+
buffer_wbl2 scc
59+
// GFX940: error: invalid operand for instruction

llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,24 @@
102102
# GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
103103
0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00
104104

105+
# GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
106+
0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00
107+
108+
# GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
109+
0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00
110+
111+
# GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
112+
0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00
113+
114+
# GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
115+
0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00
116+
117+
# GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
118+
0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00
119+
120+
# GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
121+
0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00
122+
105123
# GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
106124
0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03
107125

@@ -149,3 +167,18 @@
149167

150168
# GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
151169
0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00
170+
171+
# GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
172+
0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03
173+
174+
# GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
175+
0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03
176+
177+
# GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
178+
0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03
179+
180+
# GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
181+
0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03
182+
183+
# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
184+
0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03

0 commit comments

Comments
 (0)