Skip to content

Commit 7631af3

Browse files
committed
[AMDGPU] Skip generating cache invalidating instructions on AMDPAL
Summary:
Frontend guarantees that coherent accesses have corresponding cache policy bits set (glc, dlc). Therefore there is no need for extra instructions that invalidate cache.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78800
1 parent 7aaff8f commit 7631af3

File tree

2 files changed

+70
-23
lines changed

2 files changed

+70
-23
lines changed

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,9 @@ class SICacheControl {
254254

255255
IsaVersion IV;
256256

257+
/// Whether to insert cache invalidation instructions.
258+
bool InsertCacheInv;
259+
257260
SICacheControl(const GCNSubtarget &ST);
258261

259262
public:
@@ -650,6 +653,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
650653
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
651654
TII = ST.getInstrInfo();
652655
IV = getIsaVersion(ST.getCPU());
656+
InsertCacheInv = !ST.isAmdPalOS();
653657
}
654658

655659
/* static */
@@ -714,6 +718,9 @@ bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
714718
SIAtomicScope Scope,
715719
SIAtomicAddrSpace AddrSpace,
716720
Position Pos) const {
721+
if (!InsertCacheInv)
722+
return false;
723+
717724
bool Changed = false;
718725

719726
MachineBasicBlock &MBB = *MI->getParent();
@@ -852,6 +859,9 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
852859
SIAtomicScope Scope,
853860
SIAtomicAddrSpace AddrSpace,
854861
Position Pos) const {
862+
if (!InsertCacheInv)
863+
return false;
864+
855865
bool Changed = false;
856866

857867
MachineBasicBlock &MBB = *MI->getParent();
@@ -954,6 +964,9 @@ bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
954964
SIAtomicScope Scope,
955965
SIAtomicAddrSpace AddrSpace,
956966
Position Pos) const {
967+
if (!InsertCacheInv)
968+
return false;
969+
957970
bool Changed = false;
958971

959972
MachineBasicBlock &MBB = *MI->getParent();

llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
2-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
3-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
4-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
3+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
4+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
5+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10 %s
56

67
; FUNC-LABEL: {{^}}system_acquire:
78
; GCN: %bb.0
89
; GCN-NOT: ATOMIC_FENCE
910
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
10-
; GCN-NEXT: buffer_wbinvl1{{$}}
11+
; GCN10: s_waitcnt_vscnt null, 0x0
12+
; GCN-NOT: buffer_wbinvl1{{$}}
1113
; GCN: s_endpgm
1214
define amdgpu_kernel void @system_acquire() {
1315
entry:
@@ -19,6 +21,7 @@ entry:
1921
; GCN: %bb.0
2022
; GCN-NOT: ATOMIC_FENCE
2123
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
24+
; GCN10: s_waitcnt_vscnt null, 0x0
2225
; GCN: s_endpgm
2326
define amdgpu_kernel void @system_release() {
2427
entry:
@@ -30,7 +33,8 @@ entry:
3033
; GCN: %bb.0
3134
; GCN-NOT: ATOMIC_FENCE
3235
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
33-
; GCN: buffer_wbinvl1{{$}}
36+
; GCN10: s_waitcnt_vscnt null, 0x0
37+
; GCN-NOT: buffer_wbinvl1{{$}}
3438
; GCN: s_endpgm
3539
define amdgpu_kernel void @system_acq_rel() {
3640
entry:
@@ -42,7 +46,8 @@ entry:
4246
; GCN: %bb.0
4347
; GCN-NOT: ATOMIC_FENCE
4448
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
45-
; GCN: buffer_wbinvl1{{$}}
49+
; GCN10: s_waitcnt_vscnt null, 0x0
50+
; GCN-NOT: buffer_wbinvl1{{$}}
4651
; GCN: s_endpgm
4752
define amdgpu_kernel void @system_seq_cst() {
4853
entry:
@@ -54,7 +59,8 @@ entry:
5459
; GCN: %bb.0
5560
; GCN-NOT: ATOMIC_FENCE
5661
; GCN: s_waitcnt vmcnt(0){{$}}
57-
; GCN-NEXT: buffer_wbinvl1{{$}}
62+
; GCN10: s_waitcnt_vscnt null, 0x0
63+
; GCN-NOT: buffer_wbinvl1{{$}}
5864
; GCN: s_endpgm
5965
define amdgpu_kernel void @system_one_as_acquire() {
6066
entry:
@@ -66,6 +72,7 @@ entry:
6672
; GCN: %bb.0
6773
; GCN-NOT: ATOMIC_FENCE
6874
; GCN: s_waitcnt vmcnt(0){{$}}
75+
; GCN10: s_waitcnt_vscnt null, 0x0
6976
; GCN: s_endpgm
7077
define amdgpu_kernel void @system_one_as_release() {
7178
entry:
@@ -77,7 +84,8 @@ entry:
7784
; GCN: %bb.0
7885
; GCN-NOT: ATOMIC_FENCE
7986
; GCN: s_waitcnt vmcnt(0){{$}}
80-
; GCN: buffer_wbinvl1{{$}}
87+
; GCN10: s_waitcnt_vscnt null, 0x0
88+
; GCN-NOT: buffer_wbinvl1{{$}}
8189
; GCN: s_endpgm
8290
define amdgpu_kernel void @system_one_as_acq_rel() {
8391
entry:
@@ -89,7 +97,8 @@ entry:
8997
; GCN: %bb.0
9098
; GCN-NOT: ATOMIC_FENCE
9199
; GCN: s_waitcnt vmcnt(0){{$}}
92-
; GCN: buffer_wbinvl1{{$}}
100+
; GCN10: s_waitcnt_vscnt null, 0x0
101+
; GCN-NOT: buffer_wbinvl1{{$}}
93102
; GCN: s_endpgm
94103
define amdgpu_kernel void @system_one_as_seq_cst() {
95104
entry:
@@ -181,7 +190,8 @@ entry:
181190
; GCN: %bb.0
182191
; GCN-NOT: ATOMIC_FENCE
183192
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
184-
; GCN-NEXT: buffer_wbinvl1{{$}}
193+
; GCN10: s_waitcnt_vscnt null, 0x0
194+
; GCN-NOT: buffer_wbinvl1{{$}}
185195
; GCN: s_endpgm
186196
define amdgpu_kernel void @agent_acquire() {
187197
entry:
@@ -193,6 +203,7 @@ entry:
193203
; GCN: %bb.0
194204
; GCN-NOT: ATOMIC_FENCE
195205
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
206+
; GCN10: s_waitcnt_vscnt null, 0x0
196207
; GCN: s_endpgm
197208
define amdgpu_kernel void @agent_release() {
198209
entry:
@@ -204,7 +215,8 @@ entry:
204215
; GCN: %bb.0
205216
; GCN-NOT: ATOMIC_FENCE
206217
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
207-
; GCN: buffer_wbinvl1{{$}}
218+
; GCN10: s_waitcnt_vscnt null, 0x0
219+
; GCN-NOT: buffer_wbinvl1{{$}}
208220
; GCN: s_endpgm
209221
define amdgpu_kernel void @agent_acq_rel() {
210222
entry:
@@ -216,7 +228,8 @@ entry:
216228
; GCN: %bb.0
217229
; GCN-NOT: ATOMIC_FENCE
218230
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
219-
; GCN: buffer_wbinvl1{{$}}
231+
; GCN10: s_waitcnt_vscnt null, 0x0
232+
; GCN-NOT: buffer_wbinvl1{{$}}
220233
; GCN: s_endpgm
221234
define amdgpu_kernel void @agent_seq_cst() {
222235
entry:
@@ -228,7 +241,8 @@ entry:
228241
; GCN: %bb.0
229242
; GCN-NOT: ATOMIC_FENCE
230243
; GCN: s_waitcnt vmcnt(0){{$}}
231-
; GCN-NEXT: buffer_wbinvl1{{$}}
244+
; GCN10: s_waitcnt_vscnt null, 0x0
245+
; GCN-NOT: buffer_wbinvl1{{$}}
232246
; GCN: s_endpgm
233247
define amdgpu_kernel void @agent_one_as_acquire() {
234248
entry:
@@ -240,6 +254,7 @@ entry:
240254
; GCN: %bb.0
241255
; GCN-NOT: ATOMIC_FENCE
242256
; GCN: s_waitcnt vmcnt(0){{$}}
257+
; GCN10: s_waitcnt_vscnt null, 0x0
243258
; GCN: s_endpgm
244259
define amdgpu_kernel void @agent_one_as_release() {
245260
entry:
@@ -251,7 +266,8 @@ entry:
251266
; GCN: %bb.0
252267
; GCN-NOT: ATOMIC_FENCE
253268
; GCN: s_waitcnt vmcnt(0){{$}}
254-
; GCN: buffer_wbinvl1{{$}}
269+
; GCN10: s_waitcnt_vscnt null, 0x0
270+
; GCN-NOT: buffer_wbinvl1{{$}}
255271
; GCN: s_endpgm
256272
define amdgpu_kernel void @agent_one_as_acq_rel() {
257273
entry:
@@ -263,7 +279,8 @@ entry:
263279
; GCN: %bb.0
264280
; GCN-NOT: ATOMIC_FENCE
265281
; GCN: s_waitcnt vmcnt(0){{$}}
266-
; GCN: buffer_wbinvl1{{$}}
282+
; GCN10: s_waitcnt_vscnt null, 0x0
283+
; GCN-NOT: buffer_wbinvl1{{$}}
267284
; GCN: s_endpgm
268285
define amdgpu_kernel void @agent_one_as_seq_cst() {
269286
entry:
@@ -273,7 +290,9 @@ entry:
273290

274291
; FUNC-LABEL: {{^}}workgroup_acquire:
275292
; GCN: %bb.0
276-
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
293+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
294+
; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
295+
; GCN10: s_waitcnt_vscnt null, 0x0
277296
; GCN-NOT: ATOMIC_FENCE
278297
; GCN: s_endpgm
279298
define amdgpu_kernel void @workgroup_acquire() {
@@ -284,7 +303,9 @@ entry:
284303

285304
; FUNC-LABEL: {{^}}workgroup_release:
286305
; GCN: %bb.0
287-
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
306+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
307+
; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
308+
; GCN10: s_waitcnt_vscnt null, 0x0
288309
; GCN-NOT: ATOMIC_FENCE
289310
; GCN: s_endpgm
290311
define amdgpu_kernel void @workgroup_release() {
@@ -295,7 +316,9 @@ entry:
295316

296317
; FUNC-LABEL: {{^}}workgroup_acq_rel:
297318
; GCN: %bb.0
298-
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
319+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
320+
; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
321+
; GCN10: s_waitcnt_vscnt null, 0x0
299322
; GCN-NOT: ATOMIC_FENCE
300323
; GCN: s_endpgm
301324
define amdgpu_kernel void @workgroup_acq_rel() {
@@ -307,6 +330,9 @@ entry:
307330
; FUNC-LABEL: {{^}}workgroup_seq_cst:
308331
; GCN: %bb.0
309332
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
333+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
334+
; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
335+
; GCN10: s_waitcnt_vscnt null, 0x0
310336
; GCN-NOT: ATOMIC_FENCE
311337
; GCN: s_endpgm
312338
define amdgpu_kernel void @workgroup_seq_cst() {
@@ -317,7 +343,9 @@ entry:
317343

318344
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
319345
; GCN: %bb.0
320-
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
346+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
347+
; GCN10: s_waitcnt vmcnt(0)
348+
; GCN10: s_waitcnt_vscnt null, 0x0
321349
; GCN-NOT: ATOMIC_FENCE
322350
; GCN: s_endpgm
323351
define amdgpu_kernel void @workgroup_one_as_acquire() {
@@ -328,7 +356,9 @@ entry:
328356

329357
; FUNC-LABEL: {{^}}workgroup_one_as_release:
330358
; GCN: %bb.0
331-
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
359+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
360+
; GCN10: s_waitcnt vmcnt(0)
361+
; GCN10: s_waitcnt_vscnt null, 0x0
332362
; GCN-NOT: ATOMIC_FENCE
333363
; GCN: s_endpgm
334364
define amdgpu_kernel void @workgroup_one_as_release() {
@@ -339,7 +369,9 @@ entry:
339369

340370
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
341371
; GCN: %bb.0
342-
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
372+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
373+
; GCN10: s_waitcnt vmcnt(0)
374+
; GCN10: s_waitcnt_vscnt null, 0x0
343375
; GCN-NOT: ATOMIC_FENCE
344376
; GCN: s_endpgm
345377
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
@@ -350,7 +382,9 @@ entry:
350382

351383
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
352384
; GCN: %bb.0
353-
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
385+
; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
386+
; GCN10: s_waitcnt vmcnt(0)
387+
; GCN10: s_waitcnt_vscnt null, 0x0
354388
; GCN-NOT: ATOMIC_FENCE
355389
; GCN: s_endpgm
356390
define amdgpu_kernel void @workgroup_one_as_seq_cst() {

0 commit comments

Comments
 (0)