Skip to content

Commit 6959adc

Browse files
perlfu authored and yuxuanchen1997 committed
[AMDGPU] Update hasUnwantedEffectsWhenEXECEmpty (#97982)
Summary: Add barriers and s_wait_event to hasUnwantedEffectsWhenEXECEmpty. Add a comment documenting the current expected use of the function.

Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Differential Revision: https://phabricator.intern.facebook.com/D60250953
1 parent ebb302e commit 6959adc

File tree

4 files changed

+361
-4
lines changed

4 files changed

+361
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4131,14 +4131,17 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
41314131
// EXEC = 0, but checking for that case here seems not worth it
41324132
// given the typical code patterns.
41334133
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4134-
isEXP(Opcode) ||
4135-
Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
4136-
Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
4134+
isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4135+
Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
41374136
return true;
41384137

41394138
if (MI.isCall() || MI.isInlineAsm())
41404139
return true; // conservative assumption
41414140

4141+
// Assume that barrier interactions are only intended with active lanes.
4142+
if (isBarrier(Opcode))
4143+
return true;
4144+
41424145
// A mode change is a scalar operation that influences vector instructions.
41434146
if (modifiesModeRegister(MI))
41444147
return true;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,16 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
936936
Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
937937
}
938938

939+
bool isBarrier(unsigned Opcode) const {
940+
return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
941+
Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
942+
Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
943+
Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
944+
Opcode == AMDGPU::S_BARRIER_LEAVE ||
945+
Opcode == AMDGPU::DS_GWS_INIT ||
946+
Opcode == AMDGPU::DS_GWS_BARRIER;
947+
}
948+
939949
static bool doesNotReadTiedSource(const MachineInstr &MI) {
940950
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
941951
}
@@ -1009,7 +1019,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
10091019
/// Return true if the instruction modifies the mode register.
10101020
static bool modifiesModeRegister(const MachineInstr &MI);
10111021

1012-
/// Whether we must prevent this instruction from executing with EXEC = 0.
1022+
/// Returns true if the instruction cannot be safely
1023+
/// executed under EXEC = 0 without hardware error, indeterminate results,
1024+
/// and/or visible effects on future vector execution or outside the shader.
1025+
/// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
1026+
/// used in removing branches over short EXEC = 0 sequences.
1027+
/// As such it embeds certain assumptions which may not apply to every case
1028+
/// of EXEC = 0 execution.
10131029
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
10141030

10151031
/// Returns true if the instruction could potentially depend on the value of

llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,33 @@ body: |
184184
bb.2:
185185
S_ENDPGM 0
186186
...
187+
188+
---
189+
name: skip_barrier
190+
body: |
191+
; CHECK-LABEL: name: skip_barrier
192+
; CHECK: bb.0:
193+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
194+
; CHECK-NEXT: {{ $}}
195+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
196+
; CHECK-NEXT: {{ $}}
197+
; CHECK-NEXT: bb.1:
198+
; CHECK-NEXT: successors: %bb.2(0x80000000)
199+
; CHECK-NEXT: {{ $}}
200+
; CHECK-NEXT: V_NOP_e32 implicit $exec
201+
; CHECK-NEXT: S_BARRIER
202+
; CHECK-NEXT: {{ $}}
203+
; CHECK-NEXT: bb.2:
204+
; CHECK-NEXT: S_ENDPGM 0
205+
bb.0:
206+
successors: %bb.1, %bb.2
207+
S_CBRANCH_EXECZ %bb.2, implicit $exec
208+
209+
bb.1:
210+
successors: %bb.2
211+
V_NOP_e32 implicit $exec
212+
S_BARRIER
213+
214+
bb.2:
215+
S_ENDPGM 0
216+
...

llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,311 @@ body: |
300300
bb.2:
301301
S_ENDPGM 0
302302
...
303+
304+
---
305+
name: skip_wait_event
306+
body: |
307+
; CHECK-LABEL: name: skip_wait_event
308+
; CHECK: bb.0:
309+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
310+
; CHECK-NEXT: {{ $}}
311+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
312+
; CHECK-NEXT: {{ $}}
313+
; CHECK-NEXT: bb.1:
314+
; CHECK-NEXT: successors: %bb.2(0x80000000)
315+
; CHECK-NEXT: {{ $}}
316+
; CHECK-NEXT: V_NOP_e32 implicit $exec
317+
; CHECK-NEXT: S_WAIT_EVENT 0
318+
; CHECK-NEXT: {{ $}}
319+
; CHECK-NEXT: bb.2:
320+
; CHECK-NEXT: S_ENDPGM 0
321+
bb.0:
322+
successors: %bb.1, %bb.2
323+
S_CBRANCH_EXECZ %bb.2, implicit $exec
324+
325+
bb.1:
326+
successors: %bb.2
327+
V_NOP_e32 implicit $exec
328+
S_WAIT_EVENT 0
329+
330+
bb.2:
331+
S_ENDPGM 0
332+
...
333+
334+
---
335+
name: skip_barrier_signal_imm
336+
body: |
337+
; CHECK-LABEL: name: skip_barrier_signal_imm
338+
; CHECK: bb.0:
339+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
340+
; CHECK-NEXT: {{ $}}
341+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
342+
; CHECK-NEXT: {{ $}}
343+
; CHECK-NEXT: bb.1:
344+
; CHECK-NEXT: successors: %bb.2(0x80000000)
345+
; CHECK-NEXT: {{ $}}
346+
; CHECK-NEXT: V_NOP_e32 implicit $exec
347+
; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
348+
; CHECK-NEXT: {{ $}}
349+
; CHECK-NEXT: bb.2:
350+
; CHECK-NEXT: S_ENDPGM 0
351+
bb.0:
352+
successors: %bb.1, %bb.2
353+
S_CBRANCH_EXECZ %bb.2, implicit $exec
354+
355+
bb.1:
356+
successors: %bb.2
357+
V_NOP_e32 implicit $exec
358+
S_BARRIER_SIGNAL_IMM -1
359+
360+
bb.2:
361+
S_ENDPGM 0
362+
...
363+
364+
---
365+
name: skip_barrier_signal_isfirst_imm
366+
body: |
367+
; CHECK-LABEL: name: skip_barrier_signal_isfirst_imm
368+
; CHECK: bb.0:
369+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
370+
; CHECK-NEXT: {{ $}}
371+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
372+
; CHECK-NEXT: {{ $}}
373+
; CHECK-NEXT: bb.1:
374+
; CHECK-NEXT: successors: %bb.2(0x80000000)
375+
; CHECK-NEXT: {{ $}}
376+
; CHECK-NEXT: V_NOP_e32 implicit $exec
377+
; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
378+
; CHECK-NEXT: {{ $}}
379+
; CHECK-NEXT: bb.2:
380+
; CHECK-NEXT: S_ENDPGM 0
381+
bb.0:
382+
successors: %bb.1, %bb.2
383+
S_CBRANCH_EXECZ %bb.2, implicit $exec
384+
385+
bb.1:
386+
successors: %bb.2
387+
V_NOP_e32 implicit $exec
388+
S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
389+
390+
bb.2:
391+
S_ENDPGM 0
392+
...
393+
394+
---
395+
name: skip_barrier_signal_m0
396+
body: |
397+
; CHECK-LABEL: name: skip_barrier_signal_m0
398+
; CHECK: bb.0:
399+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
400+
; CHECK-NEXT: {{ $}}
401+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
402+
; CHECK-NEXT: {{ $}}
403+
; CHECK-NEXT: bb.1:
404+
; CHECK-NEXT: successors: %bb.2(0x80000000)
405+
; CHECK-NEXT: {{ $}}
406+
; CHECK-NEXT: V_NOP_e32 implicit $exec
407+
; CHECK-NEXT: $m0 = S_MOV_B32 -1
408+
; CHECK-NEXT: S_BARRIER_SIGNAL_M0 implicit $m0
409+
; CHECK-NEXT: {{ $}}
410+
; CHECK-NEXT: bb.2:
411+
; CHECK-NEXT: S_ENDPGM 0
412+
bb.0:
413+
successors: %bb.1, %bb.2
414+
S_CBRANCH_EXECZ %bb.2, implicit $exec
415+
416+
bb.1:
417+
successors: %bb.2
418+
V_NOP_e32 implicit $exec
419+
$m0 = S_MOV_B32 -1
420+
S_BARRIER_SIGNAL_M0 implicit $m0
421+
422+
bb.2:
423+
S_ENDPGM 0
424+
...
425+
426+
---
427+
name: skip_barrier_signal_isfirst_m0
428+
body: |
429+
; CHECK-LABEL: name: skip_barrier_signal_isfirst_m0
430+
; CHECK: bb.0:
431+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
432+
; CHECK-NEXT: {{ $}}
433+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
434+
; CHECK-NEXT: {{ $}}
435+
; CHECK-NEXT: bb.1:
436+
; CHECK-NEXT: successors: %bb.2(0x80000000)
437+
; CHECK-NEXT: {{ $}}
438+
; CHECK-NEXT: V_NOP_e32 implicit $exec
439+
; CHECK-NEXT: $m0 = S_MOV_B32 -1
440+
; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
441+
; CHECK-NEXT: {{ $}}
442+
; CHECK-NEXT: bb.2:
443+
; CHECK-NEXT: S_ENDPGM 0
444+
bb.0:
445+
successors: %bb.1, %bb.2
446+
S_CBRANCH_EXECZ %bb.2, implicit $exec
447+
448+
bb.1:
449+
successors: %bb.2
450+
V_NOP_e32 implicit $exec
451+
$m0 = S_MOV_B32 -1
452+
S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
453+
454+
bb.2:
455+
S_ENDPGM 0
456+
...
457+
458+
---
459+
name: skip_barrier_wait
460+
body: |
461+
; CHECK-LABEL: name: skip_barrier_wait
462+
; CHECK: bb.0:
463+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
464+
; CHECK-NEXT: {{ $}}
465+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
466+
; CHECK-NEXT: {{ $}}
467+
; CHECK-NEXT: bb.1:
468+
; CHECK-NEXT: successors: %bb.2(0x80000000)
469+
; CHECK-NEXT: {{ $}}
470+
; CHECK-NEXT: V_NOP_e32 implicit $exec
471+
; CHECK-NEXT: S_BARRIER_WAIT -1
472+
; CHECK-NEXT: {{ $}}
473+
; CHECK-NEXT: bb.2:
474+
; CHECK-NEXT: S_ENDPGM 0
475+
bb.0:
476+
successors: %bb.1, %bb.2
477+
S_CBRANCH_EXECZ %bb.2, implicit $exec
478+
479+
bb.1:
480+
successors: %bb.2
481+
V_NOP_e32 implicit $exec
482+
S_BARRIER_WAIT -1
483+
484+
bb.2:
485+
S_ENDPGM 0
486+
...
487+
488+
---
489+
name: skip_barrier_init_imm
490+
body: |
491+
; CHECK-LABEL: name: skip_barrier_init_imm
492+
; CHECK: bb.0:
493+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
494+
; CHECK-NEXT: {{ $}}
495+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
496+
; CHECK-NEXT: {{ $}}
497+
; CHECK-NEXT: bb.1:
498+
; CHECK-NEXT: successors: %bb.2(0x80000000)
499+
; CHECK-NEXT: {{ $}}
500+
; CHECK-NEXT: V_NOP_e32 implicit $exec
501+
; CHECK-NEXT: $m0 = S_MOV_B32 -1
502+
; CHECK-NEXT: S_BARRIER_INIT_IMM -1, implicit $m0
503+
; CHECK-NEXT: {{ $}}
504+
; CHECK-NEXT: bb.2:
505+
; CHECK-NEXT: S_ENDPGM 0
506+
bb.0:
507+
successors: %bb.1, %bb.2
508+
S_CBRANCH_EXECZ %bb.2, implicit $exec
509+
510+
bb.1:
511+
successors: %bb.2
512+
V_NOP_e32 implicit $exec
513+
$m0 = S_MOV_B32 -1
514+
S_BARRIER_INIT_IMM -1, implicit $m0
515+
516+
bb.2:
517+
S_ENDPGM 0
518+
...
519+
520+
---
521+
name: skip_barrier_init_m0
522+
body: |
523+
; CHECK-LABEL: name: skip_barrier_init_m0
524+
; CHECK: bb.0:
525+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
526+
; CHECK-NEXT: {{ $}}
527+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
528+
; CHECK-NEXT: {{ $}}
529+
; CHECK-NEXT: bb.1:
530+
; CHECK-NEXT: successors: %bb.2(0x80000000)
531+
; CHECK-NEXT: {{ $}}
532+
; CHECK-NEXT: V_NOP_e32 implicit $exec
533+
; CHECK-NEXT: $m0 = S_MOV_B32 -1
534+
; CHECK-NEXT: S_BARRIER_INIT_M0 implicit $m0
535+
; CHECK-NEXT: {{ $}}
536+
; CHECK-NEXT: bb.2:
537+
; CHECK-NEXT: S_ENDPGM 0
538+
bb.0:
539+
successors: %bb.1, %bb.2
540+
S_CBRANCH_EXECZ %bb.2, implicit $exec
541+
542+
bb.1:
543+
successors: %bb.2
544+
V_NOP_e32 implicit $exec
545+
$m0 = S_MOV_B32 -1
546+
S_BARRIER_INIT_M0 implicit $m0
547+
548+
bb.2:
549+
S_ENDPGM 0
550+
...
551+
552+
---
553+
name: skip_barrier_join_imm
554+
body: |
555+
; CHECK-LABEL: name: skip_barrier_join_imm
556+
; CHECK: bb.0:
557+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
558+
; CHECK-NEXT: {{ $}}
559+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
560+
; CHECK-NEXT: {{ $}}
561+
; CHECK-NEXT: bb.1:
562+
; CHECK-NEXT: successors: %bb.2(0x80000000)
563+
; CHECK-NEXT: {{ $}}
564+
; CHECK-NEXT: V_NOP_e32 implicit $exec
565+
; CHECK-NEXT: S_BARRIER_JOIN_IMM -1
566+
; CHECK-NEXT: {{ $}}
567+
; CHECK-NEXT: bb.2:
568+
; CHECK-NEXT: S_ENDPGM 0
569+
bb.0:
570+
successors: %bb.1, %bb.2
571+
S_CBRANCH_EXECZ %bb.2, implicit $exec
572+
573+
bb.1:
574+
successors: %bb.2
575+
V_NOP_e32 implicit $exec
576+
S_BARRIER_JOIN_IMM -1
577+
578+
bb.2:
579+
S_ENDPGM 0
580+
...
581+
582+
---
583+
name: skip_barrier_leave
584+
body: |
585+
; CHECK-LABEL: name: skip_barrier_leave
586+
; CHECK: bb.0:
587+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
588+
; CHECK-NEXT: {{ $}}
589+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
590+
; CHECK-NEXT: {{ $}}
591+
; CHECK-NEXT: bb.1:
592+
; CHECK-NEXT: successors: %bb.2(0x80000000)
593+
; CHECK-NEXT: {{ $}}
594+
; CHECK-NEXT: V_NOP_e32 implicit $exec
595+
; CHECK-NEXT: S_BARRIER_LEAVE implicit-def $scc
596+
; CHECK-NEXT: {{ $}}
597+
; CHECK-NEXT: bb.2:
598+
; CHECK-NEXT: S_ENDPGM 0
599+
bb.0:
600+
successors: %bb.1, %bb.2
601+
S_CBRANCH_EXECZ %bb.2, implicit $exec
602+
603+
bb.1:
604+
successors: %bb.2
605+
V_NOP_e32 implicit $exec
606+
S_BARRIER_LEAVE implicit-def $scc
607+
608+
bb.2:
609+
S_ENDPGM 0
610+
...

0 commit comments

Comments
 (0)