
Commit de82fde

AMDGPU/Uniformity/GlobalISel: G_AMDGPU atomics are always divergent
Patch by: Acim Maravic
Differential Revision: https://reviews.llvm.org/D157091
1 parent 1fcc2bc commit de82fde

File tree: 4 files changed, +100 −1 lines

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 1 deletion

@@ -8662,7 +8662,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
 
   if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
       opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
-      opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
+      opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
+      AMDGPU::isGenericAtomic(opcode)) {
     return InstructionUniformity::NeverUniform;
   }
   return InstructionUniformity::Default;
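
Taken together, the condition now covers four opcode classes. A minimal annotated sketch of how the check reads after this change (the comments are ours for exposition; `opcode` is fetched from the MachineInstr earlier in the method, outside this hunk):

  // Generic atomics never yield a wave-uniform result: each lane's
  // read-modify-write observes its own pre-operation memory value.
  if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||     // G_ATOMICRMW_* family
      opcode == AMDGPU::G_ATOMIC_CMPXCHG ||                // generic compare-exchange
      opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||   // cmpxchg returning a success bit
      AMDGPU::isGenericAtomic(opcode)) {                   // new: AMDGPU atomic pseudos (defined below)
    return InstructionUniformity::NeverUniform;
  }
  return InstructionUniformity::Default;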

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 22 additions & 0 deletions

@@ -512,6 +512,28 @@ bool isPermlane16(unsigned Opc) {
          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
 }
 
+bool isGenericAtomic(unsigned Opc) {
+  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
+         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
+         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
+}
+
 bool isTrue16Inst(unsigned Opc) {
   const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
   return Info ? Info->IsTrue16 : false;
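
A brief caller-side sketch (hypothetical snippet, not part of this patch) of how target code is expected to consult the new helper:

  #include "Utils/AMDGPUBaseInfo.h"      // declares llvm::AMDGPU::isGenericAtomic
  #include "llvm/CodeGen/MachineInstr.h"

  // Hypothetical predicate: true if MI is one of the AMDGPU generic/buffer
  // atomic pseudos that this patch forces to NeverUniform.
  static bool isAlwaysDivergentAtomic(const llvm::MachineInstr &MI) {
    return llvm::AMDGPU::isGenericAtomic(MI.getOpcode());
  }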

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 3 additions & 0 deletions

@@ -547,6 +547,9 @@ bool isMAC(unsigned Opc);
 LLVM_READNONE
 bool isPermlane16(unsigned Opc);
 
+LLVM_READNONE
+bool isGenericAtomic(unsigned Opc);
+
 namespace VOPD {
 
 enum Component : unsigned {

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir

Lines changed: 73 additions & 0 deletions

@@ -85,3 +85,76 @@ body: |
     SI_RETURN implicit $vgpr0
 
 ...
+
+---
+name:            test_buffer_atomics_always_divergent
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+    %0:_(s32) = COPY $sgpr0
+    %1:sgpr(p0) = COPY $sgpr2_sgpr3
+    %2:_(s32) = IMPLICIT_DEF
+    %3:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    %4:_(s32) = G_CONSTANT i32 0
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_ATOMIC_FMIN
+    %5:_(s32) = G_AMDGPU_ATOMIC_FMIN %0, %3
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_ATOMIC_FMAX
+    %6:_(s32) = G_AMDGPU_ATOMIC_FMAX %0, %3
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SWAP
+    %7:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SWAP %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_ADD
+    %8:_(s32) = G_AMDGPU_BUFFER_ATOMIC_ADD %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SUB
+    %9:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SUB %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMIN
+    %10:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMIN
+    %11:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMAX
+    %12:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMAX
+    %13:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_AND
+    %14:_(s32) = G_AMDGPU_BUFFER_ATOMIC_AND %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_OR
+    %15:_(s32) = G_AMDGPU_BUFFER_ATOMIC_OR %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_XOR
+    %16:_(s32) = G_AMDGPU_BUFFER_ATOMIC_XOR %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_INC
+    %17:_(s32) = G_AMDGPU_BUFFER_ATOMIC_INC %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_DEC
+    %18:_(s32) = G_AMDGPU_BUFFER_ATOMIC_DEC %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD
+    %19:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMIN
+    %20:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMAX
+    %21:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_CMPSWAP
+    %22:_(s32) = G_AMDGPU_BUFFER_ATOMIC_CMPSWAP %0, %4, %3, %2, %2, %2, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_AMDGPU_ATOMIC_CMPXCHG
+    %23:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %1, %4 :: (load store seq_cst (s32), addrspace 0)
+
+...
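
For reference, MIR tests in this directory are driven through the machine uniformity printer pass; a RUN line of roughly the following shape is assumed at the top of atomics-gmir.mir (it sits outside this hunk, so the exact flags are our assumption):

  # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s

FileCheck then matches each `; CHECK: DIVERGENT` annotation against the printer's per-instruction report.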
