Skip to content

Commit 444dd9b

Browse files
authored
AMDGPU: Cleanup FP atomicrmw tests and cover fmin/fmax (llvm#95131)
We are apparently missing codegen support for atomicrmw fmin/fmax. Also clean up the FP atomicrmw tests so that they are more consistent and comprehensively cover the relevant cases.
1 parent 93318a8 commit 444dd9b

18 files changed

+189050
-16126
lines changed

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll

Lines changed: 7394 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll

Lines changed: 8062 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll

Lines changed: 8062 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll

Lines changed: 13854 additions & 4105 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll

Lines changed: 15756 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll

Lines changed: 15756 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll

Lines changed: 15075 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefix=GCN %s
3+
4+
; Returning form: an agent-scope, seq_cst `atomicrmw fadd` of float 4.0 on a
; global (addrspace(1)) pointer whose result is used by the store below.
; The function is compiled with attribute group #1 (gfx803 with
; +atomic-fadd-no-rtn-insts). The expected output does not use a returning
; float-add atomic; it contains a global_atomic_cmpswap retry loop
; (%atomicrmw.start). Before the loop, execution is narrowed to the lanes whose
; v_mbcnt result is 0, and 4.0 is multiplied by the s_bcnt1 count of the saved
; exec mask.
; NOTE(review): "wrong_subtarget" presumably refers to the feature being
; force-enabled on a CPU that does not normally have it - confirm.
define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(ptr addrspace(1) %ptr) #1 {
5+
; GCN-LABEL: global_atomic_fadd_ret_f32_wrong_subtarget:
6+
; GCN:       ; %bb.0:
7+
; GCN-NEXT:    s_mov_b64 s[4:5], exec
8+
; GCN-NEXT:    v_mbcnt_lo_u32_b32 v0, s4, 0
9+
; GCN-NEXT:    v_mbcnt_hi_u32_b32 v0, s5, v0
10+
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
11+
; GCN-NEXT:    ; implicit-def: $vgpr1
12+
; GCN-NEXT:    s_and_saveexec_b64 s[2:3], vcc
13+
; GCN-NEXT:    s_cbranch_execz .LBB0_4
14+
; GCN-NEXT:  ; %bb.1:
15+
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
16+
; GCN-NEXT:    s_bcnt1_i32_b64 s7, s[4:5]
17+
; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, s7
18+
; GCN-NEXT:    s_mov_b64 s[4:5], 0
19+
; GCN-NEXT:    v_mul_f32_e32 v2, 4.0, v1
20+
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
21+
; GCN-NEXT:    s_load_dword s6, s[0:1], 0x0
22+
; GCN-NEXT:    v_mov_b32_e32 v3, 0
23+
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
24+
; GCN-NEXT:    v_mov_b32_e32 v1, s6
25+
; GCN-NEXT:  .LBB0_2: ; %atomicrmw.start
26+
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
27+
; GCN-NEXT:    v_mov_b32_e32 v5, v1
28+
; GCN-NEXT:    v_add_f32_e32 v4, v5, v2
29+
; GCN-NEXT:    global_atomic_cmpswap v1, v3, v[4:5], s[0:1] glc
30+
; GCN-NEXT:    s_waitcnt vmcnt(0)
31+
; GCN-NEXT:    buffer_wbinvl1
32+
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
33+
; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
34+
; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
35+
; GCN-NEXT:    s_cbranch_execnz .LBB0_2
36+
; GCN-NEXT:  ; %bb.3: ; %Flow
37+
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
38+
; GCN-NEXT:  .LBB0_4: ; %Flow2
39+
; GCN-NEXT:    s_or_b64 exec, exec, s[2:3]
40+
; GCN-NEXT:    v_readfirstlane_b32 s0, v1
41+
; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
42+
; GCN-NEXT:    v_mad_f32 v0, v0, 4.0, s0
43+
; GCN-NEXT:    global_store_dword v[0:1], v0, off
44+
; GCN-NEXT:    s_endpgm
45+
  %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst
46+
  store float %result, ptr addrspace(1) undef
47+
  ret void
48+
}
49+
50+
; Non-returning form: the same agent-scope, seq_cst `atomicrmw fadd` of float
; 4.0 on a global (addrspace(1)) pointer, but %result is never used.
; The function is compiled with attribute group #1 (gfx803 with
; +atomic-fadd-no-rtn-insts). The expected output contains a single
; global_atomic_add_f32 and no compare-and-swap loop. It is executed only by the
; lanes whose v_mbcnt result is 0, with 4.0 multiplied by the s_bcnt1 count of
; the saved exec mask.
; NOTE(review): "wrong_subtarget" presumably refers to the feature being
; force-enabled on a CPU that does not normally have it - confirm.
define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(ptr addrspace(1) %ptr) #1 {
51+
; GCN-LABEL: global_atomic_fadd_noret_f32_wrong_subtarget:
52+
; GCN:       ; %bb.0:
53+
; GCN-NEXT:    s_mov_b64 s[2:3], exec
54+
; GCN-NEXT:    v_mbcnt_lo_u32_b32 v0, s2, 0
55+
; GCN-NEXT:    v_mbcnt_hi_u32_b32 v0, s3, v0
56+
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
57+
; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
58+
; GCN-NEXT:    s_cbranch_execz .LBB1_2
59+
; GCN-NEXT:  ; %bb.1:
60+
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
61+
; GCN-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
62+
; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, s2
63+
; GCN-NEXT:    v_mov_b32_e32 v0, 0
64+
; GCN-NEXT:    v_mul_f32_e32 v1, 4.0, v1
65+
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
66+
; GCN-NEXT:    global_atomic_add_f32 v0, v1, s[0:1]
67+
; GCN-NEXT:    s_waitcnt vmcnt(0)
68+
; GCN-NEXT:    buffer_wbinvl1
69+
; GCN-NEXT:  .LBB1_2:
70+
; GCN-NEXT:    s_endpgm
71+
  %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst
72+
  ret void
73+
}
74+
75+
; Attribute group referenced by both kernels in this file: target CPU gfx803
; with the atomic-fadd-no-rtn-insts feature explicitly enabled, f32 denormal
; mode set to preserve-sign, and amdgpu-unsafe-fp-atomics set to true.
attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" }

0 commit comments

Comments
 (0)