@@ -15550,22 +15550,9 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
15550
15550
; GFX908-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
15551
15551
; GFX908: ; %bb.0:
15552
15552
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15553
- ; GFX908-NEXT: global_load_dword v4, v[0:1], off
15554
- ; GFX908-NEXT: s_mov_b64 s[4:5], 0
15555
- ; GFX908-NEXT: .LBB67_1: ; %atomicrmw.start
15556
- ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
15557
- ; GFX908-NEXT: s_waitcnt vmcnt(0)
15558
- ; GFX908-NEXT: v_pk_add_f16 v3, v4, v2
15559
- ; GFX908-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
15553
+ ; GFX908-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off
15560
15554
; GFX908-NEXT: s_waitcnt vmcnt(0)
15561
15555
; GFX908-NEXT: buffer_wbinvl1
15562
- ; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
15563
- ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
15564
- ; GFX908-NEXT: v_mov_b32_e32 v4, v3
15565
- ; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
15566
- ; GFX908-NEXT: s_cbranch_execnz .LBB67_1
15567
- ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
15568
- ; GFX908-NEXT: s_or_b64 exec, exec, s[4:5]
15569
15556
; GFX908-NEXT: s_setpc_b64 s[30:31]
15570
15557
;
15571
15558
; GFX8-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
@@ -15771,22 +15758,9 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine
15771
15758
; GFX908-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
15772
15759
; GFX908: ; %bb.0:
15773
15760
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15774
- ; GFX908-NEXT: global_load_dword v4, v[0:1], off offset:2044
15775
- ; GFX908-NEXT: s_mov_b64 s[4:5], 0
15776
- ; GFX908-NEXT: .LBB68_1: ; %atomicrmw.start
15777
- ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
15778
- ; GFX908-NEXT: s_waitcnt vmcnt(0)
15779
- ; GFX908-NEXT: v_pk_add_f16 v3, v4, v2
15780
- ; GFX908-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:2044 glc
15761
+ ; GFX908-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044
15781
15762
; GFX908-NEXT: s_waitcnt vmcnt(0)
15782
15763
; GFX908-NEXT: buffer_wbinvl1
15783
- ; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
15784
- ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
15785
- ; GFX908-NEXT: v_mov_b32_e32 v4, v3
15786
- ; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
15787
- ; GFX908-NEXT: s_cbranch_execnz .LBB68_1
15788
- ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
15789
- ; GFX908-NEXT: s_or_b64 exec, exec, s[4:5]
15790
15764
; GFX908-NEXT: s_setpc_b64 s[30:31]
15791
15765
;
15792
15766
; GFX8-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@@ -15995,22 +15969,9 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine
15995
15969
; GFX908-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
15996
15970
; GFX908: ; %bb.0:
15997
15971
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15998
- ; GFX908-NEXT: global_load_dword v4, v[0:1], off offset:-2048
15999
- ; GFX908-NEXT: s_mov_b64 s[4:5], 0
16000
- ; GFX908-NEXT: .LBB69_1: ; %atomicrmw.start
16001
- ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
16002
- ; GFX908-NEXT: s_waitcnt vmcnt(0)
16003
- ; GFX908-NEXT: v_pk_add_f16 v3, v4, v2
16004
- ; GFX908-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:-2048 glc
15972
+ ; GFX908-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2048
16005
15973
; GFX908-NEXT: s_waitcnt vmcnt(0)
16006
15974
; GFX908-NEXT: buffer_wbinvl1
16007
- ; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
16008
- ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
16009
- ; GFX908-NEXT: v_mov_b32_e32 v4, v3
16010
- ; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
16011
- ; GFX908-NEXT: s_cbranch_execnz .LBB69_1
16012
- ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
16013
- ; GFX908-NEXT: s_or_b64 exec, exec, s[4:5]
16014
15975
; GFX908-NEXT: s_setpc_b64 s[30:31]
16015
15976
;
16016
15977
; GFX8-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@@ -16917,22 +16878,9 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr a
16917
16878
; GFX908-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
16918
16879
; GFX908: ; %bb.0:
16919
16880
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16920
- ; GFX908-NEXT: global_load_dword v4, v[0:1], off
16921
- ; GFX908-NEXT: s_mov_b64 s[4:5], 0
16922
- ; GFX908-NEXT: .LBB73_1: ; %atomicrmw.start
16923
- ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
16924
- ; GFX908-NEXT: s_waitcnt vmcnt(0)
16925
- ; GFX908-NEXT: v_pk_add_f16 v3, v4, v2
16926
- ; GFX908-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
16881
+ ; GFX908-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off
16927
16882
; GFX908-NEXT: s_waitcnt vmcnt(0)
16928
16883
; GFX908-NEXT: buffer_wbinvl1
16929
- ; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
16930
- ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
16931
- ; GFX908-NEXT: v_mov_b32_e32 v4, v3
16932
- ; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
16933
- ; GFX908-NEXT: s_cbranch_execnz .LBB73_1
16934
- ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
16935
- ; GFX908-NEXT: s_or_b64 exec, exec, s[4:5]
16936
16884
; GFX908-NEXT: s_setpc_b64 s[30:31]
16937
16885
;
16938
16886
; GFX8-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
@@ -17368,22 +17316,9 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
17368
17316
; GFX908-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
17369
17317
; GFX908: ; %bb.0:
17370
17318
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17371
- ; GFX908-NEXT: global_load_dword v4, v[0:1], off
17372
- ; GFX908-NEXT: s_mov_b64 s[4:5], 0
17373
- ; GFX908-NEXT: .LBB75_1: ; %atomicrmw.start
17374
- ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
17375
- ; GFX908-NEXT: s_waitcnt vmcnt(0)
17376
- ; GFX908-NEXT: v_pk_add_f16 v3, v4, v2
17377
- ; GFX908-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
17319
+ ; GFX908-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off
17378
17320
; GFX908-NEXT: s_waitcnt vmcnt(0)
17379
17321
; GFX908-NEXT: buffer_wbinvl1
17380
- ; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
17381
- ; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
17382
- ; GFX908-NEXT: v_mov_b32_e32 v4, v3
17383
- ; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5]
17384
- ; GFX908-NEXT: s_cbranch_execnz .LBB75_1
17385
- ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end
17386
- ; GFX908-NEXT: s_or_b64 exec, exec, s[4:5]
17387
17322
; GFX908-NEXT: s_setpc_b64 s[30:31]
17388
17323
;
17389
17324
; GFX8-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
0 commit comments