@@ -755,9 +755,9 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
755
755
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
756
756
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
757
757
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
758
- ; GFX90A-NEXT: s_waitcnt vmcnt(0)
759
758
; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start
760
759
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
760
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0)
761
761
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
762
762
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
763
763
; GFX90A-NEXT: buffer_wbl2
@@ -824,9 +824,9 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
824
824
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825
825
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
826
826
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
827
- ; GFX90A-NEXT: s_waitcnt vmcnt(0)
828
827
; GFX90A-NEXT: .LBB28_1: ; %atomicrmw.start
829
828
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
829
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0)
830
830
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
831
831
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
832
832
; GFX90A-NEXT: buffer_wbl2
@@ -947,10 +947,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
947
947
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
948
948
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
949
949
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
950
- ; GFX90A-NEXT: s_waitcnt vmcnt(0)
951
950
; GFX90A-NEXT: .LBB32_1: ; %atomicrmw.start
952
951
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
953
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
952
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
954
953
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
955
954
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
956
955
; GFX90A-NEXT: buffer_wbl2
@@ -1023,10 +1022,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
1023
1022
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1024
1023
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1025
1024
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1026
- ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1027
1025
; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start
1028
1026
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1029
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1027
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1030
1028
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
1031
1029
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
1032
1030
; GFX90A-NEXT: buffer_wbl2
@@ -1067,10 +1065,9 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
1067
1065
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068
1066
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1069
1067
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
1070
- ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1071
1068
; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start
1072
1069
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1073
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1070
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1074
1071
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1075
1072
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
1076
1073
; GFX90A-NEXT: buffer_wbl2
@@ -1137,10 +1134,9 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
1137
1134
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138
1135
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1139
1136
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
1140
- ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1141
1137
; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start
1142
1138
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1143
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1139
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1144
1140
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1145
1141
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
1146
1142
; GFX90A-NEXT: buffer_wbl2
@@ -1228,10 +1224,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
1228
1224
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1229
1225
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1230
1226
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1231
- ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1232
1227
; GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start
1233
1228
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
1234
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1229
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1235
1230
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
1236
1231
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
1237
1232
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
0 commit comments