@@ -57,7 +57,7 @@ define amdgpu_ps void @buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %
57
57
; GFX940: ; %bb.0: ; %main_body
58
58
; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
59
59
; GFX940-NEXT: s_waitcnt vmcnt(0)
60
- ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
60
+ ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
61
61
; GFX940-NEXT: s_endpgm
62
62
main_body:
63
63
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 0 , i1 0 )
@@ -93,7 +93,7 @@ define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %r
93
93
; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
94
94
; GFX940-NEXT: v_mov_b32_e32 v2, 0
95
95
; GFX940-NEXT: s_waitcnt vmcnt(0)
96
- ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
96
+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
97
97
; GFX940-NEXT: s_endpgm
98
98
main_body:
99
99
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 4 , i1 1 )
@@ -140,7 +140,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
140
140
; GFX940: ; %bb.0: ; %main_body
141
141
; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
142
142
; GFX940-NEXT: s_waitcnt vmcnt(0)
143
- ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
143
+ ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
144
144
; GFX940-NEXT: s_endpgm
145
145
main_body:
146
146
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 0 , i32 0 )
@@ -176,7 +176,7 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inre
176
176
; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
177
177
; GFX940-NEXT: v_mov_b32_e32 v2, 0
178
178
; GFX940-NEXT: s_waitcnt vmcnt(0)
179
- ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
179
+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
180
180
; GFX940-NEXT: s_endpgm
181
181
main_body:
182
182
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 4 , i32 2 )
@@ -223,7 +223,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
223
223
; GFX940: ; %bb.0: ; %main_body
224
224
; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
225
225
; GFX940-NEXT: s_waitcnt vmcnt(0)
226
- ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
226
+ ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
227
227
; GFX940-NEXT: s_endpgm
228
228
main_body:
229
229
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 0 , i32 0 , i32 0 )
@@ -259,7 +259,7 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> i
259
259
; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
260
260
; GFX940-NEXT: v_mov_b32_e32 v2, 0
261
261
; GFX940-NEXT: s_waitcnt vmcnt(0)
262
- ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
262
+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
263
263
; GFX940-NEXT: s_endpgm
264
264
main_body:
265
265
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 4 , i32 0 , i32 2 )
@@ -306,7 +306,7 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
306
306
; GFX940: ; %bb.0: ; %main_body
307
307
; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
308
308
; GFX940-NEXT: s_waitcnt vmcnt(0)
309
- ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
309
+ ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
310
310
; GFX940-NEXT: s_endpgm
311
311
main_body:
312
312
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 0 , i32 0 )
@@ -342,7 +342,7 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inre
342
342
; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
343
343
; GFX940-NEXT: v_mov_b32_e32 v2, 0
344
344
; GFX940-NEXT: s_waitcnt vmcnt(0)
345
- ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
345
+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
346
346
; GFX940-NEXT: s_endpgm
347
347
main_body:
348
348
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 4 , i32 2 )
@@ -389,7 +389,7 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
389
389
; GFX940: ; %bb.0: ; %main_body
390
390
; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
391
391
; GFX940-NEXT: s_waitcnt vmcnt(0)
392
- ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
392
+ ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
393
393
; GFX940-NEXT: s_endpgm
394
394
main_body:
395
395
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 0 , i32 0 , i32 0 )
@@ -425,7 +425,7 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> i
425
425
; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
426
426
; GFX940-NEXT: v_mov_b32_e32 v2, 0
427
427
; GFX940-NEXT: s_waitcnt vmcnt(0)
428
- ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
428
+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
429
429
; GFX940-NEXT: s_endpgm
430
430
main_body:
431
431
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 4 , i32 0 , i32 2 )
@@ -472,7 +472,7 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
472
472
; GFX940: ; %bb.0: ; %main_body
473
473
; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
474
474
; GFX940-NEXT: s_waitcnt vmcnt(0)
475
- ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
475
+ ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
476
476
; GFX940-NEXT: s_endpgm
477
477
main_body:
478
478
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 0 , i32 0 )
@@ -508,7 +508,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
508
508
; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen sc0 nt
509
509
; GFX940-NEXT: v_mov_b32_e32 v2, 0
510
510
; GFX940-NEXT: s_waitcnt vmcnt(0)
511
- ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
511
+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
512
512
; GFX940-NEXT: s_endpgm
513
513
main_body:
514
514
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 4 , i32 2 )
@@ -555,7 +555,7 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
555
555
; GFX940: ; %bb.0: ; %main_body
556
556
; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
557
557
; GFX940-NEXT: s_waitcnt vmcnt(0)
558
- ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
558
+ ; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
559
559
; GFX940-NEXT: s_endpgm
560
560
main_body:
561
561
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 0 , i32 0 , i32 0 )
@@ -591,7 +591,7 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> i
591
591
; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 sc0 nt
592
592
; GFX940-NEXT: v_mov_b32_e32 v2, 0
593
593
; GFX940-NEXT: s_waitcnt vmcnt(0)
594
- ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
594
+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
595
595
; GFX940-NEXT: s_endpgm
596
596
main_body:
597
597
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64 (double %data , <4 x i32 > %rsrc , i32 %vindex , i32 4 , i32 0 , i32 2 )
0 commit comments