Skip to content

Commit ba30315

Browse files
committed
Update test checks
Mainly removes the `sc0 sc1` flags from memory writes in the expected output, since gfx942 does not force them (in contrast to gfx940 and gfx941).
1 parent e70b4ae commit ba30315

File tree

115 files changed

+12082
-12274
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

115 files changed

+12082
-12274
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
507507
; GFX942: ; %bb.0:
508508
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509509
; GFX942-NEXT: v_mov_b32_e32 v1, 0x41200000
510-
; GFX942-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
510+
; GFX942-NEXT: scratch_store_dword v0, v1, off offset:4
511511
; GFX942-NEXT: s_waitcnt vmcnt(0)
512512
; GFX942-NEXT: s_setpc_b64 s[30:31]
513513
;
@@ -548,7 +548,7 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
548548
; UNALIGNED_GFX942: ; %bb.0:
549549
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550550
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 0x41200000
551-
; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
551+
; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off offset:4
552552
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
553553
; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31]
554554
;
@@ -3690,7 +3690,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
36903690
; GFX942-NEXT: s_add_u32 s0, s0, 0xffe8
36913691
; GFX942-NEXT: scratch_load_dword v2, off, s0
36923692
; GFX942-NEXT: s_waitcnt vmcnt(0)
3693-
; GFX942-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
3693+
; GFX942-NEXT: global_store_dword v[0:1], v2, off
36943694
; GFX942-NEXT: s_endpgm
36953695
;
36963696
; GFX11-LABEL: sgpr_base_large_offset:
@@ -3735,7 +3735,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
37353735
; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0xffe8
37363736
; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0
37373737
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
3738-
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
3738+
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off
37393739
; UNALIGNED_GFX942-NEXT: s_endpgm
37403740
;
37413741
; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset:
@@ -3790,7 +3790,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
37903790
; GFX942-NEXT: s_add_u32 s0, s0, 0x100ffe8
37913791
; GFX942-NEXT: scratch_load_dword v2, off, s0 sc0 sc1
37923792
; GFX942-NEXT: s_waitcnt vmcnt(0)
3793-
; GFX942-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
3793+
; GFX942-NEXT: global_store_dword v[0:1], v2, off
37943794
; GFX942-NEXT: s_endpgm
37953795
;
37963796
; GFX11-LABEL: sgpr_base_large_offset_split:
@@ -3843,7 +3843,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
38433843
; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0x100ffe8
38443844
; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 sc0 sc1
38453845
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
3846-
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
3846+
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off
38473847
; UNALIGNED_GFX942-NEXT: s_endpgm
38483848
;
38493849
; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset_split:
@@ -4127,7 +4127,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
41274127
; GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8
41284128
; GFX942-NEXT: scratch_load_dword v2, off, s0
41294129
; GFX942-NEXT: s_waitcnt vmcnt(0)
4130-
; GFX942-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
4130+
; GFX942-NEXT: global_store_dword v[0:1], v2, off
41314131
; GFX942-NEXT: s_endpgm
41324132
;
41334133
; GFX11-LABEL: sgpr_base_negative_offset:
@@ -4170,7 +4170,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
41704170
; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8
41714171
; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0
41724172
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
4173-
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off sc0 sc1
4173+
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off
41744174
; UNALIGNED_GFX942-NEXT: s_endpgm
41754175
;
41764176
; UNALIGNED_GFX11-LABEL: sgpr_base_negative_offset:

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
5454
; GFX942: ; %bb.0: ; %main_body
5555
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
5656
; GFX942-NEXT: s_waitcnt vmcnt(0)
57-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
57+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
5858
; GFX942-NEXT: s_endpgm
5959
main_body:
6060
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
@@ -90,7 +90,7 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr
9090
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
9191
; GFX942-NEXT: v_mov_b32_e32 v2, 0
9292
; GFX942-NEXT: s_waitcnt vmcnt(0)
93-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
93+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
9494
; GFX942-NEXT: s_endpgm
9595
main_body:
9696
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
@@ -137,7 +137,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
137137
; GFX942: ; %bb.0: ; %main_body
138138
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
139139
; GFX942-NEXT: s_waitcnt vmcnt(0)
140-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
140+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
141141
; GFX942-NEXT: s_endpgm
142142
main_body:
143143
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
@@ -173,7 +173,7 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp
173173
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
174174
; GFX942-NEXT: v_mov_b32_e32 v2, 0
175175
; GFX942-NEXT: s_waitcnt vmcnt(0)
176-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
176+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
177177
; GFX942-NEXT: s_endpgm
178178
main_body:
179179
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
@@ -220,7 +220,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
220220
; GFX942: ; %bb.0: ; %main_body
221221
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
222222
; GFX942-NEXT: s_waitcnt vmcnt(0)
223-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
223+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
224224
; GFX942-NEXT: s_endpgm
225225
main_body:
226226
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -256,7 +256,7 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %
256256
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
257257
; GFX942-NEXT: v_mov_b32_e32 v2, 0
258258
; GFX942-NEXT: s_waitcnt vmcnt(0)
259-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
259+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
260260
; GFX942-NEXT: s_endpgm
261261
main_body:
262262
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
@@ -303,7 +303,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
303303
; GFX942: ; %bb.0: ; %main_body
304304
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
305305
; GFX942-NEXT: s_waitcnt vmcnt(0)
306-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
306+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
307307
; GFX942-NEXT: s_endpgm
308308
main_body:
309309
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -339,7 +339,7 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add
339339
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
340340
; GFX942-NEXT: v_mov_b32_e32 v2, 0
341341
; GFX942-NEXT: s_waitcnt vmcnt(0)
342-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
342+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
343343
; GFX942-NEXT: s_endpgm
344344
main_body:
345345
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
@@ -386,7 +386,7 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
386386
; GFX942: ; %bb.0: ; %main_body
387387
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
388388
; GFX942-NEXT: s_waitcnt vmcnt(0)
389-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
389+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
390390
; GFX942-NEXT: s_endpgm
391391
main_body:
392392
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
@@ -422,7 +422,7 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr
422422
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
423423
; GFX942-NEXT: v_mov_b32_e32 v2, 0
424424
; GFX942-NEXT: s_waitcnt vmcnt(0)
425-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
425+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
426426
; GFX942-NEXT: s_endpgm
427427
main_body:
428428
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
@@ -469,7 +469,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
469469
; GFX942: ; %bb.0: ; %main_body
470470
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
471471
; GFX942-NEXT: s_waitcnt vmcnt(0)
472-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
472+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
473473
; GFX942-NEXT: s_endpgm
474474
main_body:
475475
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
@@ -505,7 +505,7 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp
505505
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
506506
; GFX942-NEXT: v_mov_b32_e32 v2, 0
507507
; GFX942-NEXT: s_waitcnt vmcnt(0)
508-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
508+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
509509
; GFX942-NEXT: s_endpgm
510510
main_body:
511511
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
@@ -552,7 +552,7 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
552552
; GFX942: ; %bb.0: ; %main_body
553553
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
554554
; GFX942-NEXT: s_waitcnt vmcnt(0)
555-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
555+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
556556
; GFX942-NEXT: s_endpgm
557557
main_body:
558558
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -588,7 +588,7 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %
588588
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
589589
; GFX942-NEXT: v_mov_b32_e32 v2, 0
590590
; GFX942-NEXT: s_waitcnt vmcnt(0)
591-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
591+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
592592
; GFX942-NEXT: s_endpgm
593593
main_body:
594594
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
@@ -635,7 +635,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
635635
; GFX942: ; %bb.0: ; %main_body
636636
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
637637
; GFX942-NEXT: s_waitcnt vmcnt(0)
638-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
638+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
639639
; GFX942-NEXT: s_endpgm
640640
main_body:
641641
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -671,7 +671,7 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add
671671
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
672672
; GFX942-NEXT: v_mov_b32_e32 v2, 0
673673
; GFX942-NEXT: s_waitcnt vmcnt(0)
674-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
674+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
675675
; GFX942-NEXT: s_endpgm
676676
main_body:
677677
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
@@ -718,7 +718,7 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
718718
; GFX942: ; %bb.0: ; %main_body
719719
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
720720
; GFX942-NEXT: s_waitcnt vmcnt(0)
721-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
721+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
722722
; GFX942-NEXT: s_endpgm
723723
main_body:
724724
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
@@ -754,7 +754,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr
754754
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
755755
; GFX942-NEXT: v_mov_b32_e32 v2, 0
756756
; GFX942-NEXT: s_waitcnt vmcnt(0)
757-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
757+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
758758
; GFX942-NEXT: s_endpgm
759759
main_body:
760760
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
@@ -801,7 +801,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
801801
; GFX942: ; %bb.0: ; %main_body
802802
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
803803
; GFX942-NEXT: s_waitcnt vmcnt(0)
804-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
804+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
805805
; GFX942-NEXT: s_endpgm
806806
main_body:
807807
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
@@ -837,7 +837,7 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
837837
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
838838
; GFX942-NEXT: v_mov_b32_e32 v2, 0
839839
; GFX942-NEXT: s_waitcnt vmcnt(0)
840-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
840+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
841841
; GFX942-NEXT: s_endpgm
842842
main_body:
843843
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
@@ -884,7 +884,7 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
884884
; GFX942: ; %bb.0: ; %main_body
885885
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
886886
; GFX942-NEXT: s_waitcnt vmcnt(0)
887-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
887+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
888888
; GFX942-NEXT: s_endpgm
889889
main_body:
890890
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -920,7 +920,7 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %
920920
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
921921
; GFX942-NEXT: v_mov_b32_e32 v2, 0
922922
; GFX942-NEXT: s_waitcnt vmcnt(0)
923-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
923+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
924924
; GFX942-NEXT: s_endpgm
925925
main_body:
926926
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
@@ -967,7 +967,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
967967
; GFX942: ; %bb.0: ; %main_body
968968
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
969969
; GFX942-NEXT: s_waitcnt vmcnt(0)
970-
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
970+
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
971971
; GFX942-NEXT: s_endpgm
972972
main_body:
973973
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -1003,7 +1003,7 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add
10031003
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
10041004
; GFX942-NEXT: v_mov_b32_e32 v2, 0
10051005
; GFX942-NEXT: s_waitcnt vmcnt(0)
1006-
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
1006+
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
10071007
; GFX942-NEXT: s_endpgm
10081008
main_body:
10091009
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)

llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
3636
; GFX942-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
3737
; GFX942-BACKOFF-NEXT: s_barrier
3838
; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
39-
; GFX942-BACKOFF-NEXT: flat_store_dword v[2:3], v0 sc0 sc1
39+
; GFX942-BACKOFF-NEXT: flat_store_dword v[2:3], v0
4040
; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
4141
; GFX942-BACKOFF-NEXT: s_setpc_b64 s[30:31]
4242
;
@@ -95,7 +95,7 @@ define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
9595
; GFX942-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
9696
; GFX942-BACKOFF-NEXT: s_barrier
9797
; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0)
98-
; GFX942-BACKOFF-NEXT: flat_store_dword v[2:3], v0 sc0 sc1
98+
; GFX942-BACKOFF-NEXT: flat_store_dword v[2:3], v0
9999
; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
100100
; GFX942-BACKOFF-NEXT: s_setpc_b64 s[30:31]
101101
;

llvm/test/CodeGen/AMDGPU/bf16-conversions.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
244244
; GFX-942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
245245
; GFX-942-NEXT: s_nop 1
246246
; GFX-942-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
247-
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
247+
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
248248
; GFX-942-NEXT: s_endpgm
249249
;
250250
; GFX-950-LABEL: fptrunc_f32_to_bf16:
@@ -273,7 +273,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
273273
; GFX-942-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
274274
; GFX-942-NEXT: s_nop 1
275275
; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
276-
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
276+
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
277277
; GFX-942-NEXT: s_endpgm
278278
;
279279
; GFX-950-LABEL: fptrunc_f32_to_bf16_abs:
@@ -303,7 +303,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
303303
; GFX-942-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
304304
; GFX-942-NEXT: s_nop 1
305305
; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
306-
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
306+
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
307307
; GFX-942-NEXT: s_endpgm
308308
;
309309
; GFX-950-LABEL: fptrunc_f32_to_bf16_neg:
@@ -342,7 +342,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
342342
; GFX-942-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
343343
; GFX-942-NEXT: s_nop 1
344344
; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
345-
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
345+
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
346346
; GFX-942-NEXT: s_endpgm
347347
;
348348
; GFX-950-LABEL: fptrunc_f64_to_bf16:
@@ -380,7 +380,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
380380
; GFX-942-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
381381
; GFX-942-NEXT: s_nop 1
382382
; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
383-
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
383+
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
384384
; GFX-942-NEXT: s_endpgm
385385
;
386386
; GFX-950-LABEL: fptrunc_f64_to_bf16_neg:
@@ -419,7 +419,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
419419
; GFX-942-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
420420
; GFX-942-NEXT: s_nop 1
421421
; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
422-
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
422+
; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
423423
; GFX-942-NEXT: s_endpgm
424424
;
425425
; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:

0 commit comments

Comments
 (0)