@@ -558,8 +558,6 @@ define amdgpu_kernel void @flat_agent_acquire_load(
558
558
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
559
559
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
560
560
; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
561
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
562
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
563
561
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
564
562
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
565
563
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -577,8 +575,6 @@ define amdgpu_kernel void @flat_agent_acquire_load(
577
575
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
578
576
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
579
577
; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
580
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
581
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
582
578
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
583
579
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
584
580
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -2544,8 +2540,6 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
2544
2540
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
2545
2541
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2
2546
2542
; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2547
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
2548
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
2549
2543
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
2550
2544
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
2551
2545
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -2562,8 +2556,6 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
2562
2556
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
2563
2557
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2
2564
2558
; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2565
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
2566
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
2567
2559
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
2568
2560
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
2569
2561
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -7749,8 +7741,6 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
7749
7741
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
7750
7742
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
7751
7743
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7752
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
7753
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
7754
7744
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
7755
7745
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
7756
7746
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -7773,8 +7763,6 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
7773
7763
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
7774
7764
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
7775
7765
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7776
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
7777
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
7778
7766
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
7779
7767
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
7780
7768
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -9278,8 +9266,6 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
9278
9266
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
9279
9267
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
9280
9268
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9281
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
9282
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
9283
9269
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
9284
9270
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
9285
9271
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -9302,8 +9288,6 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
9302
9288
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
9303
9289
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
9304
9290
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9305
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
9306
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
9307
9291
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
9308
9292
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
9309
9293
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -10873,8 +10857,6 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
10873
10857
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
10874
10858
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
10875
10859
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10876
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
10877
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
10878
10860
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
10879
10861
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
10880
10862
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -10902,8 +10884,6 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
10902
10884
; GFX12-CU-NEXT: s_wait_storecnt 0x0
10903
10885
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
10904
10886
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10905
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
10906
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
10907
10887
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
10908
10888
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
10909
10889
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -12433,8 +12413,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
12433
12413
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
12434
12414
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
12435
12415
; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
12436
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
12437
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
12438
12416
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
12439
12417
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
12440
12418
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -12453,8 +12431,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
12453
12431
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
12454
12432
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
12455
12433
; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV
12456
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
12457
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
12458
12434
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
12459
12435
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
12460
12436
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -14427,8 +14403,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
14427
14403
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
14428
14404
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2
14429
14405
; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
14430
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
14431
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
14432
14406
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
14433
14407
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
14434
14408
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -14446,8 +14420,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
14446
14420
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
14447
14421
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2
14448
14422
; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
14449
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
14450
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
14451
14423
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
14452
14424
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
14453
14425
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -19610,8 +19582,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
19610
19582
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
19611
19583
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
19612
19584
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19613
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
19614
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
19615
19585
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
19616
19586
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
19617
19587
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -19635,8 +19605,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
19635
19605
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
19636
19606
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
19637
19607
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19638
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
19639
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
19640
19608
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
19641
19609
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
19642
19610
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -21179,8 +21147,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
21179
21147
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
21180
21148
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
21181
21149
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21182
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
21183
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
21184
21150
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
21185
21151
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
21186
21152
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -21204,8 +21170,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
21204
21170
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
21205
21171
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
21206
21172
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21207
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
21208
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
21209
21173
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
21210
21174
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
21211
21175
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
@@ -22824,8 +22788,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
22824
22788
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
22825
22789
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
22826
22790
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
22827
- ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
22828
- ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
22829
22791
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
22830
22792
; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
22831
22793
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
@@ -22854,8 +22816,6 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
22854
22816
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
22855
22817
; GFX12-CU-NEXT: s_wait_storecnt 0x0
22856
22818
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
22857
- ; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
22858
- ; GFX12-CU-NEXT: s_wait_samplecnt 0x0
22859
22819
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
22860
22820
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
22861
22821
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
0 commit comments