@@ -391,17 +391,22 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg(ptr %ptr, float %val
391
391
; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg:
392
392
; GFX940: ; %bb.0:
393
393
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
395
+ ; GFX940-NEXT: s_nop 1
396
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
394
397
; GFX940-NEXT: buffer_wbl2 sc1
395
- ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 sc0
398
+ ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
396
399
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
397
400
; GFX940-NEXT: buffer_inv sc1
398
401
; GFX940-NEXT: s_setpc_b64 s[30:31]
399
402
;
400
403
; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg:
401
404
; GFX11: ; %bb.0:
402
405
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406
+ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
407
+ ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
403
408
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
404
- ; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 glc
409
+ ; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 glc
405
410
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
406
411
; GFX11-NEXT: buffer_gl1_inv
407
412
; GFX11-NEXT: buffer_gl0_inv
@@ -1003,17 +1008,22 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg(ptr %ptr, float %va
1003
1008
; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg:
1004
1009
; GFX940: ; %bb.0:
1005
1010
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
1012
+ ; GFX940-NEXT: s_nop 1
1013
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1006
1014
; GFX940-NEXT: buffer_wbl2 sc1
1007
- ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
1015
+ ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2
1008
1016
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1009
1017
; GFX940-NEXT: buffer_inv sc1
1010
1018
; GFX940-NEXT: s_setpc_b64 s[30:31]
1011
1019
;
1012
1020
; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg:
1013
1021
; GFX11: ; %bb.0:
1014
1022
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023
+ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
1024
+ ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
1015
1025
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1016
- ; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
1026
+ ; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2
1017
1027
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1018
1028
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1019
1029
; GFX11-NEXT: buffer_gl1_inv
@@ -1952,17 +1962,22 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz(ptr %ptr, float
1952
1962
; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz:
1953
1963
; GFX940: ; %bb.0:
1954
1964
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1965
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
1966
+ ; GFX940-NEXT: s_nop 1
1967
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1955
1968
; GFX940-NEXT: buffer_wbl2 sc1
1956
- ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 sc0
1969
+ ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
1957
1970
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1958
1971
; GFX940-NEXT: buffer_inv sc1
1959
1972
; GFX940-NEXT: s_setpc_b64 s[30:31]
1960
1973
;
1961
1974
; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz:
1962
1975
; GFX11: ; %bb.0:
1963
1976
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1977
+ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
1978
+ ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
1964
1979
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1965
- ; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 glc
1980
+ ; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 glc
1966
1981
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1967
1982
; GFX11-NEXT: buffer_gl1_inv
1968
1983
; GFX11-NEXT: buffer_gl0_inv
@@ -2564,17 +2579,22 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz(ptr %ptr, floa
2564
2579
; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz:
2565
2580
; GFX940: ; %bb.0:
2566
2581
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2582
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
2583
+ ; GFX940-NEXT: s_nop 1
2584
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2567
2585
; GFX940-NEXT: buffer_wbl2 sc1
2568
- ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
2586
+ ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2
2569
2587
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2570
2588
; GFX940-NEXT: buffer_inv sc1
2571
2589
; GFX940-NEXT: s_setpc_b64 s[30:31]
2572
2590
;
2573
2591
; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz:
2574
2592
; GFX11: ; %bb.0:
2575
2593
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2594
+ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
2595
+ ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
2576
2596
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2577
- ; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
2597
+ ; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2
2578
2598
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2579
2599
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2580
2600
; GFX11-NEXT: buffer_gl1_inv
@@ -3528,8 +3548,11 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg(ptr %ptr, double %v
3528
3548
; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg:
3529
3549
; GFX940: ; %bb.0:
3530
3550
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3551
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
3552
+ ; GFX940-NEXT: s_nop 1
3553
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
3531
3554
; GFX940-NEXT: buffer_wbl2 sc1
3532
- ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:63488 sc0
3555
+ ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
3533
3556
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3534
3557
; GFX940-NEXT: buffer_inv sc1
3535
3558
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -3594,7 +3617,9 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg(ptr %ptr, double %v
3594
3617
; GFX90A-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg:
3595
3618
; GFX90A: ; %bb.0:
3596
3619
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3597
- ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:63488 glc
3620
+ ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
3621
+ ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
3622
+ ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
3598
3623
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3599
3624
; GFX90A-NEXT: buffer_wbinvl1
3600
3625
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -4057,8 +4082,11 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg(ptr %ptr, double %v
4057
4082
; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg:
4058
4083
; GFX940: ; %bb.0:
4059
4084
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4085
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
4086
+ ; GFX940-NEXT: s_nop 1
4087
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
4060
4088
; GFX940-NEXT: buffer_wbl2 sc1
4061
- ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:63488
4089
+ ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
4062
4090
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
4063
4091
; GFX940-NEXT: buffer_inv sc1
4064
4092
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -4120,7 +4148,9 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg(ptr %ptr, double %v
4120
4148
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg:
4121
4149
; GFX90A: ; %bb.0:
4122
4150
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4123
- ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:63488
4151
+ ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
4152
+ ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
4153
+ ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
4124
4154
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
4125
4155
; GFX90A-NEXT: buffer_wbinvl1
4126
4156
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -10679,8 +10709,11 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg(ptr %ptr, <2
10679
10709
; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg:
10680
10710
; GFX940: ; %bb.0:
10681
10711
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10712
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
10713
+ ; GFX940-NEXT: s_nop 1
10714
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
10682
10715
; GFX940-NEXT: buffer_wbl2 sc1
10683
- ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:63488 sc0
10716
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
10684
10717
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
10685
10718
; GFX940-NEXT: buffer_inv sc1
10686
10719
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -11245,8 +11278,11 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha
11245
11278
; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg:
11246
11279
; GFX940: ; %bb.0:
11247
11280
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11281
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
11282
+ ; GFX940-NEXT: s_nop 1
11283
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
11248
11284
; GFX940-NEXT: buffer_wbl2 sc1
11249
- ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:63488
11285
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
11250
11286
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
11251
11287
; GFX940-NEXT: buffer_inv sc1
11252
11288
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -12380,8 +12416,11 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg(ptr %ptr,
12380
12416
; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg:
12381
12417
; GFX940: ; %bb.0:
12382
12418
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12419
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
12420
+ ; GFX940-NEXT: s_nop 1
12421
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
12383
12422
; GFX940-NEXT: buffer_wbl2 sc1
12384
- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:63488 sc0
12423
+ ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
12385
12424
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
12386
12425
; GFX940-NEXT: buffer_inv sc1
12387
12426
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -13210,8 +13249,11 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
13210
13249
; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg:
13211
13250
; GFX940: ; %bb.0:
13212
13251
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13252
+ ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
13253
+ ; GFX940-NEXT: s_nop 1
13254
+ ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
13213
13255
; GFX940-NEXT: buffer_wbl2 sc1
13214
- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:63488
13256
+ ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
13215
13257
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13216
13258
; GFX940-NEXT: buffer_inv sc1
13217
13259
; GFX940-NEXT: s_setpc_b64 s[30:31]
0 commit comments