Skip to content

Commit eda9ff8

Browse files
authored
AMDGPU: Flat instructions do not have signed offsets gfx7-gfx11 (#95852)
Fixes some atomicrmw fadd and intrinsic cases
1 parent 20d3cab commit eda9ff8

File tree

6 files changed: +151 additions, -74 deletions (lines changed)

6 files changed

+151
-74
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -616,6 +616,7 @@ multiclass local_addr_space_atomic_op {
616616
}
617617
}
618618

619+
defm int_amdgcn_flat_atomic_fadd : noret_op;
619620
defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
620621
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
621622
defm int_amdgcn_flat_atomic_fmin : noret_op;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 22 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1155,21 +1155,6 @@ multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
11551155
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
11561156
}
11571157

1158-
multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt,
1159-
ValueType data_vt = vt> {
1160-
defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>;
1161-
}
1162-
1163-
multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
1164-
ValueType vt, ValueType data_vt = vt> {
1165-
defvar noRtnNode = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix);
1166-
defvar rtnNode = !cast<PatFrags>(intr # "_" # addrSpaceSuffix);
1167-
1168-
let AddedComplexity = 1 in
1169-
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
1170-
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>;
1171-
}
1172-
11731158
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
11741159
(vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
11751160
(inst $vaddr, $offset)
@@ -1585,34 +1570,34 @@ let OtherPredicates = [isGFX12Plus] in {
15851570
let OtherPredicates = [isGFX10Plus] in {
15861571
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
15871572
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1588-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1589-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
1573+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1574+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
15901575
}
15911576

15921577
let OtherPredicates = [isGFX10GFX11] in {
15931578
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
15941579
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
15951580

1596-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
1597-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
1581+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
1582+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
15981583
}
15991584

16001585
let OtherPredicates = [isGFX10Only] in {
16011586
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
16021587
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
16031588
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
16041589
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1605-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1606-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1607-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1608-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1590+
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1591+
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1592+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1593+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16091594
}
16101595

16111596
let OtherPredicates = [isGFX12Only] in {
16121597
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
16131598
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
1614-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
1615-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
1599+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
1600+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
16161601
}
16171602

16181603
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
@@ -1646,25 +1631,24 @@ defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_fl
16461631
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
16471632
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
16481633
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1649-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1650-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1651-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1652-
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>;
1653-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1654-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1634+
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1635+
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1636+
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1637+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
1638+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1639+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16551640
}
16561641

16571642
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
1658-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
1659-
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>;
1643+
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
1644+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;
16601645
}
16611646

16621647
let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
1663-
// FIXME: These do not have signed offsets
1664-
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>;
1665-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
1666-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_flat", v2f16>;
1667-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>;
1648+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>;
1649+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
1650+
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_flat", v2f16>;
1651+
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>;
16681652
}
16691653

16701654
let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll

Lines changed: 58 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -391,17 +391,22 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg(ptr %ptr, float %val
391391
; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg:
392392
; GFX940: ; %bb.0:
393393
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
395+
; GFX940-NEXT: s_nop 1
396+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
394397
; GFX940-NEXT: buffer_wbl2 sc1
395-
; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 sc0
398+
; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
396399
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
397400
; GFX940-NEXT: buffer_inv sc1
398401
; GFX940-NEXT: s_setpc_b64 s[30:31]
399402
;
400403
; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg:
401404
; GFX11: ; %bb.0:
402405
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406+
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
407+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
403408
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
404-
; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 glc
409+
; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 glc
405410
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
406411
; GFX11-NEXT: buffer_gl1_inv
407412
; GFX11-NEXT: buffer_gl0_inv
@@ -1003,17 +1008,22 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg(ptr %ptr, float %va
10031008
; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg:
10041009
; GFX940: ; %bb.0:
10051010
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
1012+
; GFX940-NEXT: s_nop 1
1013+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
10061014
; GFX940-NEXT: buffer_wbl2 sc1
1007-
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
1015+
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2
10081016
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
10091017
; GFX940-NEXT: buffer_inv sc1
10101018
; GFX940-NEXT: s_setpc_b64 s[30:31]
10111019
;
10121020
; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg:
10131021
; GFX11: ; %bb.0:
10141022
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023+
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
1024+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
10151025
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1016-
; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
1026+
; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2
10171027
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
10181028
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
10191029
; GFX11-NEXT: buffer_gl1_inv
@@ -1952,17 +1962,22 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz(ptr %ptr, float
19521962
; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz:
19531963
; GFX940: ; %bb.0:
19541964
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1965+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
1966+
; GFX940-NEXT: s_nop 1
1967+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
19551968
; GFX940-NEXT: buffer_wbl2 sc1
1956-
; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 sc0
1969+
; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
19571970
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
19581971
; GFX940-NEXT: buffer_inv sc1
19591972
; GFX940-NEXT: s_setpc_b64 s[30:31]
19601973
;
19611974
; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz:
19621975
; GFX11: ; %bb.0:
19631976
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1977+
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
1978+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
19641979
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1965-
; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:63488 glc
1980+
; GFX11-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 glc
19661981
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
19671982
; GFX11-NEXT: buffer_gl1_inv
19681983
; GFX11-NEXT: buffer_gl0_inv
@@ -2564,17 +2579,22 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz(ptr %ptr, floa
25642579
; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz:
25652580
; GFX940: ; %bb.0:
25662581
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2582+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
2583+
; GFX940-NEXT: s_nop 1
2584+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
25672585
; GFX940-NEXT: buffer_wbl2 sc1
2568-
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
2586+
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2
25692587
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
25702588
; GFX940-NEXT: buffer_inv sc1
25712589
; GFX940-NEXT: s_setpc_b64 s[30:31]
25722590
;
25732591
; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz:
25742592
; GFX11: ; %bb.0:
25752593
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2594+
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
2595+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
25762596
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2577-
; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:63488
2597+
; GFX11-NEXT: flat_atomic_add_f32 v[0:1], v2
25782598
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
25792599
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
25802600
; GFX11-NEXT: buffer_gl1_inv
@@ -3528,8 +3548,11 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg(ptr %ptr, double %v
35283548
; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg:
35293549
; GFX940: ; %bb.0:
35303550
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3551+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
3552+
; GFX940-NEXT: s_nop 1
3553+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
35313554
; GFX940-NEXT: buffer_wbl2 sc1
3532-
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:63488 sc0
3555+
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
35333556
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
35343557
; GFX940-NEXT: buffer_inv sc1
35353558
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -3594,7 +3617,9 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg(ptr %ptr, double %v
35943617
; GFX90A-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg:
35953618
; GFX90A: ; %bb.0:
35963619
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3597-
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:63488 glc
3620+
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
3621+
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
3622+
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
35983623
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
35993624
; GFX90A-NEXT: buffer_wbinvl1
36003625
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -4057,8 +4082,11 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg(ptr %ptr, double %v
40574082
; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg:
40584083
; GFX940: ; %bb.0:
40594084
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4085+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
4086+
; GFX940-NEXT: s_nop 1
4087+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
40604088
; GFX940-NEXT: buffer_wbl2 sc1
4061-
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:63488
4089+
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
40624090
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
40634091
; GFX940-NEXT: buffer_inv sc1
40644092
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -4120,7 +4148,9 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg(ptr %ptr, double %v
41204148
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg:
41214149
; GFX90A: ; %bb.0:
41224150
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4123-
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:63488
4151+
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
4152+
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
4153+
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
41244154
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
41254155
; GFX90A-NEXT: buffer_wbinvl1
41264156
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -10679,8 +10709,11 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg(ptr %ptr, <2
1067910709
; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg:
1068010710
; GFX940: ; %bb.0:
1068110711
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10712+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
10713+
; GFX940-NEXT: s_nop 1
10714+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1068210715
; GFX940-NEXT: buffer_wbl2 sc1
10683-
; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:63488 sc0
10716+
; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
1068410717
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1068510718
; GFX940-NEXT: buffer_inv sc1
1068610719
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -11245,8 +11278,11 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha
1124511278
; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg:
1124611279
; GFX940: ; %bb.0:
1124711280
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11281+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
11282+
; GFX940-NEXT: s_nop 1
11283+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1124811284
; GFX940-NEXT: buffer_wbl2 sc1
11249-
; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:63488
11285+
; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
1125011286
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1125111287
; GFX940-NEXT: buffer_inv sc1
1125211288
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -12380,8 +12416,11 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg(ptr %ptr,
1238012416
; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg:
1238112417
; GFX940: ; %bb.0:
1238212418
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12419+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
12420+
; GFX940-NEXT: s_nop 1
12421+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1238312422
; GFX940-NEXT: buffer_wbl2 sc1
12384-
; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:63488 sc0
12423+
; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
1238512424
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1238612425
; GFX940-NEXT: buffer_inv sc1
1238712426
; GFX940-NEXT: s_setpc_b64 s[30:31]
@@ -13210,8 +13249,11 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
1321013249
; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg:
1321113250
; GFX940: ; %bb.0:
1321213251
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13252+
; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
13253+
; GFX940-NEXT: s_nop 1
13254+
; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1321313255
; GFX940-NEXT: buffer_wbl2 sc1
13214-
; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:63488
13256+
; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
1321513257
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1321613258
; GFX940-NEXT: buffer_inv sc1
1321713259
; GFX940-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)