Skip to content

Commit 37a1e1c

Browse files
committed
AMDGPU: Drop and upgrade llvm.amdgcn.atomic.csub/cond.sub to atomicrmw
1 parent 4af249f commit 37a1e1c

32 files changed

+2277
-659
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,11 +1360,6 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
13601360

13611361
The iglp_opt strategy implementations are subject to change.
13621362

1363-
llvm.amdgcn.atomic.cond.sub.u32 Provides direct access to flat_atomic_cond_sub_u32, global_atomic_cond_sub_u32
1364-
and ds_cond_sub_u32 based on address space on gfx12 targets. This
1365-
performs subtraction only if the memory value is greater than or
1366-
equal to the data value.
1367-
13681363
llvm.amdgcn.s.getpc Provides access to the s_getpc_b64 instruction, but with the return value
13691364
sign-extended from the width of the underlying PC hardware register even on
13701365
processors where the s_getpc_b64 instruction returns a zero-extended value.

llvm/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ Changes to the AMDGPU Backend
8282
``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the
8383
:ref:`atomicrmw <i_atomicrmw>` instruction with ``fadd`` and
8484
addrspace(0) or addrspace(1) instead.
85+
* Removed ``llvm.amdgcn.atomic.cond.sub.u32`` and
86+
``llvm.amdgcn.global.atomic.csub`` intrinsics. Users should use the
87+
:ref:`atomicrmw <i_atomicrmw>` instruction with ``usub_cond`` and
88+
``usub_sat`` instead.
8589

8690
Changes to the ARM Backend
8791
--------------------------

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,7 +1353,6 @@ def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
13531353
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
13541354
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
13551355
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
1356-
def int_amdgcn_raw_buffer_atomic_cond_sub_u32 : AMDGPURawBufferAtomic;
13571356
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
13581357
[llvm_anyint_ty],
13591358
[LLVMMatchType<0>, // src(VGPR)
@@ -1390,7 +1389,6 @@ def int_amdgcn_raw_ptr_buffer_atomic_or : AMDGPURawPtrBufferAtomic;
13901389
def int_amdgcn_raw_ptr_buffer_atomic_xor : AMDGPURawPtrBufferAtomic;
13911390
def int_amdgcn_raw_ptr_buffer_atomic_inc : AMDGPURawPtrBufferAtomic;
13921391
def int_amdgcn_raw_ptr_buffer_atomic_dec : AMDGPURawPtrBufferAtomic;
1393-
def int_amdgcn_raw_ptr_buffer_atomic_cond_sub_u32 : AMDGPURawPtrBufferAtomic;
13941392
def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
13951393
[llvm_anyint_ty],
13961394
[LLVMMatchType<0>, // src(VGPR)
@@ -1431,7 +1429,6 @@ def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
14311429
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
14321430
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
14331431
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
1434-
def int_amdgcn_struct_buffer_atomic_cond_sub_u32 : AMDGPUStructBufferAtomic;
14351432
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
14361433
[llvm_anyint_ty],
14371434
[LLVMMatchType<0>, // src(VGPR)
@@ -1467,7 +1464,6 @@ def int_amdgcn_struct_ptr_buffer_atomic_or : AMDGPUStructPtrBufferAtomic;
14671464
def int_amdgcn_struct_ptr_buffer_atomic_xor : AMDGPUStructPtrBufferAtomic;
14681465
def int_amdgcn_struct_ptr_buffer_atomic_inc : AMDGPUStructPtrBufferAtomic;
14691466
def int_amdgcn_struct_ptr_buffer_atomic_dec : AMDGPUStructPtrBufferAtomic;
1470-
def int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32 : AMDGPUStructPtrBufferAtomic;
14711467
def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
14721468
[llvm_anyint_ty],
14731469
[LLVMMatchType<0>, // src(VGPR)
@@ -2463,8 +2459,6 @@ class AMDGPUAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic <
24632459
[IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "",
24642460
[SDNPMemOperand]>;
24652461

2466-
def int_amdgcn_global_atomic_csub : AMDGPUAtomicRtn<llvm_i32_ty>;
2467-
24682462
// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
24692463
// <ray_dir>, <ray_inv_dir>, <texture_descr>
24702464
// <node_ptr> is i32 or i64.
@@ -2664,8 +2658,6 @@ def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
26642658
def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
26652659
def int_amdgcn_global_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
26662660

2667-
def int_amdgcn_atomic_cond_sub_u32 : AMDGPUAtomicRtn<llvm_i32_ty>;
2668-
26692661
class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
26702662
Intrinsic<
26712663
[llvm_any_ty],

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,9 +1024,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
10241024
}
10251025

10261026
if (Name.consume_front("atomic.")) {
1027-
if (Name.starts_with("inc") || Name.starts_with("dec")) {
1028-
// These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1029-
// there's no new declaration.
1027+
if (Name.starts_with("inc") || Name.starts_with("dec") ||
1028+
Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1029+
// These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1030+
// and usub_sat so there's no new declaration.
10301031
NewFn = nullptr;
10311032
return true;
10321033
}
@@ -4053,7 +4054,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
40534054
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
40544055
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
40554056
.StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4056-
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd);
4057+
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4058+
.StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4059+
.StartsWith("atomic.csub", AtomicRMWInst::USubSat);
40574060

40584061
unsigned NumOperands = CI->getNumOperands();
40594062
if (NumOperands < 3) // Malformed bitcode.

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>;
271271
// FIXME: Check MMO is atomic
272272
def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap_glue>;
273273
def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap_glue>;
274+
def : GINodeEquiv<G_ATOMICRMW_USUB_COND, atomic_load_usub_cond_glue>;
275+
def : GINodeEquiv<G_ATOMICRMW_USUB_SAT, atomic_load_usub_sat_glue>;
274276
def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin_glue>;
275277
def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax_glue>;
276278

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3536,6 +3536,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
35363536
case TargetOpcode::G_ATOMICRMW_UMAX:
35373537
case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
35383538
case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
3539+
case TargetOpcode::G_ATOMICRMW_USUB_COND:
3540+
case TargetOpcode::G_ATOMICRMW_USUB_SAT:
35393541
case TargetOpcode::G_ATOMICRMW_FADD:
35403542
case TargetOpcode::G_ATOMICRMW_FMIN:
35413543
case TargetOpcode::G_ATOMICRMW_FMAX:

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -622,15 +622,11 @@ defm int_amdgcn_flat_atomic_fmin : noret_op;
622622
defm int_amdgcn_flat_atomic_fmax : noret_op;
623623
defm int_amdgcn_global_atomic_fmin : noret_op;
624624
defm int_amdgcn_global_atomic_fmax : noret_op;
625-
defm int_amdgcn_global_atomic_csub : noret_op;
626625
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
627626
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
628627
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
629628
defm int_amdgcn_global_atomic_fmin_num : noret_op;
630629
defm int_amdgcn_global_atomic_fmax_num : noret_op;
631-
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
632-
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
633-
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;
634630

635631
multiclass noret_binary_atomic_op<SDNode atomic_op> {
636632
let HasNoUse = true in
@@ -681,6 +677,8 @@ defm atomic_load_fmin : binary_atomic_op_fp_all_as<atomic_load_fmin>;
681677
defm atomic_load_fmax : binary_atomic_op_fp_all_as<atomic_load_fmax>;
682678
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
683679
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
680+
defm atomic_load_usub_cond : binary_atomic_op_all_as<atomic_load_usub_cond>;
681+
defm atomic_load_usub_sat : binary_atomic_op_all_as<atomic_load_usub_sat>;
684682
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
685683

686684
def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
16471647
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
16481648
}
16491649

1650+
auto &Atomics32 =
1651+
getActionDefinitionsBuilder({G_ATOMICRMW_USUB_COND, G_ATOMICRMW_USUB_SAT})
1652+
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr}, {S32, RegionPtr}});
1653+
if (ST.hasFlatAddressSpace()) {
1654+
Atomics32.legalFor({{S32, FlatPtr}});
1655+
}
1656+
16501657
// TODO: v2bf16 operations, and fat buffer pointer support.
16511658
auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
16521659
if (ST.hasLDSFPAtomicAddF32()) {
@@ -6150,9 +6157,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
61506157
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
61516158
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
61526159
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX;
6153-
case Intrinsic::amdgcn_raw_buffer_atomic_cond_sub_u32:
6154-
case Intrinsic::amdgcn_struct_buffer_atomic_cond_sub_u32:
6155-
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32;
61566160
default:
61576161
llvm_unreachable("unhandled atomic opcode");
61586162
}

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,15 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
11501150
case AtomicRMWInst::UIncWrap:
11511151
case AtomicRMWInst::UDecWrap:
11521152
report_fatal_error("wrapping increment/decrement not supported for "
1153-
"buffer resources and should've ben expanded away");
1153+
"buffer resources and should've been expanded away");
1154+
break;
1155+
case AtomicRMWInst::USubCond:
1156+
report_fatal_error("conditional subtract not supported for buffer "
1157+
"resources and should've been expanded away");
1158+
break;
1159+
case AtomicRMWInst::USubSat:
1160+
report_fatal_error("subtract with clamp not supported for buffer "
1161+
"resources and should've been expanded away");
11541162
break;
11551163
case AtomicRMWInst::BAD_BINOP:
11561164
llvm_unreachable("Not sure how we got a bad binop");

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4906,7 +4906,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
49064906
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
49074907
break;
49084908
}
4909-
case Intrinsic::amdgcn_global_atomic_csub:
49104909
case Intrinsic::amdgcn_global_atomic_fmin:
49114910
case Intrinsic::amdgcn_global_atomic_fmax:
49124911
case Intrinsic::amdgcn_global_atomic_fmin_num:
@@ -4915,7 +4914,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
49154914
case Intrinsic::amdgcn_flat_atomic_fmax:
49164915
case Intrinsic::amdgcn_flat_atomic_fmin_num:
49174916
case Intrinsic::amdgcn_flat_atomic_fmax_num:
4918-
case Intrinsic::amdgcn_atomic_cond_sub_u32:
49194917
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
49204918
case Intrinsic::amdgcn_global_load_tr_b64:
49214919
case Intrinsic::amdgcn_global_load_tr_b128:
@@ -5247,6 +5245,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
52475245
case AMDGPU::G_ATOMICRMW_FMAX:
52485246
case AMDGPU::G_ATOMICRMW_UINC_WRAP:
52495247
case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5248+
case AMDGPU::G_ATOMICRMW_USUB_COND:
5249+
case AMDGPU::G_ATOMICRMW_USUB_SAT:
52505250
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
52515251
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
52525252
OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,6 @@ def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
237237
def : SourceOfDivergence<int_r600_read_tidig_x>;
238238
def : SourceOfDivergence<int_r600_read_tidig_y>;
239239
def : SourceOfDivergence<int_r600_read_tidig_z>;
240-
def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>;
241-
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
242240
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
243241
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
244242
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
@@ -264,7 +262,6 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
264262
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmin>;
265263
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmax>;
266264
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
267-
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cond_sub_u32>;
268265
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_swap>;
269266
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_add>;
270267
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_sub>;
@@ -281,7 +278,6 @@ def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd>;
281278
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmin>;
282279
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmax>;
283280
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cmpswap>;
284-
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cond_sub_u32>;
285281
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
286282
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
287283
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
@@ -298,7 +294,6 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
298294
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmin>;
299295
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmax>;
300296
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
301-
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cond_sub_u32>;
302297
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_swap>;
303298
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_add>;
304299
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_sub>;
@@ -315,7 +310,6 @@ def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd>;
315310
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmin>;
316311
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmax>;
317312
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cmpswap>;
318-
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32>;
319313
def : SourceOfDivergence<int_amdgcn_ps_live>;
320314
def : SourceOfDivergence<int_amdgcn_live_mask>;
321315
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1128,7 +1128,7 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
11281128

11291129
let OtherPredicates = [HasGFX10_BEncoding] in {
11301130
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics <
1131-
"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
1131+
"buffer_atomic_csub", VGPR_32, i32
11321132
>;
11331133
}
11341134

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -734,17 +734,6 @@ defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>;
734734
defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
735735
defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32>;
736736

737-
multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
738-
ValueType vt, string frag> {
739-
def : DSAtomicRetPat<inst, vt,
740-
!cast<PatFrag>(frag#"_local_addrspace")>;
741-
742-
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
743-
def : DSAtomicRetPat<noRetInst, vt,
744-
!cast<PatFrag>(frag#"_noret_local_addrspace"), /* complexity */ 1>;
745-
}
746-
747-
defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "int_amdgcn_atomic_cond_sub_u32">;
748737
} // let SubtargetPredicate = isGFX12Plus
749738

750739
//===----------------------------------------------------------------------===//
@@ -1006,7 +995,34 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
1006995
}
1007996
}
1008997

998+
multiclass DSAtomicRetNoRetPatCondSub_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
999+
ValueType vt, string frag> {
1000+
let OtherPredicates = [LDSRequiresM0Init] in {
1001+
def : DSAtomicRetPat<inst, vt,
1002+
!cast<PatFrag>(frag#"_local_m0_"#vt)>;
1003+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1004+
def : DSAtomicRetPat<noRetInst, vt,
1005+
!cast<PatFrag>(frag#"_local_m0_noret_"#vt), /* complexity */ 1>;
1006+
}
1007+
1008+
let OtherPredicates = [NotLDSRequiresM0Init] in {
1009+
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
1010+
!cast<PatFrag>(frag#"_local_"#vt)>;
1011+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1012+
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
1013+
!cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>;
1014+
}
10091015

1016+
let OtherPredicates = [HasGDS] in {
1017+
def : DSAtomicRetPat<inst, vt,
1018+
!cast<PatFrag>(frag#"_region_m0_"#vt),
1019+
/* complexity */ 0, /* gds */ 1>;
1020+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1021+
def : DSAtomicRetPat<noRetInst, vt,
1022+
!cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1023+
/* complexity */ 1, /* gds */ 1>;
1024+
}
1025+
}
10101026

10111027
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
10121028
// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
@@ -1089,6 +1105,14 @@ defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_l
10891105
defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "atomic_load_fadd">;
10901106
}
10911107

1108+
let SubtargetPredicate = isGFX12Plus in {
1109+
1110+
defm : DSAtomicRetNoRetPatCondSub_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "atomic_load_usub_cond">;
1111+
1112+
defm : DSAtomicRetNoRetPat_mc<DS_SUB_CLAMP_RTN_U32, DS_SUB_CLAMP_U32, i32, "atomic_load_usub_sat">;
1113+
1114+
} // let SubtargetPredicate = isGFX12Plus
1115+
10921116
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
10931117
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;
10941118
}

0 commit comments

Comments
 (0)