Skip to content

Commit d4bb0b2

Browse files
committed
AMDGPU: Remove flat/global fmin/fmax intrinsics
These have been replaced with atomicrmw fmin/fmax; existing calls to the removed intrinsics are auto-upgraded.
1 parent 3c6041d commit d4bb0b2

18 files changed

+127
-1178
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2966,11 +2966,6 @@ def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v
29662966
// gfx90a intrinsics
29672967
// ===----------------------------------------------------------------------===//
29682968

2969-
def int_amdgcn_global_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
2970-
def int_amdgcn_global_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
2971-
def int_amdgcn_flat_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
2972-
def int_amdgcn_flat_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
2973-
29742969
defset list<Intrinsic> AMDGPUMFMAIntrinsics90A = {
29752970
def int_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4i16_ty>;
29762971
def int_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,14 +1041,17 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
10411041
break; // No other 'amdgcn.atomic.*'
10421042
}
10431043

1044-
if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
1045-
Name.starts_with("ds.fmax") ||
1046-
Name.starts_with("global.atomic.fadd") ||
1047-
Name.starts_with("flat.atomic.fadd")) {
1048-
// Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1049-
// declaration.
1050-
NewFn = nullptr;
1051-
return true;
1044+
if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1045+
Name.consume_front("flat.atomic.")) {
1046+
if (Name.starts_with("fadd") ||
1047+
// FIXME: We should also remove fmin.num and fmax.num intrinsics.
1048+
(Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1049+
(Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1050+
// Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1051+
// declaration.
1052+
NewFn = nullptr;
1053+
return true;
1054+
}
10521055
}
10531056

10541057
if (Name.starts_with("ldexp.")) {
@@ -4218,7 +4221,11 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
42184221
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
42194222
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
42204223
.StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4221-
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd);
4224+
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4225+
.StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4226+
.StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4227+
.StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4228+
.StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
42224229

42234230
unsigned NumOperands = CI->getNumOperands();
42244231
if (NumOperands < 3) // Malformed bitcode.

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -618,10 +618,6 @@ multiclass local_addr_space_atomic_op {
618618
}
619619
}
620620

621-
defm int_amdgcn_flat_atomic_fmin : noret_op;
622-
defm int_amdgcn_flat_atomic_fmax : noret_op;
623-
defm int_amdgcn_global_atomic_fmin : noret_op;
624-
defm int_amdgcn_global_atomic_fmax : noret_op;
625621
defm int_amdgcn_global_atomic_csub : noret_op;
626622
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
627623
defm int_amdgcn_flat_atomic_fmin_num : noret_op;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4913,12 +4913,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
49134913
break;
49144914
}
49154915
case Intrinsic::amdgcn_global_atomic_csub:
4916-
case Intrinsic::amdgcn_global_atomic_fmin:
4917-
case Intrinsic::amdgcn_global_atomic_fmax:
49184916
case Intrinsic::amdgcn_global_atomic_fmin_num:
49194917
case Intrinsic::amdgcn_global_atomic_fmax_num:
4920-
case Intrinsic::amdgcn_flat_atomic_fmin:
4921-
case Intrinsic::amdgcn_flat_atomic_fmax:
49224918
case Intrinsic::amdgcn_flat_atomic_fmin_num:
49234919
case Intrinsic::amdgcn_flat_atomic_fmax_num:
49244920
case Intrinsic::amdgcn_atomic_cond_sub_u32:

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,13 +239,9 @@ def : SourceOfDivergence<int_r600_read_tidig_y>;
239239
def : SourceOfDivergence<int_r600_read_tidig_z>;
240240
def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>;
241241
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
242-
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
243-
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
244242
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
245243
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
246244
def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>;
247-
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
248-
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
249245
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
250246
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
251247
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,8 +1045,6 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
10451045
switch (IID) {
10461046
case Intrinsic::amdgcn_is_shared:
10471047
case Intrinsic::amdgcn_is_private:
1048-
case Intrinsic::amdgcn_flat_atomic_fmax:
1049-
case Intrinsic::amdgcn_flat_atomic_fmin:
10501048
case Intrinsic::amdgcn_flat_atomic_fmax_num:
10511049
case Intrinsic::amdgcn_flat_atomic_fmin_num:
10521050
OpIndexes.push_back(0);
@@ -1106,8 +1104,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
11061104
return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
11071105
{NewV, MaskOp});
11081106
}
1109-
case Intrinsic::amdgcn_flat_atomic_fmax:
1110-
case Intrinsic::amdgcn_flat_atomic_fmin:
11111107
case Intrinsic::amdgcn_flat_atomic_fmax_num:
11121108
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
11131109
Type *DestTy = II->getType();

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,15 +1604,11 @@ let OtherPredicates = [isGFX12Plus] in {
16041604
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
16051605
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
16061606
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1607-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
1608-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
16091607
}
16101608

16111609
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
16121610
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
16131611
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
1614-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
1615-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
16161612
}
16171613

16181614
let OtherPredicates = [isGFX12Only] in {
@@ -1642,13 +1638,6 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_globa
16421638
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
16431639
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
16441640
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1645-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
1646-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1647-
}
1648-
1649-
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
1650-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1651-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16521641
}
16531642

16541643
let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,13 +1367,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13671367
MachineMemOperand::MODereferenceable;
13681368
return true;
13691369
}
1370-
case Intrinsic::amdgcn_global_atomic_fmin:
1371-
case Intrinsic::amdgcn_global_atomic_fmax:
13721370
case Intrinsic::amdgcn_global_atomic_fmin_num:
13731371
case Intrinsic::amdgcn_global_atomic_fmax_num:
13741372
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
1375-
case Intrinsic::amdgcn_flat_atomic_fmin:
1376-
case Intrinsic::amdgcn_flat_atomic_fmax:
13771373
case Intrinsic::amdgcn_flat_atomic_fmin_num:
13781374
case Intrinsic::amdgcn_flat_atomic_fmax_num:
13791375
case Intrinsic::amdgcn_atomic_cond_sub_u32: {
@@ -1485,14 +1481,10 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
14851481
case Intrinsic::amdgcn_ds_consume:
14861482
case Intrinsic::amdgcn_ds_ordered_add:
14871483
case Intrinsic::amdgcn_ds_ordered_swap:
1488-
case Intrinsic::amdgcn_flat_atomic_fmax:
14891484
case Intrinsic::amdgcn_flat_atomic_fmax_num:
1490-
case Intrinsic::amdgcn_flat_atomic_fmin:
14911485
case Intrinsic::amdgcn_flat_atomic_fmin_num:
14921486
case Intrinsic::amdgcn_global_atomic_csub:
1493-
case Intrinsic::amdgcn_global_atomic_fmax:
14941487
case Intrinsic::amdgcn_global_atomic_fmax_num:
1495-
case Intrinsic::amdgcn_global_atomic_fmin:
14961488
case Intrinsic::amdgcn_global_atomic_fmin_num:
14971489
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
14981490
case Intrinsic::amdgcn_global_load_tr_b64:
@@ -9397,12 +9389,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
93979389
DAG.setNodeMemRefs(NewNode, {MemRef});
93989390
return SDValue(NewNode, 0);
93999391
}
9400-
case Intrinsic::amdgcn_global_atomic_fmin:
9401-
case Intrinsic::amdgcn_global_atomic_fmax:
94029392
case Intrinsic::amdgcn_global_atomic_fmin_num:
94039393
case Intrinsic::amdgcn_global_atomic_fmax_num:
9404-
case Intrinsic::amdgcn_flat_atomic_fmin:
9405-
case Intrinsic::amdgcn_flat_atomic_fmax:
94069394
case Intrinsic::amdgcn_flat_atomic_fmin_num:
94079395
case Intrinsic::amdgcn_flat_atomic_fmax_num: {
94089396
MemSDNode *M = cast<MemSDNode>(Op);
@@ -9413,16 +9401,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94139401
};
94149402
unsigned Opcode = 0;
94159403
switch (IntrID) {
9416-
case Intrinsic::amdgcn_global_atomic_fmin:
94179404
case Intrinsic::amdgcn_global_atomic_fmin_num:
9418-
case Intrinsic::amdgcn_flat_atomic_fmin:
94199405
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
94209406
Opcode = ISD::ATOMIC_LOAD_FMIN;
94219407
break;
94229408
}
9423-
case Intrinsic::amdgcn_global_atomic_fmax:
94249409
case Intrinsic::amdgcn_global_atomic_fmax_num:
9425-
case Intrinsic::amdgcn_flat_atomic_fmax:
94269410
case Intrinsic::amdgcn_flat_atomic_fmax_num: {
94279411
Opcode = ISD::ATOMIC_LOAD_FMAX;
94289412
break;

llvm/test/Bitcode/amdgcn-atomic.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,70 @@ define float @upgrade_amdgcn_global_atomic_fadd_f32_p1_f32(ptr addrspace(1) %ptr
354354
ret float %result
355355
}
356356

357+
declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr nocapture, float) #0
358+
359+
define float @upgrade_amdgcn_flat_atomic_fmin_f32_p0_f32(ptr %ptr, float %data) {
360+
; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
361+
%result = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data)
362+
ret float %result
363+
}
364+
365+
declare float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) nocapture, float) #0
366+
367+
define float @upgrade_amdgcn_global_atomic_fmin_f32_p1_f32(ptr addrspace(1) %ptr, float %data) {
368+
; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
369+
%result = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
370+
ret float %result
371+
}
372+
373+
declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr nocapture, double) #0
374+
375+
define double @upgrade_amdgcn_flat_atomic_fmin_f64_p0_f64(ptr %ptr, double %data) {
376+
; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
377+
%result = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
378+
ret double %result
379+
}
380+
381+
declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) nocapture, double) #0
382+
383+
define double @upgrade_amdgcn_global_atomic_fmin_f64_p1_f64(ptr addrspace(1) %ptr, double %data) {
384+
; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
385+
%result = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
386+
ret double %result
387+
}
388+
389+
declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr nocapture, float) #0
390+
391+
define float @upgrade_amdgcn_flat_atomic_fmax_f32_p0_f32(ptr %ptr, float %data) {
392+
; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
393+
%result = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data)
394+
ret float %result
395+
}
396+
397+
declare float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) nocapture, float) #0
398+
399+
define float @upgrade_amdgcn_global_atomic_fmax_f32_p1_f32(ptr addrspace(1) %ptr, float %data) {
400+
; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
401+
%result = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
402+
ret float %result
403+
}
404+
405+
declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr nocapture, double) #0
406+
407+
define double @upgrade_amdgcn_flat_atomic_fmax_f64_p0_f64(ptr %ptr, double %data) {
408+
; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
409+
%result = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
410+
ret double %result
411+
}
412+
413+
declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) nocapture, double) #0
414+
415+
define double @upgrade_amdgcn_global_atomic_fmax_f64_p1_f64(ptr addrspace(1) %ptr, double %data) {
416+
; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
417+
%result = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
418+
ret double %result
419+
}
420+
357421
attributes #0 = { argmemonly nounwind willreturn }
358422

359423
; CHECK: !0 = !{i32 5, i32 6}

0 commit comments

Comments
 (0)