Skip to content

Commit ece3239

Browse files
committed
AMDGPU: Add subtarget feature for memory atomic fadd f64
1 parent 1a5d8b8 commit ece3239

File tree

5 files changed

+31
-18
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
788788
"Has flat_atomic_add_f32 instruction"
789789
>;
790790

791+
def FeatureFlatBufferGlobalAtomicFaddF64Inst
792+
: SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
793+
"HasFlatBufferGlobalAtomicFaddF64Inst",
794+
"true",
795+
"Has flat, buffer, and global instructions for f64 atomic fadd"
796+
>;
797+
791798
def FeatureMemoryAtomicFAddF32DenormalSupport
792799
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
793800
"HasMemoryAtomicFaddF32DenormalSupport",
@@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
13901397
FeatureBackOffBarrier,
13911398
FeatureKernargPreload,
13921399
FeatureAtomicFMinFMaxF64GlobalInsts,
1393-
FeatureAtomicFMinFMaxF64FlatInsts
1400+
FeatureAtomicFMinFMaxF64FlatInsts,
1401+
FeatureFlatBufferGlobalAtomicFaddF64Inst
13941402
])>;
13951403

13961404
def FeatureISAVersion9_0_C : FeatureSet<
@@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14351443
FeatureAtomicFMinFMaxF64GlobalInsts,
14361444
FeatureAtomicFMinFMaxF64FlatInsts,
14371445
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
1438-
FeatureMemoryAtomicFAddF32DenormalSupport
1446+
FeatureMemoryAtomicFAddF32DenormalSupport,
1447+
FeatureFlatBufferGlobalAtomicFaddF64Inst
14391448
]>;
14401449

14411450
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1932,11 +1941,9 @@ def isGFX12Plus :
19321941
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
19331942
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
19341943

1935-
1936-
def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
1937-
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
1938-
// FIXME: This is too coarse, and working around using pseudo's predicates on real instruction.
1939-
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
1944+
def HasFlatBufferGlobalAtomicFaddF64Inst :
1945+
Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">,
1946+
AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>;
19401947

19411948
def HasAtomicFMinFMaxF32GlobalInsts :
19421949
Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in {
13121312
}
13131313
} // End SubtargetPredicate = isGFX90APlus
13141314

1315-
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
1315+
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
13161316
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
1317+
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
13171318

1319+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
13181320
// Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2
13191321
// depending on some subtargets.
13201322
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
13211323
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
1322-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1324+
} // End SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts
13231325

13241326
def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
13251327
let SubtargetPredicate = isGFX940Plus;
@@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
18361838
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
18371839
} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts
18381840

1839-
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
1841+
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
18401842
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
1841-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1843+
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
18421844

18431845
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
18441846
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -772,10 +772,10 @@ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64",
772772
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
773773
}
774774

775-
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
775+
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
776776
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
777777
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
778-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
778+
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
779779

780780
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
781781
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
@@ -1655,7 +1655,7 @@ defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin",
16551655
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16561656
}
16571657

1658-
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1658+
let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
16591659
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
16601660
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
16611661
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
174174
bool HasAtomicGlobalPkAddBF16Inst = false;
175175
bool HasAtomicBufferPkAddBF16Inst = false;
176176
bool HasFlatAtomicFaddF32Inst = false;
177+
bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
177178
bool HasDefaultComponentZero = false;
178179
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
179180
bool HasDefaultComponentBroadcast = false;
@@ -660,9 +661,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
660661
return GFX10_BEncoding;
661662
}
662663

663-
// BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64
664-
bool hasBufferFlatGlobalAtomicsF64() const { return hasGFX90AInsts(); }
665-
666664
bool hasExportInsts() const {
667665
return !hasGFX940Insts();
668666
}
@@ -873,6 +871,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
873871

874872
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
875873

874+
/// \return true if the target has flat, global, and buffer atomic fadd for
875+
/// double.
876+
bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
877+
return HasFlatBufferGlobalAtomicFaddF64Inst;
878+
}
879+
876880
/// \return true if the target's flat, global, and buffer atomic fadd for
877881
/// float supports denormal handling.
878882
bool hasMemoryAtomicFaddF32DenormalSupport() const {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16216,7 +16216,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1621616216
return AtomicExpansionKind::CmpXChg;
1621716217

1621816218
// global and flat atomic fadd f64: gfx90a, gfx940.
16219-
if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
16219+
if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
1622016220
return ReportUnsafeHWInst(AtomicExpansionKind::None);
1622116221

1622216222
if (AS != AMDGPUAS::FLAT_ADDRESS) {

0 commit comments

Comments
 (0)