Skip to content

Commit 580d9a9

Browse files
committed
AMDGPU: Start selecting flat/global atomicrmw fmin/fmax.
Define subtarget features for atomic fmin/fmax support. The flat/global support is a real messe. We had float/double support at the beginning in gfx6 and gfx7. gfx8 removed these. gfx10 reintroduced them. gfx11 removed the f64 versions again. gfx9 partially reintroduced them, in gfx90a and gfx940 but only for f64.
1 parent 781d5cf commit 580d9a9

21 files changed

+8637
-11981
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
351351
"GFX90AInsts",
352352
"true",
353353
"Additional instructions for GFX90A+"
354+
// [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO
354355
>;
355356

356357
def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
@@ -711,6 +712,30 @@ def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts",
711712
[FeatureFlatGlobalInsts]
712713
>;
713714

715+
def FeatureAtomicFMinFMaxF32GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f32",
716+
"HasAtomicFMinFMaxF32GlobalInsts",
717+
"true",
718+
"Has global/buffer instructions for atomicrmw fmin/fmax for float"
719+
>;
720+
721+
def FeatureAtomicFMinFMaxF64GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f64",
722+
"HasAtomicFMinFMaxF64GlobalInsts",
723+
"true",
724+
"Has global/buffer instructions for atomicrmw fmin/fmax for float"
725+
>;
726+
727+
def FeatureAtomicFMinFMaxF32FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f32",
728+
"HasAtomicFMinFMaxF32FlatInsts",
729+
"true",
730+
"Has flat memory instructions for atomicrmw fmin/fmax for float"
731+
>;
732+
733+
def FeatureAtomicFMinFMaxF64FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f64",
734+
"HasAtomicFMinFMaxF64FlatInsts",
735+
"true",
736+
"Has flat memory instructions for atomicrmw fmin/fmax for double"
737+
>;
738+
714739
def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
715740
"HasAtomicFaddNoRtnInsts",
716741
"true",
@@ -1061,7 +1086,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
10611086
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10621087
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
10631088
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
1064-
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
1089+
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1090+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts
10651091
]
10661092
>;
10671093

@@ -1072,7 +1098,9 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
10721098
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
10731099
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10741100
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
1075-
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
1101+
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1102+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1103+
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
10761104
]
10771105
>;
10781106

@@ -1127,7 +1155,9 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
11271155
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
11281156
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
11291157
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1130-
FeatureMaxHardClauseLength63
1158+
FeatureMaxHardClauseLength63,
1159+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1160+
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
11311161
]
11321162
>;
11331163

@@ -1148,7 +1178,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
11481178
FeatureA16, FeatureFastDenormalF32, FeatureG16,
11491179
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
11501180
FeatureGWS, FeatureDefaultComponentZero,
1151-
FeatureMaxHardClauseLength32
1181+
FeatureMaxHardClauseLength32,
1182+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
11521183
]
11531184
>;
11541185

@@ -1168,7 +1199,9 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
11681199
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
11691200
FeatureA16, FeatureFastDenormalF32, FeatureG16,
11701201
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
1171-
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast
1202+
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
1203+
FeatureMaxHardClauseLength32,
1204+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
11721205
]
11731206
>;
11741207

@@ -1331,7 +1364,10 @@ def FeatureISAVersion9_0_A : FeatureSet<
13311364
FeaturePackedTID,
13321365
FullRate64Ops,
13331366
FeatureBackOffBarrier,
1334-
FeatureKernargPreload])>;
1367+
FeatureKernargPreload,
1368+
FeatureAtomicFMinFMaxF64GlobalInsts,
1369+
FeatureAtomicFMinFMaxF64FlatInsts
1370+
])>;
13351371

13361372
def FeatureISAVersion9_0_C : FeatureSet<
13371373
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
@@ -1371,7 +1407,10 @@ def FeatureISAVersion9_4_Common : FeatureSet<
13711407
FeatureArchitectedFlatScratch,
13721408
FullRate64Ops,
13731409
FeatureBackOffBarrier,
1374-
FeatureKernargPreload]>;
1410+
FeatureKernargPreload,
1411+
FeatureAtomicFMinFMaxF64GlobalInsts,
1412+
FeatureAtomicFMinFMaxF64FlatInsts
1413+
]>;
13751414

13761415
def FeatureISAVersion9_4_0 : FeatureSet<
13771416
!listconcat(FeatureISAVersion9_4_Common.Features,
@@ -1862,11 +1901,28 @@ def isGFX12Plus :
18621901
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
18631902
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
18641903

1865-
def HasBufferFlatGlobalAtomicsF64 :
1904+
1905+
def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
18661906
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
18671907
// FIXME: This is too coarse, and working around using pseudo's predicates on real instruction.
18681908
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
18691909

1910+
def HasAtomicFMinFMaxF32GlobalInsts :
1911+
Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
1912+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32GlobalInsts)>;
1913+
1914+
def HasAtomicFMinFMaxF64GlobalInsts :
1915+
Predicate<"Subtarget->hasAtomicFMinFMaxF64GlobalInsts()">,
1916+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64GlobalInsts)>;
1917+
1918+
def HasAtomicFMinFMaxF32FlatInsts :
1919+
Predicate<"Subtarget->hasAtomicFMinFMaxF32FlatInsts()">,
1920+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32FlatInsts)>;
1921+
1922+
def HasAtomicFMinFMaxF64FlatInsts :
1923+
Predicate<"Subtarget->hasAtomicFMinFMaxF64FlatInsts()">,
1924+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64FlatInsts)>;
1925+
18701926
def HasLdsAtomicAddF64 :
18711927
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
18721928
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,21 +1149,21 @@ let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
11491149
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
11501150
"buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
11511151
>;
1152+
}
1153+
1154+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
11521155
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
11531156
"buffer_atomic_fmin", VGPR_32, f32, null_frag
11541157
>;
11551158
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
11561159
"buffer_atomic_fmax", VGPR_32, f32, null_frag
11571160
>;
1158-
11591161
}
11601162

11611163
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1162-
11631164
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
11641165
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
11651166
>;
1166-
11671167
}
11681168

11691169
let SubtargetPredicate = HasD16LoadStore in {
@@ -1645,6 +1645,16 @@ defm : BufferAtomicPat<"atomic_load_udec_wrap_global", Ty, "BUFFER_ATOMIC_DEC" #
16451645

16461646
} // end foreach Ty
16471647

1648+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
1649+
defm : BufferAtomicPat<"atomic_load_fmin_global", f32, "BUFFER_ATOMIC_FMIN">;
1650+
defm : BufferAtomicPat<"atomic_load_fmax_global", f32, "BUFFER_ATOMIC_FMAX">;
1651+
}
1652+
1653+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
1654+
defm : BufferAtomicPat<"atomic_load_fmin_global", f64, "BUFFER_ATOMIC_MIN_F64">;
1655+
defm : BufferAtomicPat<"atomic_load_fmax_global", f64, "BUFFER_ATOMIC_MAX_F64">;
1656+
}
1657+
16481658
defm : BufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">;
16491659
defm : BufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">;
16501660

@@ -1746,11 +1756,12 @@ let SubtargetPredicate = HasAtomicCSubNoRtnInsts in {
17461756
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
17471757
}
17481758

1749-
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
1759+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
17501760
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
17511761
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
17521762
}
1753-
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1763+
1764+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
17541765
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
17551766
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
17561767
}
@@ -1822,9 +1833,12 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
18221833

18231834
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
18241835
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
1836+
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1837+
1838+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
18251839
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
18261840
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
1827-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1841+
} //End let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts
18281842

18291843
multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, string Inst> {
18301844
foreach RtnMode = ["ret", "noret"] in {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -752,19 +752,29 @@ defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
752752

753753
// GFX7-, GFX10-only flat instructions.
754754
let SubtargetPredicate = isGFX7GFX10 in {
755-
756755
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
757756
VReg_64, f64, v2f64, VReg_128>;
758-
759757
} // End SubtargetPredicate = isGFX7GFX10
760758

759+
760+
// The names may be flat_atomic_fmin_x2 on some subtargets, but we
761+
// choose this as the canonical name.
762+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
763+
defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo <"flat_atomic_min_f64",
764+
VReg_64, f64>;
765+
766+
defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo <"flat_atomic_max_f64",
767+
VReg_64, f64>;
768+
}
769+
770+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
771+
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
772+
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
773+
}
774+
761775
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
762776
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
763-
defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
764-
defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
765777
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
766-
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
767-
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
768778
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
769779

770780
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
@@ -1415,6 +1425,17 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
14151425
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
14161426
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
14171427
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1428+
1429+
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
1430+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_"#as, f32>;
1431+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_"#as, f32>;
1432+
}
1433+
1434+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
1435+
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_"#as, f64>;
1436+
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
1437+
}
1438+
14181439
} // end foreach as
14191440

14201441
let SubtargetPredicate = isGFX12Plus in {
@@ -1576,33 +1597,22 @@ let OtherPredicates = [isGFX12Plus] in {
15761597
}
15771598
}
15781599

1579-
let OtherPredicates = [isGFX10Plus] in {
1600+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
15801601
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
15811602
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1582-
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1583-
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
1584-
}
1585-
1586-
let OtherPredicates = [isGFX10GFX11] in {
15871603
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
15881604
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
1605+
}
15891606

1607+
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
1608+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1609+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
15901610
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
15911611
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
15921612
}
15931613

1594-
let OtherPredicates = [isGFX10Only] in {
1595-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
1596-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1597-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
1598-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1599-
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1600-
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1601-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1602-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1603-
}
1604-
16051614
let OtherPredicates = [isGFX12Only] in {
1615+
// FIXME: Remove these intrinsics
16061616
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
16071617
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
16081618
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
@@ -1632,22 +1642,26 @@ defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_am
16321642
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
16331643
}
16341644

1635-
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1636-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1645+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
16371646
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
16381647
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1639-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1640-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
16411648
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
16421649
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1643-
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1644-
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1645-
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1646-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
1650+
}
1651+
1652+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
16471653
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
16481654
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16491655
}
16501656

1657+
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1658+
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1659+
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1660+
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
1661+
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1662+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
1663+
}
1664+
16511665
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
16521666
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
16531667
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
159159
bool HasFP8Insts = false;
160160
bool HasFP8ConversionInsts = false;
161161
bool HasPkFmacF16Inst = false;
162+
bool HasAtomicFMinFMaxF32GlobalInsts = false;
163+
bool HasAtomicFMinFMaxF64GlobalInsts = false;
164+
bool HasAtomicFMinFMaxF32FlatInsts = false;
165+
bool HasAtomicFMinFMaxF64FlatInsts = false;
162166
bool HasAtomicDsPkAdd16Insts = false;
163167
bool HasAtomicFlatPkAdd16Insts = false;
164168
bool HasAtomicFaddRtnInsts = false;
@@ -820,6 +824,22 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
820824
return HasPkFmacF16Inst;
821825
}
822826

827+
bool hasAtomicFMinFMaxF32GlobalInsts() const {
828+
return HasAtomicFMinFMaxF32GlobalInsts;
829+
}
830+
831+
bool hasAtomicFMinFMaxF64GlobalInsts() const {
832+
return HasAtomicFMinFMaxF64GlobalInsts;
833+
}
834+
835+
bool hasAtomicFMinFMaxF32FlatInsts() const {
836+
return HasAtomicFMinFMaxF32FlatInsts;
837+
}
838+
839+
bool hasAtomicFMinFMaxF64FlatInsts() const {
840+
return HasAtomicFMinFMaxF64FlatInsts;
841+
}
842+
823843
bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
824844

825845
bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }

0 commit comments

Comments
 (0)