Skip to content

Commit 5e389ee

Browse files
committed
AMDGPU: Start selecting flat/global atomicrmw fmin/fmax.
Define subtarget features for atomic fmin/fmax support. The flat/global support is a real mess. We had float/double support at the beginning in gfx6 and gfx7. gfx8 removed these. gfx10 reintroduced them. gfx11 removed the f64 versions again. gfx9 partially reintroduced them, in gfx90a and gfx940 but only for f64.
1 parent a5b973d commit 5e389ee

21 files changed

+8604
-12036
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
351351
"GFX90AInsts",
352352
"true",
353353
"Additional instructions for GFX90A+"
354+
// [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO
354355
>;
355356

356357
def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
@@ -711,6 +712,30 @@ def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts",
711712
[FeatureFlatGlobalInsts]
712713
>;
713714

715+
def FeatureAtomicFMinFMaxF32GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f32",
716+
"HasAtomicFMinFMaxF32GlobalInsts",
717+
"true",
718+
"Has global/buffer instructions for atomicrmw fmin/fmax for float"
719+
>;
720+
721+
// Global/buffer atomicrmw fmin/fmax support for f64 (double) operands.
def FeatureAtomicFMinFMaxF64GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f64",
722+
"HasAtomicFMinFMaxF64GlobalInsts",
723+
"true",
724+
"Has global/buffer instructions for atomicrmw fmin/fmax for double"
725+
>;
726+
727+
def FeatureAtomicFMinFMaxF32FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f32",
728+
"HasAtomicFMinFMaxF32FlatInsts",
729+
"true",
730+
"Has flat memory instructions for atomicrmw fmin/fmax for float"
731+
>;
732+
733+
def FeatureAtomicFMinFMaxF64FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f64",
734+
"HasAtomicFMinFMaxF64FlatInsts",
735+
"true",
736+
"Has flat memory instructions for atomicrmw fmin/fmax for double"
737+
>;
738+
714739
def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
715740
"HasAtomicFaddNoRtnInsts",
716741
"true",
@@ -1061,7 +1086,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
10611086
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10621087
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
10631088
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
1064-
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
1089+
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1090+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts
10651091
]
10661092
>;
10671093

@@ -1072,7 +1098,9 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
10721098
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
10731099
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10741100
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
1075-
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
1101+
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1102+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1103+
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
10761104
]
10771105
>;
10781106

@@ -1127,7 +1155,9 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
11271155
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
11281156
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
11291157
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1130-
FeatureMaxHardClauseLength63
1158+
FeatureMaxHardClauseLength63,
1159+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1160+
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
11311161
]
11321162
>;
11331163

@@ -1148,7 +1178,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
11481178
FeatureA16, FeatureFastDenormalF32, FeatureG16,
11491179
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
11501180
FeatureGWS, FeatureDefaultComponentZero,
1151-
FeatureMaxHardClauseLength32
1181+
FeatureMaxHardClauseLength32,
1182+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
11521183
]
11531184
>;
11541185

@@ -1168,7 +1199,9 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
11681199
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
11691200
FeatureA16, FeatureFastDenormalF32, FeatureG16,
11701201
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
1171-
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast
1202+
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
1203+
FeatureMaxHardClauseLength32,
1204+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
11721205
]
11731206
>;
11741207

@@ -1331,7 +1364,10 @@ def FeatureISAVersion9_0_A : FeatureSet<
13311364
FeaturePackedTID,
13321365
FullRate64Ops,
13331366
FeatureBackOffBarrier,
1334-
FeatureKernargPreload])>;
1367+
FeatureKernargPreload,
1368+
FeatureAtomicFMinFMaxF64GlobalInsts,
1369+
FeatureAtomicFMinFMaxF64FlatInsts
1370+
])>;
13351371

13361372
def FeatureISAVersion9_0_C : FeatureSet<
13371373
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
@@ -1371,7 +1407,10 @@ def FeatureISAVersion9_4_Common : FeatureSet<
13711407
FeatureArchitectedFlatScratch,
13721408
FullRate64Ops,
13731409
FeatureBackOffBarrier,
1374-
FeatureKernargPreload]>;
1410+
FeatureKernargPreload,
1411+
FeatureAtomicFMinFMaxF64GlobalInsts,
1412+
FeatureAtomicFMinFMaxF64FlatInsts
1413+
]>;
13751414

13761415
def FeatureISAVersion9_4_0 : FeatureSet<
13771416
!listconcat(FeatureISAVersion9_4_Common.Features,
@@ -1862,11 +1901,28 @@ def isGFX12Plus :
18621901
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
18631902
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
18641903

1865-
def HasBufferFlatGlobalAtomicsF64 :
1904+
1905+
def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
18661906
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
18671907
// FIXME: This is too coarse, and working around using pseudo's predicates on real instruction.
18681908
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
18691909

1910+
def HasAtomicFMinFMaxF32GlobalInsts :
1911+
Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
1912+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32GlobalInsts)>;
1913+
1914+
def HasAtomicFMinFMaxF64GlobalInsts :
1915+
Predicate<"Subtarget->hasAtomicFMinFMaxF64GlobalInsts()">,
1916+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64GlobalInsts)>;
1917+
1918+
def HasAtomicFMinFMaxF32FlatInsts :
1919+
Predicate<"Subtarget->hasAtomicFMinFMaxF32FlatInsts()">,
1920+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32FlatInsts)>;
1921+
1922+
def HasAtomicFMinFMaxF64FlatInsts :
1923+
Predicate<"Subtarget->hasAtomicFMinFMaxF64FlatInsts()">,
1924+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64FlatInsts)>;
1925+
18701926
def HasLdsAtomicAddF64 :
18711927
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
18721928
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,21 +1149,21 @@ let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
11491149
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
11501150
"buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
11511151
>;
1152+
}
1153+
1154+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
11521155
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
11531156
"buffer_atomic_fmin", VGPR_32, f32, null_frag
11541157
>;
11551158
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
11561159
"buffer_atomic_fmax", VGPR_32, f32, null_frag
11571160
>;
1158-
11591161
}
11601162

11611163
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1162-
11631164
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
11641165
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
11651166
>;
1166-
11671167
}
11681168

11691169
let SubtargetPredicate = HasD16LoadStore in {
@@ -1657,6 +1657,16 @@ defm : BufferAtomicPat<"atomic_load_udec_wrap_global", Ty, "BUFFER_ATOMIC_DEC" #
16571657

16581658
} // end foreach Ty
16591659

1660+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
1661+
defm : BufferAtomicPat<"atomic_load_fmin_global", f32, "BUFFER_ATOMIC_FMIN">;
1662+
defm : BufferAtomicPat<"atomic_load_fmax_global", f32, "BUFFER_ATOMIC_FMAX">;
1663+
}
1664+
1665+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
1666+
defm : BufferAtomicPat<"atomic_load_fmin_global", f64, "BUFFER_ATOMIC_MIN_F64">;
1667+
defm : BufferAtomicPat<"atomic_load_fmax_global", f64, "BUFFER_ATOMIC_MAX_F64">;
1668+
}
1669+
16601670
defm : BufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">;
16611671
defm : BufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">;
16621672

@@ -1711,7 +1721,7 @@ multiclass SIBufferAtomicPat_Common<string OpPrefix, ValueType vt, string Inst,
17111721

17121722
multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
17131723
list<string> RtnModes = ["ret", "noret"]> {
1714-
let SubtargetPredicate = HasUnrestrictedSOffset in {
1724+
let OtherPredicates = [HasUnrestrictedSOffset] in {
17151725
defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst, RtnModes>;
17161726
}
17171727
defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst # "_VBUFFER", RtnModes>;
@@ -1744,22 +1754,24 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
17441754
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
17451755
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
17461756

1747-
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1757+
let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
17481758
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
17491759

17501760
let SubtargetPredicate = isGFX12Plus in {
17511761
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd_bf16", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
17521762
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;
1763+
}
17531764

1754-
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1765+
let SubtargetPredicate = HasAtomicCSubNoRtnInsts in {
17551766
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
17561767
}
17571768

1758-
let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
1769+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
17591770
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
17601771
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
17611772
}
1762-
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1773+
1774+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
17631775
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
17641776
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
17651777
}
@@ -1815,7 +1827,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
18151827
defm : BufferAtomicPatterns_NO_RTN_Common<name, vt, opcode # "_VBUFFER">;
18161828
}
18171829

1818-
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
1830+
let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
18191831
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["noret"]>;
18201832

18211833
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
@@ -1826,7 +1838,7 @@ let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
18261838
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["noret"]>;
18271839
} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts]
18281840

1829-
let OtherPredicates = [HasAtomicFaddRtnInsts] in
1841+
let SubtargetPredicate = HasAtomicFaddRtnInsts in
18301842
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["ret"]>;
18311843

18321844
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
@@ -1837,11 +1849,14 @@ let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
18371849
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["ret"]>;
18381850
} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts]
18391851

1840-
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1852+
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
18411853
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
1854+
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1855+
1856+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
18421857
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
18431858
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
1844-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1859+
} //End let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts
18451860

18461861
multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, string Inst> {
18471862
foreach RtnMode = ["ret", "noret"] in {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -752,19 +752,29 @@ defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
752752

753753
// GFX7-, GFX10-only flat instructions.
754754
let SubtargetPredicate = isGFX7GFX10 in {
755-
756755
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
757756
VReg_64, f64, v2f64, VReg_128>;
758-
759757
} // End SubtargetPredicate = isGFX7GFX10
760758

759+
760+
// The names may be flat_atomic_fmin_x2 on some subtargets, but we
761+
// choose this as the canonical name.
762+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
763+
defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo <"flat_atomic_min_f64",
764+
VReg_64, f64>;
765+
766+
defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo <"flat_atomic_max_f64",
767+
VReg_64, f64>;
768+
}
769+
770+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
771+
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
772+
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
773+
}
774+
761775
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
762776
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
763-
defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
764-
defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
765777
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
766-
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
767-
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
768778
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
769779

770780
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
@@ -1421,6 +1431,17 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
14211431
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
14221432
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
14231433
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1434+
1435+
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
1436+
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_"#as, f32>;
1437+
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_"#as, f32>;
1438+
}
1439+
1440+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
1441+
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_"#as, f64>;
1442+
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
1443+
}
1444+
14241445
} // end foreach as
14251446

14261447
let SubtargetPredicate = isGFX12Plus in {
@@ -1582,33 +1603,33 @@ let OtherPredicates = [isGFX12Plus] in {
15821603
}
15831604
}
15841605

1585-
let OtherPredicates = [isGFX10Plus] in {
1606+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
15861607
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
15871608
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1588-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1589-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
1590-
}
1591-
1592-
let OtherPredicates = [isGFX10GFX11] in {
15931609
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
15941610
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
1611+
}
15951612

1613+
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
1614+
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1615+
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
15961616
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
15971617
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
15981618
}
15991619

1600-
let OtherPredicates = [isGFX10Only] in {
1601-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
1602-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1603-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
1604-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1605-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1606-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1607-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1608-
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1609-
}
1620+
// let OtherPredicates = [isGFX10Only] in { // fixme
1621+
// defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
1622+
// defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1623+
// defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
1624+
// defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1625+
// defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1626+
// defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1627+
// defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1628+
// defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1629+
// }
16101630

16111631
let OtherPredicates = [isGFX12Only] in {
1632+
// FIXME: Remove these intrinsics
16121633
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
16131634
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
16141635
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
@@ -1638,22 +1659,26 @@ defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_am
16381659
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
16391660
}
16401661

1641-
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1642-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1662+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
16431663
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
16441664
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1645-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1646-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
16471665
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
16481666
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1649-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1650-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1651-
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1652-
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>;
1667+
}
1668+
1669+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
16531670
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
16541671
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16551672
}
16561673

1674+
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1675+
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1676+
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1677+
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
1678+
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1679+
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>;
1680+
}
1681+
16571682
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
16581683
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
16591684
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>;

0 commit comments

Comments
 (0)