Skip to content

Commit 9f8e7c3

Browse files
authored
AMDGPU: Create pseudo to real mapping for flat/buffer atomic fmin/fmax (#95591)
The global/flat/buffer atomic fmin/fmax situation is a mess. These instructions have been renamed 3 times. We currently have separate pseudos defined for the same opcodes with the different names (e.g. GLOBAL_ATOMIC_MIN_F64 from gfx90a and GLOBAL_ATOMIC_FMIN_X2 from gfx10). Use the _FMIN versions as the canonical name for the f32 versions. Use the _MIN_F64 style as the canonical name for the f64 case. This is because gfx90a has the most sensible names, but does not have the f32 versions. Wire through the pseudo to use for the instruction properties vs. the assembly name, like in other cases. This will simplify directly selecting these instructions from atomicrmw.
1 parent cf5ce8c commit 9f8e7c3

File tree

4 files changed

+129
-125
lines changed

4 files changed

+129
-125
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1864,7 +1864,9 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
18641864

18651865
def HasBufferFlatGlobalAtomicsF64 :
18661866
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
1867-
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
1867+
// FIXME: This is too coarse; it works around the pseudo's predicates being used on the real instructions.
1868+
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
1869+
18681870
def HasLdsAtomicAddF64 :
18691871
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
18701872
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 53 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,12 +1163,6 @@ let SubtargetPredicate = isGFX6GFX7GFX10 in {
11631163
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
11641164
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
11651165
>;
1166-
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <
1167-
"buffer_atomic_fmin_x2", VReg_64, f64, null_frag
1168-
>;
1169-
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
1170-
"buffer_atomic_fmax_x2", VReg_64, f64, null_frag
1171-
>;
11721166

11731167
}
11741168

@@ -1318,6 +1312,9 @@ let SubtargetPredicate = isGFX90APlus in {
13181312

13191313
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
13201314
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
1315+
1316+
// Note: on some subtargets (gfx6/gfx7/gfx10) the assembly names are
1317+
// buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 instead.
13211318
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
13221319
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
13231320
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
@@ -1751,8 +1748,8 @@ let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
17511748
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
17521749
}
17531750
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1754-
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_FMIN_X2">;
1755-
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_FMAX_X2">;
1751+
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
1752+
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
17561753
}
17571754

17581755
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
@@ -2303,6 +2300,12 @@ let OtherPredicates = [HasPackedD16VMem] in {
23032300
// Target-specific instruction encodings.
23042301
//===----------------------------------------------------------------------===//
23052302

2303+
// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
2304+
// specific pseudo (offset in this case) since any of them will work.
2305+
class get_BUF_ps<string name> {
2306+
string Mnemonic = !cast<BUF_Pseudo>(name # "_OFFSET").Mnemonic;
2307+
}
2308+
23062309
//===----------------------------------------------------------------------===//
23072310
// Base ENC_MUBUF for GFX6, GFX7, GFX10, GFX11.
23082311
//===----------------------------------------------------------------------===//
@@ -2334,8 +2337,8 @@ multiclass MUBUF_Real_gfx11<bits<8> op, string real_name = !cast<MUBUF_Pseudo>(N
23342337
}
23352338
}
23362339

2337-
class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
2338-
Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> {
2340+
class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef, string asmName> :
2341+
Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef, asmName> {
23392342
let Inst{12} = ps.offen;
23402343
let Inst{13} = ps.idxen;
23412344
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
@@ -2345,19 +2348,21 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
23452348
let Inst{55} = ps.tfe;
23462349
}
23472350

2348-
multiclass MUBUF_Real_gfx10<bits<8> op> {
2349-
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2350-
def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
2351+
multiclass MUBUF_Real_gfx10<bits<8> op, string psName = NAME,
2352+
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
2353+
defvar ps = !cast<MUBUF_Pseudo>(psName);
2354+
def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10, asmName> {
23512355
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
23522356
let Inst{25} = op{7};
23532357
let AssemblerPredicate = isGFX10Only;
23542358
let DecoderNamespace = "GFX10";
23552359
}
23562360
}
23572361

2358-
multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {
2359-
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2360-
def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
2362+
multiclass MUBUF_Real_gfx6_gfx7<bits<8> op, string psName = NAME,
2363+
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
2364+
defvar ps = !cast<MUBUF_Pseudo>(psName);
2365+
def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, asmName> {
23612366
let Inst{15} = ps.addr64;
23622367
let AssemblerPredicate = isGFX6GFX7;
23632368
let DecoderNamespace = "GFX6GFX7";
@@ -2366,7 +2371,7 @@ multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {
23662371

23672372
multiclass MUBUF_Real_gfx6<bits<8> op> {
23682373
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2369-
def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
2374+
def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
23702375
let Inst{15} = ps.addr64;
23712376
let AssemblerPredicate = isGFX6;
23722377
let DecoderNamespace = "GFX6";
@@ -2375,7 +2380,7 @@ multiclass MUBUF_Real_gfx6<bits<8> op> {
23752380

23762381
multiclass MUBUF_Real_gfx7<bits<8> op> {
23772382
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2378-
def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
2383+
def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
23792384
let Inst{15} = ps.addr64;
23802385
let AssemblerPredicate = isGFX7Only;
23812386
let DecoderNamespace = "GFX7";
@@ -2476,12 +2481,6 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> {
24762481
// MUBUF - GFX11, GFX12.
24772482
//===----------------------------------------------------------------------===//
24782483

2479-
// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
2480-
// specific pseudo (bothen in this case) since any of them will work.
2481-
class get_BUF_ps<string name> {
2482-
string Mnemonic = !cast<BUF_Pseudo>(name # "_BOTHEN").Mnemonic;
2483-
}
2484-
24852484
// gfx11 instructions that accept both the old and the new assembler names.
24862485
class Mnem_gfx11_gfx12 <string mnemonic, string real_name> :
24872486
AMDGPUMnemonicAlias<mnemonic, real_name> {
@@ -2703,18 +2702,20 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op, bit isTFE = 0> {
27032702
defm _LDS_BOTHEN : MUBUF_Real_gfx10<op>;
27042703
}
27052704
}
2706-
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
2707-
defm _BOTHEN_RTN : MUBUF_Real_gfx10<op>;
2708-
defm _IDXEN_RTN : MUBUF_Real_gfx10<op>;
2709-
defm _OFFEN_RTN : MUBUF_Real_gfx10<op>;
2710-
defm _OFFSET_RTN : MUBUF_Real_gfx10<op>;
2705+
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op, string psName = NAME,
2706+
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
2707+
defm _BOTHEN_RTN : MUBUF_Real_gfx10<op, psName#"_BOTHEN_RTN", asmName>;
2708+
defm _IDXEN_RTN : MUBUF_Real_gfx10<op, psName#"_IDXEN_RTN", asmName>;
2709+
defm _OFFEN_RTN : MUBUF_Real_gfx10<op, psName#"_OFFEN_RTN", asmName>;
2710+
defm _OFFSET_RTN : MUBUF_Real_gfx10<op, psName#"_OFFSET_RTN", asmName>;
27112711
}
2712-
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
2713-
MUBUF_Real_Atomics_RTN_gfx10<op> {
2714-
defm _BOTHEN : MUBUF_Real_gfx10<op>;
2715-
defm _IDXEN : MUBUF_Real_gfx10<op>;
2716-
defm _OFFEN : MUBUF_Real_gfx10<op>;
2717-
defm _OFFSET : MUBUF_Real_gfx10<op>;
2712+
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op, string psName = NAME,
2713+
string asmName = get_BUF_ps<psName>.Mnemonic> :
2714+
MUBUF_Real_Atomics_RTN_gfx10<op, psName, asmName> {
2715+
defm _BOTHEN : MUBUF_Real_gfx10<op, psName#"_BOTHEN", asmName>;
2716+
defm _IDXEN : MUBUF_Real_gfx10<op, psName#"_IDXEN", asmName>;
2717+
defm _OFFEN : MUBUF_Real_gfx10<op, psName#"_OFFEN", asmName>;
2718+
defm _OFFSET : MUBUF_Real_gfx10<op, psName#"_OFFSET", asmName>;
27182719
}
27192720

27202721
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
@@ -2769,18 +2770,18 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op, bit isTFE = 0> {
27692770
defm _LDS_BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
27702771
}
27712772
}
2772-
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
2773-
defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op>;
2774-
defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
2775-
defm _IDXEN : MUBUF_Real_gfx6_gfx7<op>;
2776-
defm _OFFEN : MUBUF_Real_gfx6_gfx7<op>;
2777-
defm _OFFSET : MUBUF_Real_gfx6_gfx7<op>;
2773+
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op, string psName, string asmName> {
2774+
defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64", asmName>;
2775+
defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN", asmName>;
2776+
defm _IDXEN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN", asmName>;
2777+
defm _OFFEN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN", asmName>;
2778+
defm _OFFSET : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET", asmName>;
27782779

2779-
defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op>;
2780-
defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
2781-
defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
2782-
defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
2783-
defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op>;
2780+
defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64_RTN", asmName>;
2781+
defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN_RTN", asmName>;
2782+
defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN_RTN", asmName>;
2783+
defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN_RTN", asmName>;
2784+
defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET_RTN", asmName>;
27842785
}
27852786

27862787
multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
@@ -2795,8 +2796,10 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> {
27952796
defm _TFE : MUBUF_Real_AllAddr_Lds_Helper_gfx6_gfx7_gfx10<op, 1>;
27962797
}
27972798

2798-
multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
2799-
MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
2799+
multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op, string psName = NAME,
2800+
string asmName = get_BUF_ps<psName>.Mnemonic> :
2801+
MUBUF_Real_Atomics_gfx6_gfx7<op, psName, asmName>,
2802+
MUBUF_Real_Atomics_gfx10<op, psName, asmName>;
28002803

28012804
// FIXME-GFX6: Following instructions are available only on GFX6.
28022805
//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
@@ -2856,8 +2859,8 @@ defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
28562859
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
28572860
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
28582861
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
2859-
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
2860-
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
2862+
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f, "BUFFER_ATOMIC_MIN_F64", "buffer_atomic_fmin_x2">;
2863+
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060, "BUFFER_ATOMIC_MAX_F64", "buffer_atomic_fmax_x2">;
28612864

28622865
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>;
28632866

0 commit comments

Comments
 (0)