Skip to content

Commit b00ad0d

Browse files
committed
AMDGPU: Create pseudo to real mapping for flat/buffer atomic fmin/fmax
The global/flat/buffer atomic fmin/fmax situation is a mess. These instructions have been renamed 3 times. We currently have separate pseudos defined for the same opcodes with different names (e.g. GLOBAL_ATOMIC_MIN_F64 from gfx90a and GLOBAL_ATOMIC_FMIN_X2 from gfx10). Use the _FMIN versions as the canonical name for the f32 versions. Use the _MIN_F64 style as the canonical name for the f64 case. This is because gfx90a has the most sensible names, but does not have the f32 versions. Wire through the pseudo to use for the instruction properties vs. the assembly name, like in other cases. This will simplify directly selecting these instructions from atomicrmw.
1 parent 2a29fa0 commit b00ad0d

File tree

4 files changed

+129
-125
lines changed

4 files changed

+129
-125
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1864,7 +1864,9 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
18641864

18651865
def HasBufferFlatGlobalAtomicsF64 :
18661866
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
1867-
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
1867+
// FIXME: This is too coarse; it works around the pseudo's predicates being applied to the real instructions.
1868+
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
1869+
18681870
def HasLdsAtomicAddF64 :
18691871
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
18701872
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 53 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,12 +1163,6 @@ let SubtargetPredicate = isGFX6GFX7GFX10 in {
11631163
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
11641164
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
11651165
>;
1166-
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <
1167-
"buffer_atomic_fmin_x2", VReg_64, f64, null_frag
1168-
>;
1169-
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
1170-
"buffer_atomic_fmax_x2", VReg_64, f64, null_frag
1171-
>;
11721166

11731167
}
11741168

@@ -1318,6 +1312,9 @@ let SubtargetPredicate = isGFX90APlus in {
13181312

13191313
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
13201314
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
1315+
1316+
// Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2
1317+
// on some subtargets.
13211318
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
13221319
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
13231320
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
@@ -1763,8 +1760,8 @@ let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
17631760
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
17641761
}
17651762
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1766-
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_FMIN_X2">;
1767-
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_FMAX_X2">;
1763+
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
1764+
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
17681765
}
17691766

17701767
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
@@ -2315,6 +2312,12 @@ let OtherPredicates = [HasPackedD16VMem] in {
23152312
// Target-specific instruction encodings.
23162313
//===----------------------------------------------------------------------===//
23172314

2315+
// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
2316+
// specific pseudo (offset in this case) since any of them will work.
2317+
class get_BUF_ps<string name> {
2318+
string Mnemonic = !cast<BUF_Pseudo>(name # "_OFFSET").Mnemonic;
2319+
}
2320+
23182321
//===----------------------------------------------------------------------===//
23192322
// Base ENC_MUBUF for GFX6, GFX7, GFX10, GFX11.
23202323
//===----------------------------------------------------------------------===//
@@ -2346,8 +2349,8 @@ multiclass MUBUF_Real_gfx11<bits<8> op, string real_name = !cast<MUBUF_Pseudo>(N
23462349
}
23472350
}
23482351

2349-
class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
2350-
Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> {
2352+
class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef, string asmName> :
2353+
Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef, asmName> {
23512354
let Inst{12} = ps.offen;
23522355
let Inst{13} = ps.idxen;
23532356
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
@@ -2357,19 +2360,21 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
23572360
let Inst{55} = ps.tfe;
23582361
}
23592362

2360-
multiclass MUBUF_Real_gfx10<bits<8> op> {
2361-
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2362-
def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
2363+
multiclass MUBUF_Real_gfx10<bits<8> op, string psName = NAME,
2364+
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
2365+
defvar ps = !cast<MUBUF_Pseudo>(psName);
2366+
def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10, asmName> {
23632367
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
23642368
let Inst{25} = op{7};
23652369
let AssemblerPredicate = isGFX10Only;
23662370
let DecoderNamespace = "GFX10";
23672371
}
23682372
}
23692373

2370-
multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {
2371-
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2372-
def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
2374+
multiclass MUBUF_Real_gfx6_gfx7<bits<8> op, string psName = NAME,
2375+
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
2376+
defvar ps = !cast<MUBUF_Pseudo>(psName);
2377+
def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, asmName> {
23732378
let Inst{15} = ps.addr64;
23742379
let AssemblerPredicate = isGFX6GFX7;
23752380
let DecoderNamespace = "GFX6GFX7";
@@ -2378,7 +2383,7 @@ multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {
23782383

23792384
multiclass MUBUF_Real_gfx6<bits<8> op> {
23802385
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2381-
def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
2386+
def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
23822387
let Inst{15} = ps.addr64;
23832388
let AssemblerPredicate = isGFX6;
23842389
let DecoderNamespace = "GFX6";
@@ -2387,7 +2392,7 @@ multiclass MUBUF_Real_gfx6<bits<8> op> {
23872392

23882393
multiclass MUBUF_Real_gfx7<bits<8> op> {
23892394
defvar ps = !cast<MUBUF_Pseudo>(NAME);
2390-
def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
2395+
def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
23912396
let Inst{15} = ps.addr64;
23922397
let AssemblerPredicate = isGFX7Only;
23932398
let DecoderNamespace = "GFX7";
@@ -2488,12 +2493,6 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> {
24882493
// MUBUF - GFX11, GFX12.
24892494
//===----------------------------------------------------------------------===//
24902495

2491-
// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
2492-
// specific pseudo (bothen in this case) since any of them will work.
2493-
class get_BUF_ps<string name> {
2494-
string Mnemonic = !cast<BUF_Pseudo>(name # "_BOTHEN").Mnemonic;
2495-
}
2496-
24972496
// gfx11 instruction that accept both old and new assembler name.
24982497
class Mnem_gfx11_gfx12 <string mnemonic, string real_name> :
24992498
AMDGPUMnemonicAlias<mnemonic, real_name> {
@@ -2715,18 +2714,20 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op, bit isTFE = 0> {
27152714
defm _LDS_BOTHEN : MUBUF_Real_gfx10<op>;
27162715
}
27172716
}
2718-
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
2719-
defm _BOTHEN_RTN : MUBUF_Real_gfx10<op>;
2720-
defm _IDXEN_RTN : MUBUF_Real_gfx10<op>;
2721-
defm _OFFEN_RTN : MUBUF_Real_gfx10<op>;
2722-
defm _OFFSET_RTN : MUBUF_Real_gfx10<op>;
2717+
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op, string psName = NAME,
2718+
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
2719+
defm _BOTHEN_RTN : MUBUF_Real_gfx10<op, psName#"_BOTHEN_RTN", asmName>;
2720+
defm _IDXEN_RTN : MUBUF_Real_gfx10<op, psName#"_IDXEN_RTN", asmName>;
2721+
defm _OFFEN_RTN : MUBUF_Real_gfx10<op, psName#"_OFFEN_RTN", asmName>;
2722+
defm _OFFSET_RTN : MUBUF_Real_gfx10<op, psName#"_OFFSET_RTN", asmName>;
27232723
}
2724-
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
2725-
MUBUF_Real_Atomics_RTN_gfx10<op> {
2726-
defm _BOTHEN : MUBUF_Real_gfx10<op>;
2727-
defm _IDXEN : MUBUF_Real_gfx10<op>;
2728-
defm _OFFEN : MUBUF_Real_gfx10<op>;
2729-
defm _OFFSET : MUBUF_Real_gfx10<op>;
2724+
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op, string psName = NAME,
2725+
string asmName = get_BUF_ps<psName>.Mnemonic> :
2726+
MUBUF_Real_Atomics_RTN_gfx10<op, psName, asmName> {
2727+
defm _BOTHEN : MUBUF_Real_gfx10<op, psName#"_BOTHEN", asmName>;
2728+
defm _IDXEN : MUBUF_Real_gfx10<op, psName#"_IDXEN", asmName>;
2729+
defm _OFFEN : MUBUF_Real_gfx10<op, psName#"_OFFEN", asmName>;
2730+
defm _OFFSET : MUBUF_Real_gfx10<op, psName#"_OFFSET", asmName>;
27302731
}
27312732

27322733
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
@@ -2781,18 +2782,18 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op, bit isTFE = 0> {
27812782
defm _LDS_BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
27822783
}
27832784
}
2784-
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
2785-
defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op>;
2786-
defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
2787-
defm _IDXEN : MUBUF_Real_gfx6_gfx7<op>;
2788-
defm _OFFEN : MUBUF_Real_gfx6_gfx7<op>;
2789-
defm _OFFSET : MUBUF_Real_gfx6_gfx7<op>;
2785+
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op, string psName, string asmName> {
2786+
defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64", asmName>;
2787+
defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN", asmName>;
2788+
defm _IDXEN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN", asmName>;
2789+
defm _OFFEN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN", asmName>;
2790+
defm _OFFSET : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET", asmName>;
27902791

2791-
defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op>;
2792-
defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
2793-
defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
2794-
defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
2795-
defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op>;
2792+
defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64_RTN", asmName>;
2793+
defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN_RTN", asmName>;
2794+
defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN_RTN", asmName>;
2795+
defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN_RTN", asmName>;
2796+
defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET_RTN", asmName>;
27962797
}
27972798

27982799
multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
@@ -2807,8 +2808,10 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> {
28072808
defm _TFE : MUBUF_Real_AllAddr_Lds_Helper_gfx6_gfx7_gfx10<op, 1>;
28082809
}
28092810

2810-
multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
2811-
MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
2811+
multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op, string psName = NAME,
2812+
string asmName = get_BUF_ps<psName>.Mnemonic> :
2813+
MUBUF_Real_Atomics_gfx6_gfx7<op, psName, asmName>,
2814+
MUBUF_Real_Atomics_gfx10<op, psName, asmName>;
28122815

28132816
// FIXME-GFX6: Following instructions are available only on GFX6.
28142817
//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
@@ -2868,8 +2871,8 @@ defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
28682871
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
28692872
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
28702873
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
2871-
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
2872-
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
2874+
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f, "BUFFER_ATOMIC_MIN_F64", "buffer_atomic_fmin_x2">;
2875+
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060, "BUFFER_ATOMIC_MAX_F64", "buffer_atomic_fmax_x2">;
28732876

28742877
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>;
28752878

0 commit comments

Comments
 (0)