Skip to content

AMDGPU: Create pseudo to real mapping for flat/buffer atomic fmin/fmax #95591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1864,7 +1864,9 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,

def HasBufferFlatGlobalAtomicsF64 :
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
// FIXME: This is too coarse; it is a workaround for the pseudo's predicates being used on the real instructions.
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;

def HasLdsAtomicAddF64 :
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
Expand Down
103 changes: 53 additions & 50 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1163,12 +1163,6 @@ let SubtargetPredicate = isGFX6GFX7GFX10 in {
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fmin_x2", VReg_64, f64, null_frag
>;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fmax_x2", VReg_64, f64, null_frag
>;

}

Expand Down Expand Up @@ -1318,6 +1312,9 @@ let SubtargetPredicate = isGFX90APlus in {

let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;

// Note: on some subtargets these instructions are instead named
// buffer_atomic_fmin_x2 / buffer_atomic_fmax_x2.
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
Expand Down Expand Up @@ -1751,8 +1748,8 @@ let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
}
let SubtargetPredicate = isGFX6GFX7GFX10 in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_FMIN_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_FMAX_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
}

class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
Expand Down Expand Up @@ -2303,6 +2300,12 @@ let OtherPredicates = [HasPackedD16VMem] in {
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
// specific pseudo (the _OFFSET variant in this case) since any of them will work.
class get_BUF_ps<string name> {
string Mnemonic = !cast<BUF_Pseudo>(name # "_OFFSET").Mnemonic;
}

//===----------------------------------------------------------------------===//
// Base ENC_MUBUF for GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -2334,8 +2337,8 @@ multiclass MUBUF_Real_gfx11<bits<8> op, string real_name = !cast<MUBUF_Pseudo>(N
}
}

class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> {
class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef, string asmName> :
Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef, asmName> {
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
Expand All @@ -2345,19 +2348,21 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
let Inst{55} = ps.tfe;
}

multiclass MUBUF_Real_gfx10<bits<8> op> {
defvar ps = !cast<MUBUF_Pseudo>(NAME);
def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
multiclass MUBUF_Real_gfx10<bits<8> op, string psName = NAME,
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
defvar ps = !cast<MUBUF_Pseudo>(psName);
def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10, asmName> {
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
let Inst{25} = op{7};
let AssemblerPredicate = isGFX10Only;
let DecoderNamespace = "GFX10";
}
}

multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {
defvar ps = !cast<MUBUF_Pseudo>(NAME);
def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
multiclass MUBUF_Real_gfx6_gfx7<bits<8> op, string psName = NAME,
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
defvar ps = !cast<MUBUF_Pseudo>(psName);
def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, asmName> {
let Inst{15} = ps.addr64;
let AssemblerPredicate = isGFX6GFX7;
let DecoderNamespace = "GFX6GFX7";
Expand All @@ -2366,7 +2371,7 @@ multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {

multiclass MUBUF_Real_gfx6<bits<8> op> {
defvar ps = !cast<MUBUF_Pseudo>(NAME);
def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
let Inst{15} = ps.addr64;
let AssemblerPredicate = isGFX6;
let DecoderNamespace = "GFX6";
Expand All @@ -2375,7 +2380,7 @@ multiclass MUBUF_Real_gfx6<bits<8> op> {

multiclass MUBUF_Real_gfx7<bits<8> op> {
defvar ps = !cast<MUBUF_Pseudo>(NAME);
def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
let Inst{15} = ps.addr64;
let AssemblerPredicate = isGFX7Only;
let DecoderNamespace = "GFX7";
Expand Down Expand Up @@ -2476,12 +2481,6 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> {
// MUBUF - GFX11, GFX12.
//===----------------------------------------------------------------------===//

// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
// specific pseudo (bothen in this case) since any of them will work.
class get_BUF_ps<string name> {
string Mnemonic = !cast<BUF_Pseudo>(name # "_BOTHEN").Mnemonic;
}

// gfx11 instruction that accept both old and new assembler name.
class Mnem_gfx11_gfx12 <string mnemonic, string real_name> :
AMDGPUMnemonicAlias<mnemonic, real_name> {
Expand Down Expand Up @@ -2703,18 +2702,20 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op, bit isTFE = 0> {
defm _LDS_BOTHEN : MUBUF_Real_gfx10<op>;
}
}
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
defm _BOTHEN_RTN : MUBUF_Real_gfx10<op>;
defm _IDXEN_RTN : MUBUF_Real_gfx10<op>;
defm _OFFEN_RTN : MUBUF_Real_gfx10<op>;
defm _OFFSET_RTN : MUBUF_Real_gfx10<op>;
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op, string psName = NAME,
string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
defm _BOTHEN_RTN : MUBUF_Real_gfx10<op, psName#"_BOTHEN_RTN", asmName>;
defm _IDXEN_RTN : MUBUF_Real_gfx10<op, psName#"_IDXEN_RTN", asmName>;
defm _OFFEN_RTN : MUBUF_Real_gfx10<op, psName#"_OFFEN_RTN", asmName>;
defm _OFFSET_RTN : MUBUF_Real_gfx10<op, psName#"_OFFSET_RTN", asmName>;
}
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
MUBUF_Real_Atomics_RTN_gfx10<op> {
defm _BOTHEN : MUBUF_Real_gfx10<op>;
defm _IDXEN : MUBUF_Real_gfx10<op>;
defm _OFFEN : MUBUF_Real_gfx10<op>;
defm _OFFSET : MUBUF_Real_gfx10<op>;
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op, string psName = NAME,
string asmName = get_BUF_ps<psName>.Mnemonic> :
MUBUF_Real_Atomics_RTN_gfx10<op, psName, asmName> {
defm _BOTHEN : MUBUF_Real_gfx10<op, psName#"_BOTHEN", asmName>;
defm _IDXEN : MUBUF_Real_gfx10<op, psName#"_IDXEN", asmName>;
defm _OFFEN : MUBUF_Real_gfx10<op, psName#"_OFFEN", asmName>;
defm _OFFSET : MUBUF_Real_gfx10<op, psName#"_OFFSET", asmName>;
}

defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
Expand Down Expand Up @@ -2769,18 +2770,18 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op, bit isTFE = 0> {
defm _LDS_BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
}
}
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op>;
defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
defm _IDXEN : MUBUF_Real_gfx6_gfx7<op>;
defm _OFFEN : MUBUF_Real_gfx6_gfx7<op>;
defm _OFFSET : MUBUF_Real_gfx6_gfx7<op>;
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op, string psName, string asmName> {
defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64", asmName>;
defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN", asmName>;
defm _IDXEN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN", asmName>;
defm _OFFEN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN", asmName>;
defm _OFFSET : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET", asmName>;

defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op>;
defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op>;
defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64_RTN", asmName>;
defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN_RTN", asmName>;
defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN_RTN", asmName>;
defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN_RTN", asmName>;
defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET_RTN", asmName>;
}

multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
Expand All @@ -2795,8 +2796,10 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> {
defm _TFE : MUBUF_Real_AllAddr_Lds_Helper_gfx6_gfx7_gfx10<op, 1>;
}

multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op, string psName = NAME,
string asmName = get_BUF_ps<psName>.Mnemonic> :
MUBUF_Real_Atomics_gfx6_gfx7<op, psName, asmName>,
MUBUF_Real_Atomics_gfx10<op, psName, asmName>;

// FIXME-GFX6: Following instructions are available only on GFX6.
//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
Expand Down Expand Up @@ -2856,8 +2859,8 @@ defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f, "BUFFER_ATOMIC_MIN_F64", "buffer_atomic_fmin_x2">;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060, "BUFFER_ATOMIC_MAX_F64", "buffer_atomic_fmax_x2">;

defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>;

Expand Down
Loading