Skip to content

AMDGPU/NFC: Add predicate for supporting buffer/flat/global f64 atomics #80209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1800,6 +1800,10 @@ def isGFX12Plus :
// Predicate pair for flat address space support: the code-generation side
// tests Subtarget->hasFlatAddressSpace(), the assembler side requires
// FeatureFlatAddressSpace.
def HasFlatAddressSpace
    : Predicate<"Subtarget->hasFlatAddressSpace()">,
      AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;

// Predicate pair guarding the buffer/flat/global f64 atomic instructions:
// the code-generation side tests Subtarget->hasBufferFlatGlobalAtomicsF64(),
// the assembler side accepts any of the listed features (currently only
// FeatureGFX90AInsts).
def HasBufferFlatGlobalAtomicsF64
    : Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
      AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

// Predicate pair for the flat-global instructions: the code-generation side
// tests Subtarget->hasFlatGlobalInsts(), the assembler side requires
// FeatureFlatGlobalInsts.
def HasFlatGlobalInsts
    : Predicate<"Subtarget->hasFlatGlobalInsts()">,
      AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;
def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1312,11 +1312,13 @@ let SubtargetPredicate = isGFX90APlus in {
def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> {
let SubtargetPredicate = isGFX90AOnly;
}
} // End SubtargetPredicate = isGFX90APlus

let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64

def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
let SubtargetPredicate = isGFX940Plus;
Expand Down Expand Up @@ -1806,11 +1808,11 @@ let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["ret"]>;
} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts]

let OtherPredicates = [isGFX90APlus] in {
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
} // End SubtargetPredicate = isGFX90APlus
} // End OtherPredicates = [HasBufferFlatGlobalAtomicsF64]

multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, string Inst> {
foreach RtnMode = ["ret", "noret"] in {
Expand Down Expand Up @@ -3339,7 +3341,7 @@ let SubtargetPredicate = isGFX90APlus in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_vi<0x50>;
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_vi<0x51>;
} // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus
} // End SubtargetPredicate = isGFX90APlus

def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> {
let AsmString = BUFFER_WBL2.Mnemonic; // drop flags
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -779,14 +779,14 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",

} // End SubtargetPredicate = isGFX7GFX10

let SubtargetPredicate = isGFX90APlus in {
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64

let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
Expand Down Expand Up @@ -1671,7 +1671,7 @@ defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_am
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}

let OtherPredicates = [isGFX90APlus] in {
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return GFX10_BEncoding;
}

/// \returns true if the subtarget supports the f64 atomics
/// BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64. This currently mirrors
/// hasGFX90AInsts().
bool hasBufferFlatGlobalAtomicsF64() const {
  return hasGFX90AInsts();
}

/// \returns true when the subtarget generation is GFX9 or later.
bool hasMultiDwordFlatScratchAddressing() const { return getGeneration() >= GFX9; }
Expand Down