Skip to content

[AMDGPU] GFX12: select @llvm.prefetch intrinsic #74576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1987,6 +1987,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_INDEXED_ZEXTLOAD, G_INDEXED_STORE})
.unsupported();

getActionDefinitionsBuilder(G_PREFETCH).alwaysLegal();

getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3258,6 +3258,24 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_AMDGPU_MAD_I64_I32:
applyMappingMAD_64_32(B, OpdMapper);
return;
case AMDGPU::G_PREFETCH: {
if (!Subtarget.hasPrefetch()) {
MI.eraseFromParent();
return;
}
unsigned PtrBank =
getRegBankID(MI.getOperand(0).getReg(), MRI, AMDGPU::SGPRRegBankID);
if (PtrBank == AMDGPU::VGPRRegBankID) {
MI.eraseFromParent();
return;
}
// FIXME: There is currently no support for prefetch in global isel.
// There is no node equivalence and what's worse there is no MMO produced
// for a prefetch on global isel path.
// Prefetch does not affect execution so erase it for now.
MI.eraseFromParent();
return;
}
default:
break;
}
Expand Down Expand Up @@ -5012,6 +5030,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
return getDefaultMappingVOP(MI);
case AMDGPU::G_PREFETCH:
OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
break;
}

return getInstructionMapping(/*ID*/1, /*Cost*/1,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasInstPrefetch() const { return getGeneration() >= GFX10; }

// Reports prefetch support; currently this is exactly the GFX12Insts flag.
bool hasPrefetch() const { return GFX12Insts; }

// Has s_cmpk_* instructions.
bool hasSCmpK() const { return getGeneration() < GFX12; }

Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasMad64_32())
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);

if (Subtarget->hasPrefetch())
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

if (Subtarget->hasIEEEMinMax())
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
Expand Down Expand Up @@ -3868,6 +3871,23 @@ SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
return DAG.getMergeValues({Result, GetReg.getValue(1)}, SL);
}

/// Custom lowering hook for ISD::PREFETCH.
///
/// Returns \p Op unchanged when the node is not divergent and its memory
/// access is in the flat, global, constant or 32-bit constant address space.
/// In every other case an empty SDValue is returned.
///
/// NOTE(review): an empty result presumably makes the legalizer fall back to
/// its default handling of the node -- confirm against the LowerOperation
/// caller. \p DAG is not used.
SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
  // Divergent prefetches are never kept.
  if (Op->isDivergent())
    return SDValue();

  const unsigned AddrSpace = cast<MemSDNode>(Op)->getAddressSpace();
  const bool IsSupportedAddrSpace =
      AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
      AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT;

  return IsSupportedAddrSpace ? Op : SDValue();
}

Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
Expand Down Expand Up @@ -5416,6 +5436,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTACKSAVE(Op, DAG);
case ISD::GET_ROUNDING:
return lowerGET_ROUNDING(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
}
return SDValue();
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;

Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
Offset = OffsetOp ? OffsetOp->getImm() : 0;
// Get appropriate operand, and compute width accordingly.
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
if (DataOpIdx == -1)
return false;
Width = getOpSize(LdSt, DataOpIdx);
return true;
}
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,14 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL
}];
}

// Pattern fragment wrapping a `prefetch` node with operands
// ($ptr, $rw, $loc, $type). The fragment only matches when:
//  - SelectionDAG path: operand 1 of the node is not divergent.
//  - GlobalISel path:   isInstrUniform(MI) returns true.
// NOTE(review): operand 1 is presumably the pointer (operand 0 being the
// chain) -- confirm against the PREFETCH node layout.
def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
[{ return !N->getOperand(1)->isDivergent();}]> {
let GISelPredicateCode = [{
return isInstrUniform(MI);
}];
}

def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
Expand Down Expand Up @@ -969,6 +977,21 @@ def : GCNPat <
}
} // let OtherPredicates = [HasShaderCyclesRegister]

// Selection patterns mapping an smrd_prefetch whose last operand equals
// `cache_type` onto the pseudo named "S_PREFETCH_" # type.
//
// Two address forms are handled:
//  1. a base/offset pair produced by the SMRDImm complex pattern;
//  2. a bare 64-bit SGPR base, emitted with an offset of 0.
//
// The rw and locality operands are matched as target immediates (timm) and
// are not referenced by the output pattern. The two trailing instruction
// operands are always SGPR_NULL and (i8 0).
// NOTE(review): those trailing operands are presumably the soffset register
// and the length field of SM_Prefetch_Pseudo -- confirm against its definition.
multiclass SMPrefetchPat<string type, int cache_type> {
def : GCNPat <
(smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32 cache_type)),
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
>;

def : GCNPat <
(smrd_prefetch (i64 SReg_64:$sbase), timm, timm, (i32 cache_type)),
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0))
>;
}

// cache_type 0 selects S_PREFETCH_INST; cache_type 1 selects S_PREFETCH_DATA.
defm : SMPrefetchPat<"INST", 0>;
defm : SMPrefetchPat<"DATA", 1>;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
Expand Down
Loading