Skip to content

Commit 414d274

Browse files
[AMDGPU] GFX12: select @llvm.prefetch intrinsic (#74576)
Co-authored-by: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
1 parent f740741 commit 414d274

File tree

8 files changed

+372
-0
lines changed

8 files changed

+372
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,6 +1990,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
19901990
G_INDEXED_ZEXTLOAD, G_INDEXED_STORE})
19911991
.unsupported();
19921992

1993+
getActionDefinitionsBuilder(G_PREFETCH).alwaysLegal();
1994+
19931995
getLegacyLegalizerInfo().computeTables();
19941996
verify(*ST.getInstrInfo());
19951997
}

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3258,6 +3258,24 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
32583258
case AMDGPU::G_AMDGPU_MAD_I64_I32:
32593259
applyMappingMAD_64_32(B, OpdMapper);
32603260
return;
3261+
case AMDGPU::G_PREFETCH: {
3262+
if (!Subtarget.hasPrefetch()) {
3263+
MI.eraseFromParent();
3264+
return;
3265+
}
3266+
unsigned PtrBank =
3267+
getRegBankID(MI.getOperand(0).getReg(), MRI, AMDGPU::SGPRRegBankID);
3268+
if (PtrBank == AMDGPU::VGPRRegBankID) {
3269+
MI.eraseFromParent();
3270+
return;
3271+
}
3272+
// FIXME: There is currently no support for prefetch in global isel.
3273+
// There is no node equivalence and what's worse there is no MMO produced
3274+
// for a prefetch on global isel path.
3275+
// Prefetch does not affect execution so erase it for now.
3276+
MI.eraseFromParent();
3277+
return;
3278+
}
32613279
default:
32623280
break;
32633281
}
@@ -5016,6 +5034,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50165034
case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
50175035
case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
50185036
return getDefaultMappingVOP(MI);
5037+
case AMDGPU::G_PREFETCH:
5038+
OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5039+
break;
50195040
}
50205041

50215042
return getInstructionMapping(/*ID*/1, /*Cost*/1,

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
833833

834834
bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
835835

836+
bool hasPrefetch() const { return GFX12Insts; } // Reports the GFX12Insts flag.
837+
836838
// Has s_cmpk_* instructions.
837839
bool hasSCmpK() const { return getGeneration() < GFX12; }
838840

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
763763
if (Subtarget->hasMad64_32())
764764
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);
765765

766+
if (Subtarget->hasPrefetch())
767+
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
768+
766769
if (Subtarget->hasIEEEMinMax())
767770
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
768771
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
@@ -3884,6 +3887,23 @@ SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
38843887
return DAG.getMergeValues({Result, GetReg.getValue(1)}, SL);
38853888
}
38863889

3890+
SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
// Custom lowering hook for ISD::PREFETCH. The node is either returned
// unchanged or rejected by returning an empty SDValue; no new nodes are
// built here and DAG is not used.
// NOTE(review): an empty SDValue presumably signals "no custom lowering" to
// the legalizer -- confirm how PREFETCH is handled in that case.
3891+
if (Op->isDivergent())
3892+
return SDValue();
// Divergent prefetch nodes are rejected before the address space is examined.
3893+
3894+
switch (cast<MemSDNode>(Op)->getAddressSpace()) {
3895+
case AMDGPUAS::FLAT_ADDRESS:
3896+
case AMDGPUAS::GLOBAL_ADDRESS:
3897+
case AMDGPUAS::CONSTANT_ADDRESS:
3898+
case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
3899+
break;
// Only the four address spaces above leave the switch and reach `return Op`.
3900+
default:
3901+
return SDValue();
// Every other address space is rejected the same way as a divergent node.
3902+
}
3903+
3904+
return Op;
// The node is handed back as-is for the accepted (non-divergent) case.
3905+
}
3906+
38873907
Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
38883908
const MachineFunction &MF) const {
38893909
Register Reg = StringSwitch<Register>(RegName)
@@ -5432,6 +5452,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
54325452
return LowerSTACKSAVE(Op, DAG);
54335453
case ISD::GET_ROUNDING:
54345454
return lowerGET_ROUNDING(Op, DAG);
5455+
case ISD::PREFETCH:
5456+
return lowerPREFETCH(Op, DAG);
54355457
}
54365458
return SDValue();
54375459
}

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
416416
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
417417
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
418418

419+
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
420+
419421
Register getRegisterByName(const char* RegName, LLT VT,
420422
const MachineFunction &MF) const override;
421423

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
484484
Offset = OffsetOp ? OffsetOp->getImm() : 0;
485485
// Get appropriate operand, and compute width accordingly.
486486
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
487+
if (DataOpIdx == -1)
488+
return false;
487489
Width = getOpSize(LdSt, DataOpIdx);
488490
return true;
489491
}

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,14 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL
818818
}];
819819
}
820820

821+
def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
822+
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
823+
[{ return !N->getOperand(1)->isDivergent();}]> {
// Pattern fragment over a four-operand `prefetch` node. The SelectionDAG
// predicate above accepts the node only when its operand 1 is not divergent
// (NOTE(review): operand 1 is presumably the pointer, with the chain at
// operand 0 -- confirm). The GlobalISel predicate below instead asks
// isInstrUniform() about the whole instruction.
824+
let GISelPredicateCode = [{
825+
return isInstrUniform(MI);
826+
}];
827+
}
828+
821829
def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
822830
def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
823831
def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
@@ -969,6 +977,21 @@ def : GCNPat <
969977
}
970978
} // let OtherPredicates = [HasShaderCyclesRegister]
971979

980+
multiclass SMPrefetchPat<string type, int cache_type> {
// Defines two selection patterns that map smrd_prefetch onto the pseudo named
// "S_PREFETCH_" # type. Both require the last prefetch operand to equal
// cache_type and accept any target immediate (timm) for the two operands
// before it. The first pattern takes base and offset from the SMRDImm complex
// pattern; the second takes a plain SReg_64 base and uses offset 0. Both pass
// SGPR_NULL and (i8 0) as the remaining operands of the pseudo
// (NOTE(review): presumably the soffset register and a length field --
// confirm against SM_Prefetch_Pseudo).
981+
def : GCNPat <
982+
(smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32 cache_type)),
983+
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
984+
>;
985+
986+
def : GCNPat <
987+
(smrd_prefetch (i64 SReg_64:$sbase), timm, timm, (i32 cache_type)),
988+
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0))
989+
>;
990+
}
991+
992+
defm : SMPrefetchPat<"INST", 0>;
993+
defm : SMPrefetchPat<"DATA", 1>;
// Instantiated twice: cache type 0 selects S_PREFETCH_INST and cache type 1
// selects S_PREFETCH_DATA.
994+
972995
//===----------------------------------------------------------------------===//
973996
// GFX10.
974997
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)