Skip to content

[AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. #130041

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2824,6 +2824,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray :
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
[IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;

// llvm.amdgcn.image.bvh8.intersect.ray
//
// Results, in declaration order:
//   <vdata>          v10i32
//   <ray_origin>     v3f32
//   <ray_dir>        v3f32
//
// Operands, in declaration order:
//   <node_ptr>       i64
//   <ray_extent>     float
//   <instance_mask>  i8
//   <ray_origin>     v3f32
//   <ray_dir>        v3f32
//   <offset>         i32
//   <texture_descr>  v4i32
//
// <vdata>, <ray_origin>, <ray_dir>
// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>,
// <instance_mask>, <ray_origin>,
// <ray_dir>, <offset>,
// <texture_descr>
def int_amdgcn_image_bvh8_intersect_ray :
Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
[llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;

// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
Intrinsic<[llvm_i32_ty],
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1094,10 +1094,10 @@ def FeaturePrngInst : SubtargetFeature<"prng-inst",
"Has v_prng_b32 instruction"
>;

def FeatureBVHDualInst : SubtargetFeature<"bvh-dual-inst",
"HasBVHDualInst",
def FeatureBVHDualAndBVH8Insts : SubtargetFeature<"bvh-dual-bvh-8-insts",
"HasBVHDualAndBVH8Insts",
"true",
"Has image_bvh_dual_intersect_ray instruction"
"Has image_bvh_dual_intersect_ray and image_bvh8_intersect_ray instructions"
>;

//===------------------------------------------------------------===//
Expand Down Expand Up @@ -1857,7 +1857,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureBVHDualInst
FeatureBVHDualAndBVH8Insts
]>;

def FeatureISAVersion12_Generic: FeatureSet<
Expand Down Expand Up @@ -2513,8 +2513,8 @@ def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;

def HasBVHDualInst : Predicate<"Subtarget->hasBVHDualInst()">,
AssemblerPredicate<(all_of FeatureBVHDualInst)>;
def HasBVHDualAndBVH8Insts : Predicate<"Subtarget->hasBVHDualAndBVH8Insts()">,
AssemblerPredicate<(all_of FeatureBVHDualAndBVH8Insts)>;

def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4118,6 +4118,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
}
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
return selectBVHIntersectRayIntrinsic(I);
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
Expand Down
23 changes: 14 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7198,8 +7198,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
return true;
}

bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const {
bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
MachineInstr &MI, MachineIRBuilder &B) const {
const LLT S32 = LLT::scalar(32);
const LLT V2S32 = LLT::fixed_vector(2, 32);

Expand All @@ -7214,25 +7214,29 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
Register Offsets = MI.getOperand(9).getReg();
Register TDescr = MI.getOperand(10).getReg();

if (!ST.hasBVHDualInst()) {
if (!ST.hasBVHDualAndBVH8Insts()) {
DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(),
"intrinsic not supported on subtarget",
MI.getDebugLoc());
B.getMF().getFunction().getContext().diagnose(BadIntrin);
return false;
}

bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() ==
Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
const unsigned NumVAddrDwords = 12;
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
AMDGPU::MIMGEncGfx12, NumVDataDwords,
NumVAddrDwords);
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
int Opcode = AMDGPU::getMIMGOpcode(
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);

auto RayExtentInstanceMaskVec = B.buildMergeLikeInstr(
V2S32, {RayExtent, B.buildAnyExt(S32, InstanceMask)});

B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY
: AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
.addDef(DstReg)
.addDef(DstOrigin)
.addDef(DstDir)
Expand Down Expand Up @@ -7598,7 +7602,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
return legalizeBVHDualIntrinsic(MI, B);
case Intrinsic::amdgcn_image_bvh8_intersect_ray:
return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const;

bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const;

bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
Intrinsic::ID IID) const;
Expand Down
18 changes: 12 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3240,9 +3240,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
bool IsDualOrBVH8 =
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
Expand Down Expand Up @@ -5036,13 +5039,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
bool IsDualOrBVH8 =
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
if (IsDual) {
if (IsDualOrBVH8) {
OpdsMapping[1] = AMDGPU::getValueMapping(
AMDGPU::VGPRRegBankID,
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
Expand All @@ -5060,7 +5066,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
} else {
// NSA form
unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasRestrictedSOffset = false;
bool HasBitOp3Insts = false;
bool HasPrngInst = false;
bool HasBVHDualInst = false;
bool HasBVHDualAndBVH8Insts = false;
bool HasPermlane16Swap = false;
bool HasPermlane32Swap = false;
bool HasVcmpxPermlaneHazard = false;
Expand Down Expand Up @@ -1366,7 +1366,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasPrngInst() const { return HasPrngInst; }

bool hasBVHDualInst() const { return HasBVHDualInst; }
bool hasBVHDualAndBVH8Insts() const { return HasBVHDualAndBVH8Insts; }

/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
Expand Down
35 changes: 19 additions & 16 deletions llvm/lib/Target/AMDGPU/MIMGInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1509,18 +1509,18 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;

class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)));
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
int VAddrDwords = !srl(RegClass.Size, 5);

int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
list<RegisterClass> GFX11PlusAddrTypes =
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
!if(IsA16,
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
!cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32],
isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64],
IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96],
true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
}

class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
Expand Down Expand Up @@ -1554,26 +1554,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
}

class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
bit isDual,
bit isDual, bit isBVH8,
list<RegisterClass> addr_types>
: VIMAGE_gfx12<op.GFX12, !if(isDual,
: VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
VReg_96:$ray_dir_out),
(outs VReg_128:$vdata)),
num_addrs, "GFX12", addr_types> {
let Constraints = !if(isDual,
let Constraints = !if(!or(isDual, isBVH8),
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
!if(isDual, (ins), (ins A16:$a16)));
!if(!or(isDual, isBVH8), (ins), (ins A16:$a16)));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
!if(isDual, "", "$a16");
let SchedRW = !if(isDual,
!if(!or(isDual, isBVH8), "", "$a16");
let SchedRW = !if(!or(isDual, isBVH8),
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
}

multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
bit isDual> {
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
bit isDual, bit isBVH8 = 0> {
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>;
def "" : MIMGBaseOpcode {
let BVH = 1;
let A16 = IsA16;
Expand Down Expand Up @@ -1611,8 +1611,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
}
}
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
isDual, info.GFX11PlusAddrTypes> {
let VDataDwords = !if(isDual, 10, 4);
isDual, isBVH8,
info.GFX11PlusAddrTypes> {
let VDataDwords = !if(!or(isDual, isBVH8), 10, 4);
let VAddrDwords = info.num_addrs;
}
}
Expand Down Expand Up @@ -1791,11 +1792,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]

defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
// BVH8 variant of the intersect-ray image instruction. The positional bits
// after the mnemonic are Is64 = 1, IsA16 = 0, isDual = 0, isBVH8 = 1. Only the
// first mimgopc encoding slot (0x81) is populated; the other two are MIMG.NOP.
defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>;

let SubtargetPredicate = isGFX12Plus in {
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">;
}

} // End let OtherPredicates = [HasImageInsts]
Expand Down
18 changes: 11 additions & 7 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1387,7 +1387,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
case Intrinsic::amdgcn_image_bvh_intersect_ray:
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT =
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
Expand Down Expand Up @@ -9443,7 +9444,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, VT,
M->getMemOperand());
}
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
Expand All @@ -9456,16 +9458,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
assert(NodePtr.getValueType() == MVT::i64);
assert(RayDir.getValueType() == MVT::v3f32);

if (!Subtarget->hasBVHDualInst()) {
if (!Subtarget->hasBVHDualAndBVH8Insts()) {
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
return SDValue();
}

bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
const unsigned NumVAddrDwords = 12;
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
AMDGPU::MIMGEncGfx12, NumVDataDwords,
NumVAddrDwords);
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
int Opcode = AMDGPU::getMIMGOpcode(
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);

SmallVector<SDValue, 7> Ops;
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -4376,6 +4376,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
let mayStore = 0;
}

// Generic (pre-selection) instruction for image_bvh8_intersect_ray. The
// legalizer builds it from llvm.amdgcn.image.bvh8.intersect.ray with three
// defs: the result data, the ray origin and the ray direction. The source
// operands are carried as variable_ops after $opcode.
// NOTE(review): $opcode is presumably the MIMG opcode chosen by the legalizer;
// confirm against the builder code. Unlike G_AMDGPU_BVH_INTERSECT_RAY, the
// register-bank code treats this instruction as having no trailing A16
// modifier operand.
def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
let InOperandList = (ins unknown:$opcode, variable_ops);
// Modeled as a side-effect-free load: it reads memory but never writes it.
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 0;
}

// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
// if necessary.
def G_SI_CALL : AMDGPUGenericInstruction {
Expand Down
Loading