Skip to content

[AMDGPU] Add intrinsic and MI for image_bvh_dual_intersect_ray #130038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ def llvm_v3i32_ty : LLVMType<v3i32>; // 3 x i32
def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
def llvm_v6i32_ty : LLVMType<v6i32>; // 6 x i32
def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
def llvm_v10i32_ty : LLVMType<v10i32>; // 10 x i32
def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32
def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32
Expand Down Expand Up @@ -591,6 +592,7 @@ def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
def llvm_v3f32_ty : LLVMType<v3f32>; // 3 x float
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
def llvm_v10f32_ty : LLVMType<v10f32>; // 10 x float
def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
def llvm_v32f32_ty : LLVMType<v32f32>; // 32 x float
def llvm_v1f64_ty : LLVMType<v1f64>; // 1 x double
Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2813,6 +2813,17 @@ def int_amdgcn_ds_bvh_stack_push8_pop1_rtn : IntDSBVHStackRtn<vdst = llvm_i32_t
def int_amdgcn_ds_bvh_stack_push8_pop2_rtn : IntDSBVHStackRtn<vdst = llvm_i64_ty,
data1 = llvm_v8i32_ty>;

// llvm.amdgcn.image.bvh.dual.intersect.ray
//   Results:  <vdata> (v10i32), <ray_origin> (v3f32), <ray_dir> (v3f32)
//   Operands: <node_ptr> (i64), <ray_extent> (float), <instance_mask> (i8),
//             <ray_origin> (v3f32), <ray_dir> (v3f32), <offsets> (v2i32),
//             <texture_descr> (v4i32)
// Marked IntrReadMem, IntrWillReturn, IntrNoCallback and IntrNoFree.
def int_amdgcn_image_bvh_dual_intersect_ray :
Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
[llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
[IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;

// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
Intrinsic<[llvm_i32_ty],
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,12 @@ def FeaturePrngInst : SubtargetFeature<"prng-inst",
"Has v_prng_b32 instruction"
>;

// Subtarget feature "bvh-dual-inst": sets the HasBVHDualInst subtarget field
// to "true" on targets that have the image_bvh_dual_intersect_ray instruction.
def FeatureBVHDualInst : SubtargetFeature<"bvh-dual-inst",
"HasBVHDualInst",
"true",
"Has image_bvh_dual_intersect_ray instruction"
>;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
Expand Down Expand Up @@ -1844,7 +1850,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureDPPSrc1SGPR,
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
FeatureMemoryAtomicFAddF32DenormalSupport
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureBVHDualInst
]>;

def FeatureISAVersion12_Generic: FeatureSet<
Expand Down Expand Up @@ -2500,6 +2507,9 @@ def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;

// Predicate guarding image_bvh_dual_intersect_ray: code generation queries
// Subtarget->hasBVHDualInst(), the assembler requires FeatureBVHDualInst.
def HasBVHDualInst : Predicate<"Subtarget->hasBVHDualInst()">,
AssemblerPredicate<(all_of FeatureBVHDualInst)>;

def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>;

Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3597,10 +3597,12 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{

/// Select a G_AMDGPU_BVH_INTERSECT_RAY or G_AMDGPU_BVH_DUAL_INTERSECT_RAY
/// pseudo by rewriting \p MI in place into the machine instruction whose
/// opcode it carries as an immediate operand.
///
/// \returns the result of constraining the register operands of the
/// rewritten instruction.
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
    MachineInstr &MI) const {
  // The opcode immediate sits right after the results: index 1 for the
  // single-ray pseudo, index 3 for the dual pseudo (which has three results).
  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  // The immediate has served its purpose; the real instruction has no such
  // operand.
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}

// FIXME: This should be removed and let the patterns select. We just need the
Expand Down Expand Up @@ -4114,6 +4116,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
assert(Intr && "not an image intrinsic with image pseudo");
return selectImageIntrinsic(I, Intr);
}
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
return selectBVHIntersectRayIntrinsic(I);
case AMDGPU::G_SBFX:
Expand Down
53 changes: 53 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7198,6 +7198,57 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
return true;
}

/// Legalize llvm.amdgcn.image.bvh.dual.intersect.ray.
///
/// Replaces \p MI with a G_AMDGPU_BVH_DUAL_INTERSECT_RAY that carries the
/// GFX12 MIMG opcode as an immediate following the three results. The ray
/// extent and the any-extended instance mask are packed into a single
/// <2 x s32> source operand; all other sources are forwarded unchanged.
///
/// \returns false after reporting an "unsupported" diagnostic when the
/// subtarget lacks the instruction, true once \p MI has been replaced.
bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
                                                   MachineIRBuilder &B) const {
  // Bail out early on subtargets without the dual-intersect instruction.
  if (!ST.hasBVHDualInst()) {
    DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(),
                                        "intrinsic not supported on subtarget",
                                        MI.getDebugLoc());
    B.getMF().getFunction().getContext().diagnose(BadIntrin);
    return false;
  }

  // Operands 0-2 are the results and 4-10 the sources; operand 3 is not read
  // here (presumably the intrinsic ID).
  const Register VDataDst = MI.getOperand(0).getReg();
  const Register OriginDst = MI.getOperand(1).getReg();
  const Register DirDst = MI.getOperand(2).getReg();
  const Register NodePtrSrc = MI.getOperand(4).getReg();
  const Register ExtentSrc = MI.getOperand(5).getReg();
  const Register MaskSrc = MI.getOperand(6).getReg();
  const Register OriginSrc = MI.getOperand(7).getReg();
  const Register DirSrc = MI.getOperand(8).getReg();
  const Register OffsetsSrc = MI.getOperand(9).getReg();
  const Register DescSrc = MI.getOperand(10).getReg();

  // Look up the GFX12 encoding with 10 data dwords and 12 address dwords.
  constexpr unsigned VDataDwords = 10;
  constexpr unsigned VAddrDwords = 12;
  const int Opcode =
      AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
                            AMDGPU::MIMGEncGfx12, VDataDwords, VAddrDwords);
  assert(Opcode != -1);

  // Widen the instance mask to 32 bits and pair it with the ray extent.
  const LLT S32 = LLT::scalar(32);
  const LLT V2S32 = LLT::fixed_vector(2, 32);
  auto MaskExt = B.buildAnyExt(S32, MaskSrc);
  auto ExtentAndMask = B.buildMergeLikeInstr(V2S32, {ExtentSrc, MaskExt});

  B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
      .addDef(VDataDst)
      .addDef(OriginDst)
      .addDef(DirDst)
      .addImm(Opcode)
      .addUse(NodePtrSrc)
      .addUse(ExtentAndMask.getReg(0))
      .addUse(OriginSrc)
      .addUse(DirSrc)
      .addUse(OffsetsSrc)
      .addUse(DescSrc)
      .cloneMemRefs(MI);

  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
MachineIRBuilder &B) const {
const SITargetLowering *TLI = ST.getTargetLowering();
Expand Down Expand Up @@ -7546,6 +7597,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeRsqClampIntrinsic(MI, MRI, B);
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
return legalizeBVHDualIntrinsic(MI, B);
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const;

/// Legalize the llvm.amdgcn.image.bvh.dual.intersect.ray intrinsic \p MI,
/// building its replacement through \p B.
bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;

bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
Intrinsic::ID IID) const;

Expand Down
35 changes: 26 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3239,10 +3239,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
return;
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: {
unsigned N = MI.getNumExplicitOperands() - 2;
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(B, MI, {N});
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
return;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
Expand Down Expand Up @@ -5032,19 +5035,33 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
assert(RSrcIntrin->IsImage);
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: {
unsigned N = MI.getNumExplicitOperands() - 2;
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
OpdsMapping[N] = getSGPROpMapping(MI.getOperand(N).getReg(), MRI, *TRI);
if (N == 3) {
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
if (IsDual) {
OpdsMapping[1] = AMDGPU::getValueMapping(
AMDGPU::VGPRRegBankID,
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
OpdsMapping[2] = AMDGPU::getValueMapping(
AMDGPU::VGPRRegBankID,
MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
}
OpdsMapping[LastRegOpIdx] =
getSGPROpMapping(MI.getOperand(LastRegOpIdx).getReg(), MRI, *TRI);
if (LastRegOpIdx == 3) {
// Sequential form: all operands combined into VGPR256/VGPR512
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
if (Size > 256)
Size = 512;
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
} else {
// NSA form
for (unsigned I = 2; I < N; ++I) {
unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasRestrictedSOffset = false;
bool HasBitOp3Insts = false;
bool HasPrngInst = false;
bool HasBVHDualInst = false;
bool HasPermlane16Swap = false;
bool HasPermlane32Swap = false;
bool HasVcmpxPermlaneHazard = false;
Expand Down Expand Up @@ -1364,6 +1365,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasPrngInst() const { return HasPrngInst; }

/// \returns true if the subtarget has the image_bvh_dual_intersect_ray
/// instruction (the "bvh-dual-inst" feature).
bool hasBVHDualInst() const { return HasBVHDualInst; }

/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
Expand Down
46 changes: 31 additions & 15 deletions llvm/lib/Target/AMDGPU/MIMGInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1509,17 +1509,18 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;

// Derives address-operand information for the BVH intersect-ray instructions.
//   Is64   - selects VReg_64 rather than VGPR_32 for the node pointer.
//   IsA16  - selects the shorter A16 address layout.
//   isDual - selects the fixed five-operand address layout used by
//            image_bvh_dual_intersect_ray.
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
  int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
  RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
  // Register class size is in bits; shift right by 5 to get dwords.
  int VAddrDwords = !srl(RegClass.Size, 5);

  int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
  RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
  list<RegisterClass> GFX11PlusAddrTypes =
    !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
                !if(IsA16,
                    [node_ptr_type, VGPR_32, VReg_96, VReg_96],
                    [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
}

class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
Expand Down Expand Up @@ -1553,15 +1554,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
}

// GFX12 VIMAGE encoding of the BVH intersect-ray instructions.
// With isDual set, the instruction has three results ($vdata as VReg_320 plus
// $ray_origin_out and $ray_dir_out), the two ray results are tied to $vaddr2
// and $vaddr3, and there is no $a16 operand. Otherwise it has the single
// VReg_128 $vdata result and takes $a16.
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
                                bit isDual,
                                list<RegisterClass> addr_types>
    : VIMAGE_gfx12<op.GFX12, !if(isDual,
                                 (outs VReg_320:$vdata, VReg_96:$ray_origin_out,
                                       VReg_96:$ray_dir_out),
                                 (outs VReg_128:$vdata)),
                   num_addrs, "GFX12", addr_types> {
  let Constraints = !if(isDual,
                        "$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
  let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
                           !if(isDual, (ins), (ins A16:$a16)));
  let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
                  !if(isDual, "", "$a16");
  // One scheduling write per result.
  let SchedRW = !if(isDual,
                    [WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
}

multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
bit isDual> {
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
def "" : MIMGBaseOpcode {
let BVH = 1;
let A16 = IsA16;
Expand Down Expand Up @@ -1599,7 +1611,8 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
}
}
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
info.GFX11PlusAddrTypes> {
isDual, info.GFX11PlusAddrTypes> {
let VDataDwords = !if(isDual, 10, 4);
let VAddrDwords = info.num_addrs;
}
}
Expand Down Expand Up @@ -1771,15 +1784,18 @@ defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x80>, "ima
// MSAA load and BVH intersect-ray instructions, gated on HasImageInsts and
// HasGFX10_AEncoding.
let OtherPredicates = [HasImageInsts, HasGFX10_AEncoding] in {
defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load <mimgopc<0x18, 0x18, MIMG.NOP>, "image_msaa_load">;

// The trailing MIMG_IntersectRay arguments are <Is64, IsA16, isDual>.
defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0, 0>;
defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1, 0>;
defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0, 0>;
defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1, 0>;
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]

// Dual-ray BVH intersect, instantiated with Is64 = 1, IsA16 = 0, isDual = 1.
// NOTE(review): only the first mimgopc field carries an opcode (0x80); the
// others are MIMG.NOP, presumably meaning no pre-GFX12 encoding - confirm
// against the mimgopc definition.
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;

// Under the isGFX12Plus predicate, pair each BVH mnemonic without the
// "image_" prefix with its "image_"-prefixed form.
let SubtargetPredicate = isGFX12Plus in {
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
}

} // End let OtherPredicates = [HasImageInsts]
Expand Down
Loading
Loading