[AMDGPU] Add intrinsic and MI for image_bvh_dual_intersect_ray #130038
Conversation
mariusz-sikora-at-amd commented on Mar 6, 2025
- Add llvm.amdgcn.image.bvh.dual.intersect.ray intrinsic and image_bvh_dual_intersect_ray machine instruction (a usage sketch of the intrinsic follows below).
- Add llvm_v10i32_ty and llvm_v10f32_ty.
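For illustration, here is a minimal LLVM IR sketch of calling the new intrinsic. It is derived only from the intrinsic signature added to IntrinsicsAMDGPU.td in this patch; the function name, calling convention, and value names are illustrative assumptions and are not taken from the patch's (truncated) test file.

declare { <10 x i32>, <3 x float>, <3 x float> }
  @llvm.amdgcn.image.bvh.dual.intersect.ray(i64, float, i8, <3 x float>,
                                            <3 x float>, <2 x i32>, <4 x i32>)

; Hypothetical caller; names and the amdgpu_ps calling convention are assumptions.
define amdgpu_ps <10 x i32> @dual_intersect_example(
    i64 %node_ptr, float %ray_extent, i8 %instance_mask,
    <3 x float> %ray_origin, <3 x float> %ray_dir,
    <2 x i32> %offsets, <4 x i32> %tdescr) {
  ; The intrinsic returns <vdata>, <ray_origin>, <ray_dir> as a struct,
  ; matching the three result types in the definition added below.
  %r = call { <10 x i32>, <3 x float>, <3 x float> }
      @llvm.amdgcn.image.bvh.dual.intersect.ray(
          i64 %node_ptr, float %ray_extent, i8 %instance_mask,
          <3 x float> %ray_origin, <3 x float> %ray_dir,
          <2 x i32> %offsets, <4 x i32> %tdescr)
  %vdata = extractvalue { <10 x i32>, <3 x float>, <3 x float> } %r, 0
  ret <10 x i32> %vdata
}

As the legalizer and SelectionDAG changes below show, the ray_extent and instance_mask operands are packed into a single v2i32 value when the machine node is built, and the selected VIMAGE instruction is only available on GFX12+.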
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc

Author: Mariusz Sikora (mariusz-sikora-at-amd)

Changes

Patch is 28.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130038.diff

14 Files Affected:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 62239ca705b9e..645591dc4c776 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -561,6 +561,7 @@ def llvm_v3i32_ty : LLVMType<v3i32>; // 3 x i32
def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
def llvm_v6i32_ty : LLVMType<v6i32>; // 6 x i32
def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
+def llvm_v10i32_ty : LLVMType<v10i32>; // 10 x i32
def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32
def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32
@@ -591,6 +592,7 @@ def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
def llvm_v3f32_ty : LLVMType<v3f32>; // 3 x float
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
+def llvm_v10f32_ty : LLVMType<v10f32>; // 10 x float
def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
def llvm_v32f32_ty : LLVMType<v32f32>; // 32 x float
def llvm_v1f64_ty : LLVMType<v1f64>; // 1 x double
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 86e050333acc7..f93439b30523e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2801,6 +2801,17 @@ def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty
// GFX12 Intrinsics
//===----------------------------------------------------------------------===//
+// <vdata>, <ray_origin>, <ray_dir>
+// llvm.amdgcn.image.bvh.dual.intersect.ray <node_ptr>, <ray_extent>,
+// <instance_mask>, <ray_origin>,
+// <ray_dir>, <offsets>,
+// <texture_descr>
+def int_amdgcn_image_bvh_dual_intersect_ray :
+ Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
+ llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
+ [IntrReadMem, IntrWillReturn]>;
+
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
Intrinsic<[llvm_i32_ty],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 441fb5730a6d8..9c3bdd74a5cb0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3569,10 +3569,12 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
MachineInstr &MI) const {
- MI.setDesc(TII.get(MI.getOperand(1).getImm()));
- MI.removeOperand(1);
+ unsigned OpcodeOpIdx =
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
+ MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
+ MI.removeOperand(OpcodeOpIdx);
MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
- return true;
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
// FIXME: This should be removed and let the patterns select. We just need the
@@ -4086,6 +4088,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
assert(Intr && "not an image intrinsic with image pseudo");
return selectImageIntrinsic(I, Intr);
}
+ case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
return selectBVHIntersectRayIntrinsic(I);
case AMDGPU::G_SBFX:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b3a8183beeacf..cd0554a5c5b99 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7183,6 +7183,61 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
return true;
}
+bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ const LLT S32 = LLT::scalar(32);
+ const LLT V2S32 = LLT::fixed_vector(2, 32);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register DstOrigin = MI.getOperand(1).getReg();
+ Register DstDir = MI.getOperand(2).getReg();
+ Register NodePtr = MI.getOperand(4).getReg();
+ Register RayExtent = MI.getOperand(5).getReg();
+ Register InstanceMask = MI.getOperand(6).getReg();
+ Register RayOrigin = MI.getOperand(7).getReg();
+ Register RayDir = MI.getOperand(8).getReg();
+ Register Offsets = MI.getOperand(9).getReg();
+ Register TDescr = MI.getOperand(10).getReg();
+
+ if (!AMDGPU::isGFX12Plus(ST)) {
+ DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(),
+ "intrinsic not supported on subtarget",
+ MI.getDebugLoc());
+ B.getMF().getFunction().getContext().diagnose(BadIntrin);
+ return false;
+ }
+
+ const unsigned NumVDataDwords = 10;
+ const unsigned NumVAddrDwords = 12;
+ int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords,
+ NumVAddrDwords);
+ assert(Opcode != -1);
+
+ SmallVector<Register, 12> Ops;
+ Ops.push_back(NodePtr);
+ Ops.push_back(B.buildMergeLikeInstr(
+ V2S32, {RayExtent, B.buildAnyExt(S32, InstanceMask)})
+ .getReg(0));
+ Ops.push_back(RayOrigin);
+ Ops.push_back(RayDir);
+ Ops.push_back(Offsets);
+
+ auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
+ .addDef(DstReg)
+ .addDef(DstOrigin)
+ .addDef(DstDir)
+ .addImm(Opcode);
+
+ for (Register R : Ops)
+ MIB.addUse(R);
+
+ MIB.addUse(TDescr).cloneMemRefs(MI);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
MachineIRBuilder &B) const {
const SITargetLowering *TLI = ST.getTargetLowering();
@@ -7531,6 +7586,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeRsqClampIntrinsic(MI, MRI, B);
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntersectRayIntrinsic(MI, B);
+ case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
+ return legalizeBVHDualIntrinsic(MI, B);
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index a98e8ba7aaaf1..aba1f55330913 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -208,6 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const;
+ bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
+
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index efe92e0fecc12..370c9c1076193 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3217,10 +3217,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
return;
}
- case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: {
- unsigned N = MI.getNumExplicitOperands() - 2;
+ case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
+ bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
+ unsigned NumMods = !IsDual ? 1 : 0; // Has A16 modifier
+ unsigned LastRegOpIdx = (MI.getNumExplicitOperands() - 1) - NumMods;
applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(B, MI, {N});
+ executeInWaterfallLoop(B, MI, {LastRegOpIdx});
return;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
@@ -5010,11 +5013,24 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
assert(RSrcIntrin->IsImage);
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
- case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: {
- unsigned N = MI.getNumExplicitOperands() - 2;
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
- OpdsMapping[N] = getSGPROpMapping(MI.getOperand(N).getReg(), MRI, *TRI);
- if (N == 3) {
+ case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
+ bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
+ unsigned NumMods = !IsDual ? 1 : 0; // Has A16 modifier
+ unsigned LastRegOpIdx = (MI.getNumExplicitOperands() - 1) - NumMods;
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ if (IsDual) {
+ OpdsMapping[1] = AMDGPU::getValueMapping(
+ AMDGPU::VGPRRegBankID,
+ MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
+ OpdsMapping[2] = AMDGPU::getValueMapping(
+ AMDGPU::VGPRRegBankID,
+ MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
+ }
+ OpdsMapping[LastRegOpIdx] =
+ getSGPROpMapping(MI.getOperand(LastRegOpIdx).getReg(), MRI, *TRI);
+ if (LastRegOpIdx == 3) {
// Sequential form: all operands combined into VGPR256/VGPR512
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
if (Size > 256)
@@ -5022,7 +5038,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
} else {
// NSA form
- for (unsigned I = 2; I < N; ++I) {
+ unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
+ for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index afed8b999d8eb..9784bb08da557 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -265,6 +265,7 @@ DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
+DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 1b94d6c43392d..63af4b2e351fb 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1509,7 +1509,7 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
-class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
+class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
int VAddrDwords = !srl(RegClass.Size, 5);
@@ -1517,9 +1517,10 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
list<RegisterClass> GFX11PlusAddrTypes =
- !if(IsA16,
+ !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
+ !if(IsA16,
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
- [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1553,15 +1554,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
}
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
+ bit isDual,
list<RegisterClass> addr_types>
- : VIMAGE_gfx12<op.GFX12, (outs VReg_128:$vdata),
+ : VIMAGE_gfx12<op.GFX12, !if(isDual,
+ (outs VReg_320:$vdata, VReg_96:$ray_origin_out,
+ VReg_96:$ray_dir_out),
+ (outs VReg_128:$vdata)),
num_addrs, "GFX12", addr_types> {
- let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc, A16:$a16));
- let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc$a16";
-}
-
-multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
- defvar info = MIMG_IntersectRay_Helper<Is64, IsA16>;
+ let Constraints = !if(isDual,
+ "$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
+ !if(isDual, (ins), (ins A16:$a16)));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
+ !if(isDual, "", "$a16");
+ let SchedRW = !if(isDual,
+ [WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
+}
+
+multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
+ bit isDual> {
+ defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
def "" : MIMGBaseOpcode {
let BVH = 1;
let A16 = IsA16;
@@ -1599,7 +1611,8 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
}
}
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
- info.GFX11PlusAddrTypes> {
+ isDual, info.GFX11PlusAddrTypes> {
+ let VDataDwords = !if(isDual, 10, 4);
let VAddrDwords = info.num_addrs;
}
}
@@ -1771,15 +1784,18 @@ defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x80>, "ima
let OtherPredicates = [HasImageInsts, HasGFX10_AEncoding] in {
defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load <mimgopc<0x18, 0x18, MIMG.NOP>, "image_msaa_load">;
-defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0>;
-defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1>;
-defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0>;
-defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1>;
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1, 0>;
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
+defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
+
let SubtargetPredicate = isGFX12Plus in {
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
+ def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
}
} // End let OtherPredicates = [HasImageInsts]
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index fe095414e5172..b010027e7ec72 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1386,9 +1386,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
}
+ case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
+ Info.memVT =
+ MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
+ ? CI.getType()
+ : cast<StructType>(CI.getType())
+ ->getElementType(0)); // XXX: what is correct VT?
Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
Info.align.reset();
@@ -9421,6 +9426,48 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, VT,
M->getMemOperand());
}
+ case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ SDValue NodePtr = M->getOperand(2);
+ SDValue RayExtent = M->getOperand(3);
+ SDValue InstanceMask = M->getOperand(4);
+ SDValue RayOrigin = M->getOperand(5);
+ SDValue RayDir = M->getOperand(6);
+ SDValue Offsets = M->getOperand(7);
+ SDValue TDescr = M->getOperand(8);
+
+ assert(NodePtr.getValueType() == MVT::i64);
+ assert(RayDir.getValueType() == MVT::v3f32);
+
+ if (!AMDGPU::isGFX12Plus(*Subtarget)) {
+ emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
+ return SDValue();
+ }
+
+ const unsigned NumVDataDwords = 10;
+ const unsigned NumVAddrDwords = 12;
+ int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords,
+ NumVAddrDwords);
+ assert(Opcode != -1);
+
+ SmallVector<SDValue, 16> Ops;
+ Ops.push_back(NodePtr);
+ Ops.push_back(DAG.getBuildVector(
+ MVT::v2i32, DL,
+ {DAG.getBitcast(MVT::i32, RayExtent),
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, InstanceMask)}));
+ Ops.push_back(RayOrigin);
+ Ops.push_back(RayDir);
+ Ops.push_back(Offsets);
+ Ops.push_back(TDescr);
+ Ops.push_back(M->getChain());
+
+ auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
+ MachineMemOperand *MemRef = M->getMemOperand();
+ DAG.setNodeMemRefs(NewNode, {MemRef});
+ return SDValue(NewNode, 0);
+ }
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index cd5d3e3d31ab4..574303f46f9b6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4343,7 +4343,15 @@ def G_AMDGPU_INTRIN_IMAGE_STORE_D16 : AMDGPUGenericInstruction {
def G_AMDGPU_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins unknown:$intrin, variable_ops);
+ let InOperandList = (ins unknown:$opcode, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
+ let InOperandList = (ins unknown:$opcode, variable_ops);
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dual_intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dual_intersect_ray.ll
new file mode 100644
index 0000000000000..710d802016aeb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dual_intersect_ray.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions hav...
[truncated]
ping
Looks good overall.
LGTM
Rebased