-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. #130041
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. #130041
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-llvm-ir Author: Mariusz Sikora (mariusz-sikora-at-amd) Changes: Patch is 23.58 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/130041.diff 12 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f93439b30523e..d0ce9f0b8322d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2812,6 +2812,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray :
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
[IntrReadMem, IntrWillReturn]>;
+// <vdata>, <ray_origin>, <ray_dir>
+// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>,
+// <instance_mask>, <ray_origin>,
+// <ray_dir>, <offset>,
+// <texture_descr>
+def int_amdgcn_image_bvh8_intersect_ray :
+ Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
+ llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
+ [IntrReadMem, IntrWillReturn]>;
+
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
Intrinsic<[llvm_i32_ty],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9c3bdd74a5cb0..8777a440c613b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4090,6 +4090,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
}
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
return selectBVHIntersectRayIntrinsic(I);
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cd0554a5c5b99..3e4c946ee9010 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7183,8 +7183,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
return true;
}
-bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
+ MachineInstr &MI, MachineIRBuilder &B) const {
const LLT S32 = LLT::scalar(32);
const LLT V2S32 = LLT::fixed_vector(2, 32);
@@ -7207,11 +7207,14 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
return false;
}
+ bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() ==
+ Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
- const unsigned NumVAddrDwords = 12;
- int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
- AMDGPU::MIMGEncGfx12, NumVDataDwords,
- NumVAddrDwords);
+ const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
+ int Opcode = AMDGPU::getMIMGOpcode(
+ IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
SmallVector<Register, 12> Ops;
@@ -7223,7 +7226,8 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
Ops.push_back(RayDir);
Ops.push_back(Offsets);
- auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
+ auto MIB = B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY
+ : AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
.addDef(DstReg)
.addDef(DstOrigin)
.addDef(DstDir)
@@ -7587,7 +7591,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
- return legalizeBVHDualIntrinsic(MI, B);
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray:
+ return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index aba1f55330913..1f4e02b0d600a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -208,7 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const;
- bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI,
+ MachineIRBuilder &B) const;
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 370c9c1076193..9bd9e46bdfcb8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3218,9 +3218,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
- bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
- unsigned NumMods = !IsDual ? 1 : 0; // Has A16 modifier
+ bool IsDualOrBVH8 =
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
+ unsigned NumMods = !IsDualOrBVH8 ? 1 : 0; // Has A16 modifier
unsigned LastRegOpIdx = (MI.getNumExplicitOperands() - 1) - NumMods;
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
@@ -5014,13 +5017,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
- bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
- unsigned NumMods = !IsDual ? 1 : 0; // Has A16 modifier
+ bool IsDualOrBVH8 =
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
+ unsigned NumMods = !IsDualOrBVH8 ? 1 : 0; // Has A16 modifier
unsigned LastRegOpIdx = (MI.getNumExplicitOperands() - 1) - NumMods;
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
- if (IsDual) {
+ if (IsDualOrBVH8) {
OpdsMapping[1] = AMDGPU::getValueMapping(
AMDGPU::VGPRRegBankID,
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
@@ -5038,7 +5044,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
} else {
// NSA form
- unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
+ unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 63af4b2e351fb..2b9bc2b89a825 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1509,18 +1509,19 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
-class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
- int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
+class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
+ int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)));
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
int VAddrDwords = !srl(RegClass.Size, 5);
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
list<RegisterClass> GFX11PlusAddrTypes =
- !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
+ !if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32],
+ !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
!if(IsA16,
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
- [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])));
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1554,26 +1555,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
}
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
- bit isDual,
+ bit isDual, bit isBVH8,
list<RegisterClass> addr_types>
- : VIMAGE_gfx12<op.GFX12, !if(isDual,
+ : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
VReg_96:$ray_dir_out),
(outs VReg_128:$vdata)),
num_addrs, "GFX12", addr_types> {
- let Constraints = !if(isDual,
+ let Constraints = !if(!or(isDual, isBVH8),
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
- !if(isDual, (ins), (ins A16:$a16)));
+ !if(!or(isDual, isBVH8), (ins), (ins A16:$a16)));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
- !if(isDual, "", "$a16");
- let SchedRW = !if(isDual,
+ !if(!or(isDual, isBVH8), "", "$a16");
+ let SchedRW = !if(!or(isDual, isBVH8),
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
}
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
- bit isDual> {
- defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
+ bit isDual, bit isBVH8 = 0> {
+ defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>;
def "" : MIMGBaseOpcode {
let BVH = 1;
let A16 = IsA16;
@@ -1611,8 +1612,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
}
}
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
- isDual, info.GFX11PlusAddrTypes> {
- let VDataDwords = !if(isDual, 10, 4);
+ isDual, isBVH8,
+ info.GFX11PlusAddrTypes> {
+ let VDataDwords = !if(!or(isDual, isBVH8), 10, 4);
let VAddrDwords = info.num_addrs;
}
}
@@ -1791,11 +1793,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
+defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>;
let SubtargetPredicate = isGFX12Plus in {
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
+ def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">;
}
} // End let OtherPredicates = [HasImageInsts]
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b010027e7ec72..dcb65f9c7523f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1387,7 +1387,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
- case Intrinsic::amdgcn_image_bvh_intersect_ray: {
+ case Intrinsic::amdgcn_image_bvh_intersect_ray:
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT =
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
@@ -9426,7 +9427,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, VT,
M->getMemOperand());
}
- case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
+ case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
@@ -9444,11 +9446,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
+ bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
- const unsigned NumVAddrDwords = 12;
- int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
- AMDGPU::MIMGEncGfx12, NumVDataDwords,
- NumVAddrDwords);
+ const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
+ int Opcode = AMDGPU::getMIMGOpcode(
+ IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
SmallVector<SDValue, 16> Ops;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 574303f46f9b6..322d566acde0a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4357,6 +4357,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
let mayStore = 0;
}
+def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
+ let InOperandList = (ins unknown:$opcode, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
// if necessary.
def G_SI_CALL : AMDGPUGenericInstruction {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
new file mode 100644
index 0000000000000..ff65d5d96cb2c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+
+declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>)
+
+define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
+; GFX12-SDAG-LABEL: image_bvh8_intersect_ray:
+; GFX12-SDAG: ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
+; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: image_bvh8_intersect_ray:
+; GFX12-GISEL: ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
+; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
+; GFX12-GISEL-NEXT: ; return to shader part epilog
+main_body:
+ %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
+ %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
+ %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
+ %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
+ %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
+ %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
+ %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
+ %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
+ %r = bitcast <10 x i32> %a to <10 x float>
+ %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
+ store <3 x float> %o, ptr addrspace(1) %origin
+ %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
+ store <3 x float> %d, ptr addrspace(1) %dir
+ ret <10 x float> %r
+}
+
+define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
+; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1:
+; GFX12-SDAG: ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1
+; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
+; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1:
+; GFX12-GISEL: ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1
+; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
+; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
+; GFX12-GISEL-NEXT: ; return to shader part epilog
+main_body:
+ %ray_origin0 = insertelement <3 x floa...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Mariusz Sikora (mariusz-sikora-at-amd) Changes: Patch is 23.58 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/130041.diff 12 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f93439b30523e..d0ce9f0b8322d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2812,6 +2812,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray :
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
[IntrReadMem, IntrWillReturn]>;
+// <vdata>, <ray_origin>, <ray_dir>
+// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>,
+// <instance_mask>, <ray_origin>,
+// <ray_dir>, <offset>,
+// <texture_descr>
+def int_amdgcn_image_bvh8_intersect_ray :
+ Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
+ llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
+ [IntrReadMem, IntrWillReturn]>;
+
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
Intrinsic<[llvm_i32_ty],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9c3bdd74a5cb0..8777a440c613b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4090,6 +4090,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
}
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
return selectBVHIntersectRayIntrinsic(I);
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cd0554a5c5b99..3e4c946ee9010 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7183,8 +7183,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
return true;
}
-bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
+ MachineInstr &MI, MachineIRBuilder &B) const {
const LLT S32 = LLT::scalar(32);
const LLT V2S32 = LLT::fixed_vector(2, 32);
@@ -7207,11 +7207,14 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
return false;
}
+ bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() ==
+ Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
- const unsigned NumVAddrDwords = 12;
- int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
- AMDGPU::MIMGEncGfx12, NumVDataDwords,
- NumVAddrDwords);
+ const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
+ int Opcode = AMDGPU::getMIMGOpcode(
+ IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
SmallVector<Register, 12> Ops;
@@ -7223,7 +7226,8 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
Ops.push_back(RayDir);
Ops.push_back(Offsets);
- auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
+ auto MIB = B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY
+ : AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
.addDef(DstReg)
.addDef(DstOrigin)
.addDef(DstDir)
@@ -7587,7 +7591,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
- return legalizeBVHDualIntrinsic(MI, B);
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray:
+ return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B);
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index aba1f55330913..1f4e02b0d600a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -208,7 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
MachineIRBuilder &B) const;
- bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI,
+ MachineIRBuilder &B) const;
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 370c9c1076193..9bd9e46bdfcb8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3218,9 +3218,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
- bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
- unsigned NumMods = !IsDual ? 1 : 0; // Has A16 modifier
+ bool IsDualOrBVH8 =
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
+ unsigned NumMods = !IsDualOrBVH8 ? 1 : 0; // Has A16 modifier
unsigned LastRegOpIdx = (MI.getNumExplicitOperands() - 1) - NumMods;
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
@@ -5014,13 +5017,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
+ case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
- bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
- unsigned NumMods = !IsDual ? 1 : 0; // Has A16 modifier
+ bool IsDualOrBVH8 =
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
+ MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
+ unsigned NumMods = !IsDualOrBVH8 ? 1 : 0; // Has A16 modifier
unsigned LastRegOpIdx = (MI.getNumExplicitOperands() - 1) - NumMods;
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
- if (IsDual) {
+ if (IsDualOrBVH8) {
OpdsMapping[1] = AMDGPU::getValueMapping(
AMDGPU::VGPRRegBankID,
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
@@ -5038,7 +5044,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
} else {
// NSA form
- unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
+ unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 63af4b2e351fb..2b9bc2b89a825 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1509,18 +1509,19 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
-class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
- int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
+class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
+ int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)));
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
int VAddrDwords = !srl(RegClass.Size, 5);
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
list<RegisterClass> GFX11PlusAddrTypes =
- !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
+ !if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32],
+ !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
!if(IsA16,
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
- [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])));
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1554,26 +1555,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
}
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
- bit isDual,
+ bit isDual, bit isBVH8,
list<RegisterClass> addr_types>
- : VIMAGE_gfx12<op.GFX12, !if(isDual,
+ : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
VReg_96:$ray_dir_out),
(outs VReg_128:$vdata)),
num_addrs, "GFX12", addr_types> {
- let Constraints = !if(isDual,
+ let Constraints = !if(!or(isDual, isBVH8),
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
- !if(isDual, (ins), (ins A16:$a16)));
+ !if(!or(isDual, isBVH8), (ins), (ins A16:$a16)));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
- !if(isDual, "", "$a16");
- let SchedRW = !if(isDual,
+ !if(!or(isDual, isBVH8), "", "$a16");
+ let SchedRW = !if(!or(isDual, isBVH8),
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
}
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
- bit isDual> {
- defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
+ bit isDual, bit isBVH8 = 0> {
+ defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>;
def "" : MIMGBaseOpcode {
let BVH = 1;
let A16 = IsA16;
@@ -1611,8 +1612,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
}
}
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
- isDual, info.GFX11PlusAddrTypes> {
- let VDataDwords = !if(isDual, 10, 4);
+ isDual, isBVH8,
+ info.GFX11PlusAddrTypes> {
+ let VDataDwords = !if(!or(isDual, isBVH8), 10, 4);
let VAddrDwords = info.num_addrs;
}
}
@@ -1791,11 +1793,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
+defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>;
let SubtargetPredicate = isGFX12Plus in {
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
+ def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">;
}
} // End let OtherPredicates = [HasImageInsts]
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b010027e7ec72..dcb65f9c7523f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1387,7 +1387,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
- case Intrinsic::amdgcn_image_bvh_intersect_ray: {
+ case Intrinsic::amdgcn_image_bvh_intersect_ray:
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT =
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
@@ -9426,7 +9427,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, VT,
M->getMemOperand());
}
- case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
+ case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
+ case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
@@ -9444,11 +9446,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
+ bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray;
const unsigned NumVDataDwords = 10;
- const unsigned NumVAddrDwords = 12;
- int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
- AMDGPU::MIMGEncGfx12, NumVDataDwords,
- NumVAddrDwords);
+ const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
+ int Opcode = AMDGPU::getMIMGOpcode(
+ IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
SmallVector<SDValue, 16> Ops;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 574303f46f9b6..322d566acde0a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4357,6 +4357,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
let mayStore = 0;
}
+def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
+ let InOperandList = (ins unknown:$opcode, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
// if necessary.
def G_SI_CALL : AMDGPUGenericInstruction {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
new file mode 100644
index 0000000000000..ff65d5d96cb2c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
+
+declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>)
+
+define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
+; GFX12-SDAG-LABEL: image_bvh8_intersect_ray:
+; GFX12-SDAG: ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
+; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: image_bvh8_intersect_ray:
+; GFX12-GISEL: ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
+; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
+; GFX12-GISEL-NEXT: ; return to shader part epilog
+main_body:
+ %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
+ %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
+ %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
+ %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
+ %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
+ %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
+ %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
+ %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
+ %r = bitcast <10 x i32> %a to <10 x float>
+ %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
+ store <3 x float> %o, ptr addrspace(1) %origin
+ %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
+ store <3 x float> %d, ptr addrspace(1) %dir
+ ret <10 x float> %r
+}
+
+define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
+; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1:
+; GFX12-SDAG: ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1
+; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
+; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1:
+; GFX12-GISEL: ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1
+; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
+; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
+; GFX12-GISEL-NEXT: ; return to shader part epilog
+main_body:
+ %ray_origin0 = insertelement <3 x floa...
[truncated]
|
e75024c
to
4a5b797
Compare
2377e74
to
14e88b7
Compare
4a5b797
to
ad9c9a9
Compare
14e88b7
to
39baef8
Compare
ping
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good overall.
ad9c9a9
to
6fafb98
Compare
39baef8
to
c88a7c2
Compare
420753b
to
746378e
Compare
2c08a9c
to
6534bbe
Compare
No description provided.