Skip to content

Commit ad9c9a9

Browse files
[AMDGPU] Add intrinsic and MI for image_bvh_dual_intersect_ray
- Add the llvm.amdgcn.image.bvh.dual.intersect.ray intrinsic and the image_bvh_dual_intersect_ray machine instruction.
- Add llvm_v10i32_ty and llvm_v10f32_ty.
1 parent bbabf4e commit ad9c9a9

14 files changed

+289
-29
lines changed

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ def llvm_v3i32_ty : LLVMType<v3i32>; // 3 x i32
561561
def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
562562
def llvm_v6i32_ty : LLVMType<v6i32>; // 6 x i32
563563
def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
564+
def llvm_v10i32_ty : LLVMType<v10i32>; // 10 x i32
564565
def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
565566
def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32
566567
def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32
@@ -591,6 +592,7 @@ def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
591592
def llvm_v3f32_ty : LLVMType<v3f32>; // 3 x float
592593
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
593594
def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
595+
def llvm_v10f32_ty : LLVMType<v10f32>; // 10 x float
594596
def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
595597
def llvm_v32f32_ty : LLVMType<v32f32>; // 32 x float
596598
def llvm_v1f64_ty : LLVMType<v1f64>; // 1 x double

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2801,6 +2801,17 @@ def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty
28012801
// GFX12 Intrinsics
28022802
//===----------------------------------------------------------------------===//
28032803

2804+
// <vdata>, <ray_origin>, <ray_dir>
2805+
// llvm.amdgcn.image.bvh.dual.intersect.ray <node_ptr>, <ray_extent>,
2806+
// <instance_mask>, <ray_origin>,
2807+
// <ray_dir>, <offsets>,
2808+
// <texture_descr>
2809+
def int_amdgcn_image_bvh_dual_intersect_ray :
2810+
Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
2811+
[llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
2812+
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
2813+
[IntrReadMem, IntrWillReturn]>;
2814+
28042815
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
28052816
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
28062817
Intrinsic<[llvm_i32_ty],

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3569,10 +3569,12 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
35693569

35703570
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
35713571
MachineInstr &MI) const {
3572-
MI.setDesc(TII.get(MI.getOperand(1).getImm()));
3573-
MI.removeOperand(1);
3572+
unsigned OpcodeOpIdx =
3573+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3574+
MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
3575+
MI.removeOperand(OpcodeOpIdx);
35743576
MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
3575-
return true;
3577+
return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
35763578
}
35773579

35783580
// FIXME: This should be removed and let the patterns select. We just need the
@@ -4086,6 +4088,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
40864088
assert(Intr && "not an image intrinsic with image pseudo");
40874089
return selectImageIntrinsic(I, Intr);
40884090
}
4091+
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
40894092
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
40904093
return selectBVHIntersectRayIntrinsic(I);
40914094
case AMDGPU::G_SBFX:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7183,6 +7183,61 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
71837183
return true;
71847184
}
71857185

7186+
bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
7187+
MachineIRBuilder &B) const {
7188+
const LLT S32 = LLT::scalar(32);
7189+
const LLT V2S32 = LLT::fixed_vector(2, 32);
7190+
7191+
Register DstReg = MI.getOperand(0).getReg();
7192+
Register DstOrigin = MI.getOperand(1).getReg();
7193+
Register DstDir = MI.getOperand(2).getReg();
7194+
Register NodePtr = MI.getOperand(4).getReg();
7195+
Register RayExtent = MI.getOperand(5).getReg();
7196+
Register InstanceMask = MI.getOperand(6).getReg();
7197+
Register RayOrigin = MI.getOperand(7).getReg();
7198+
Register RayDir = MI.getOperand(8).getReg();
7199+
Register Offsets = MI.getOperand(9).getReg();
7200+
Register TDescr = MI.getOperand(10).getReg();
7201+
7202+
if (!AMDGPU::isGFX12Plus(ST)) {
7203+
DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(),
7204+
"intrinsic not supported on subtarget",
7205+
MI.getDebugLoc());
7206+
B.getMF().getFunction().getContext().diagnose(BadIntrin);
7207+
return false;
7208+
}
7209+
7210+
const unsigned NumVDataDwords = 10;
7211+
const unsigned NumVAddrDwords = 12;
7212+
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
7213+
AMDGPU::MIMGEncGfx12, NumVDataDwords,
7214+
NumVAddrDwords);
7215+
assert(Opcode != -1);
7216+
7217+
SmallVector<Register, 12> Ops;
7218+
Ops.push_back(NodePtr);
7219+
Ops.push_back(B.buildMergeLikeInstr(
7220+
V2S32, {RayExtent, B.buildAnyExt(S32, InstanceMask)})
7221+
.getReg(0));
7222+
Ops.push_back(RayOrigin);
7223+
Ops.push_back(RayDir);
7224+
Ops.push_back(Offsets);
7225+
7226+
auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
7227+
.addDef(DstReg)
7228+
.addDef(DstOrigin)
7229+
.addDef(DstDir)
7230+
.addImm(Opcode);
7231+
7232+
for (Register R : Ops)
7233+
MIB.addUse(R);
7234+
7235+
MIB.addUse(TDescr).cloneMemRefs(MI);
7236+
7237+
MI.eraseFromParent();
7238+
return true;
7239+
}
7240+
71867241
bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
71877242
MachineIRBuilder &B) const {
71887243
const SITargetLowering *TLI = ST.getTargetLowering();
@@ -7531,6 +7586,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
75317586
return legalizeRsqClampIntrinsic(MI, MRI, B);
75327587
case Intrinsic::amdgcn_image_bvh_intersect_ray:
75337588
return legalizeBVHIntersectRayIntrinsic(MI, B);
7589+
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
7590+
return legalizeBVHDualIntrinsic(MI, B);
75347591
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
75357592
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
75367593
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
208208
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
209209
MachineIRBuilder &B) const;
210210

211+
bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
212+
211213
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
212214
Intrinsic::ID IID) const;
213215

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3217,10 +3217,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
32173217
applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
32183218
return;
32193219
}
3220-
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: {
3221-
unsigned N = MI.getNumExplicitOperands() - 2;
3220+
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3221+
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3222+
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
3223+
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
3224+
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
32223225
applyDefaultMapping(OpdMapper);
3223-
executeInWaterfallLoop(B, MI, {N});
3226+
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
32243227
return;
32253228
}
32263229
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
@@ -5010,19 +5013,33 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50105013
assert(RSrcIntrin->IsImage);
50115014
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
50125015
}
5013-
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: {
5014-
unsigned N = MI.getNumExplicitOperands() - 2;
5015-
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
5016-
OpdsMapping[N] = getSGPROpMapping(MI.getOperand(N).getReg(), MRI, *TRI);
5017-
if (N == 3) {
5016+
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
5017+
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5018+
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
5019+
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
5020+
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
5021+
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5022+
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5023+
if (IsDual) {
5024+
OpdsMapping[1] = AMDGPU::getValueMapping(
5025+
AMDGPU::VGPRRegBankID,
5026+
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
5027+
OpdsMapping[2] = AMDGPU::getValueMapping(
5028+
AMDGPU::VGPRRegBankID,
5029+
MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
5030+
}
5031+
OpdsMapping[LastRegOpIdx] =
5032+
getSGPROpMapping(MI.getOperand(LastRegOpIdx).getReg(), MRI, *TRI);
5033+
if (LastRegOpIdx == 3) {
50185034
// Sequential form: all operands combined into VGPR256/VGPR512
50195035
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
50205036
if (Size > 256)
50215037
Size = 512;
50225038
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
50235039
} else {
50245040
// NSA form
5025-
for (unsigned I = 2; I < N; ++I) {
5041+
unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
5042+
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
50265043
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
50275044
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
50285045
}

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ DECODE_OPERAND_REG_8(VReg_128)
265265
DECODE_OPERAND_REG_8(VReg_192)
266266
DECODE_OPERAND_REG_8(VReg_256)
267267
DECODE_OPERAND_REG_8(VReg_288)
268+
DECODE_OPERAND_REG_8(VReg_320)
268269
DECODE_OPERAND_REG_8(VReg_352)
269270
DECODE_OPERAND_REG_8(VReg_384)
270271
DECODE_OPERAND_REG_8(VReg_512)

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,17 +1509,18 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
15091509
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
15101510
: MIMG_Gather<op, sample, 1>;
15111511

1512-
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
1512+
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
15131513
int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
15141514
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
15151515
int VAddrDwords = !srl(RegClass.Size, 5);
15161516

15171517
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
15181518
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
15191519
list<RegisterClass> GFX11PlusAddrTypes =
1520-
!if(IsA16,
1520+
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
1521+
!if(IsA16,
15211522
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
1522-
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
1523+
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
15231524
}
15241525

15251526
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1553,15 +1554,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
15531554
}
15541555

15551556
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
1557+
bit isDual,
15561558
list<RegisterClass> addr_types>
1557-
: VIMAGE_gfx12<op.GFX12, (outs VReg_128:$vdata),
1559+
: VIMAGE_gfx12<op.GFX12, !if(isDual,
1560+
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
1561+
VReg_96:$ray_dir_out),
1562+
(outs VReg_128:$vdata)),
15581563
num_addrs, "GFX12", addr_types> {
1559-
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc, A16:$a16));
1560-
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc$a16";
1561-
}
1562-
1563-
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
1564-
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16>;
1564+
let Constraints = !if(isDual,
1565+
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
1566+
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
1567+
!if(isDual, (ins), (ins A16:$a16)));
1568+
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
1569+
!if(isDual, "", "$a16");
1570+
let SchedRW = !if(isDual,
1571+
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
1572+
}
1573+
1574+
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
1575+
bit isDual> {
1576+
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
15651577
def "" : MIMGBaseOpcode {
15661578
let BVH = 1;
15671579
let A16 = IsA16;
@@ -1599,7 +1611,8 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
15991611
}
16001612
}
16011613
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
1602-
info.GFX11PlusAddrTypes> {
1614+
isDual, info.GFX11PlusAddrTypes> {
1615+
let VDataDwords = !if(isDual, 10, 4);
16031616
let VAddrDwords = info.num_addrs;
16041617
}
16051618
}
@@ -1771,15 +1784,18 @@ defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x80>, "ima
17711784
let OtherPredicates = [HasImageInsts, HasGFX10_AEncoding] in {
17721785
defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load <mimgopc<0x18, 0x18, MIMG.NOP>, "image_msaa_load">;
17731786

1774-
defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0>;
1775-
defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1>;
1776-
defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0>;
1777-
defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1>;
1787+
defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0, 0>;
1788+
defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1, 0>;
1789+
defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0, 0>;
1790+
defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1, 0>;
17781791
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
17791792

1793+
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
1794+
17801795
let SubtargetPredicate = isGFX12Plus in {
17811796
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
17821797
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
1798+
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
17831799
}
17841800

17851801
} // End let OtherPredicates = [HasImageInsts]

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1386,9 +1386,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13861386
MachineMemOperand::MOVolatile;
13871387
return true;
13881388
}
1389+
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
13891390
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
13901391
Info.opc = ISD::INTRINSIC_W_CHAIN;
1391-
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
1392+
Info.memVT =
1393+
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
1394+
? CI.getType()
1395+
: cast<StructType>(CI.getType())
1396+
->getElementType(0)); // XXX: what is correct VT?
13921397

13931398
Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
13941399
Info.align.reset();
@@ -9421,6 +9426,48 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94219426
Op->getVTList(), Ops, VT,
94229427
M->getMemOperand());
94239428
}
9429+
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
9430+
MemSDNode *M = cast<MemSDNode>(Op);
9431+
SDValue NodePtr = M->getOperand(2);
9432+
SDValue RayExtent = M->getOperand(3);
9433+
SDValue InstanceMask = M->getOperand(4);
9434+
SDValue RayOrigin = M->getOperand(5);
9435+
SDValue RayDir = M->getOperand(6);
9436+
SDValue Offsets = M->getOperand(7);
9437+
SDValue TDescr = M->getOperand(8);
9438+
9439+
assert(NodePtr.getValueType() == MVT::i64);
9440+
assert(RayDir.getValueType() == MVT::v3f32);
9441+
9442+
if (!AMDGPU::isGFX12Plus(*Subtarget)) {
9443+
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
9444+
return SDValue();
9445+
}
9446+
9447+
const unsigned NumVDataDwords = 10;
9448+
const unsigned NumVAddrDwords = 12;
9449+
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
9450+
AMDGPU::MIMGEncGfx12, NumVDataDwords,
9451+
NumVAddrDwords);
9452+
assert(Opcode != -1);
9453+
9454+
SmallVector<SDValue, 16> Ops;
9455+
Ops.push_back(NodePtr);
9456+
Ops.push_back(DAG.getBuildVector(
9457+
MVT::v2i32, DL,
9458+
{DAG.getBitcast(MVT::i32, RayExtent),
9459+
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, InstanceMask)}));
9460+
Ops.push_back(RayOrigin);
9461+
Ops.push_back(RayDir);
9462+
Ops.push_back(Offsets);
9463+
Ops.push_back(TDescr);
9464+
Ops.push_back(M->getChain());
9465+
9466+
auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
9467+
MachineMemOperand *MemRef = M->getMemOperand();
9468+
DAG.setNodeMemRefs(NewNode, {MemRef});
9469+
return SDValue(NewNode, 0);
9470+
}
94249471
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
94259472
MemSDNode *M = cast<MemSDNode>(Op);
94269473
SDValue NodePtr = M->getOperand(2);

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4343,7 +4343,15 @@ def G_AMDGPU_INTRIN_IMAGE_STORE_D16 : AMDGPUGenericInstruction {
43434343

43444344
def G_AMDGPU_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
43454345
let OutOperandList = (outs type0:$dst);
4346-
let InOperandList = (ins unknown:$intrin, variable_ops);
4346+
let InOperandList = (ins unknown:$opcode, variable_ops);
4347+
let hasSideEffects = 0;
4348+
let mayLoad = 1;
4349+
let mayStore = 0;
4350+
}
4351+
4352+
def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
4353+
let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
4354+
let InOperandList = (ins unknown:$opcode, variable_ops);
43474355
let hasSideEffects = 0;
43484356
let mayLoad = 1;
43494357
let mayStore = 0;

0 commit comments

Comments
 (0)