Skip to content

Commit 039e907

Browse files
committed
[AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. llvm#130041
1 parent 9e65f10 commit 039e907

12 files changed

+171
-35
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2879,6 +2879,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray :
28792879
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
28802880
[IntrReadMem, IntrWillReturn]>;
28812881

2882+
// <vdata>, <ray_origin>, <ray_dir>
2883+
// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>,
2884+
// <instance_mask>, <ray_origin>,
2885+
// <ray_dir>, <offset>,
2886+
// <texture_descr>
//
// Results, in order: <vdata> is <10 x i32>; the returned <ray_origin> and
// <ray_dir> are each <3 x float>.
// Arguments, in order: <node_ptr> is i64, <ray_extent> is float,
// <instance_mask> is i8, <ray_origin> and <ray_dir> are <3 x float>,
// <offset> is i32, and <texture_descr> is <4 x i32>.
2887+
def int_amdgcn_image_bvh8_intersect_ray :
2888+
Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
2889+
[llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
2890+
llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
2891+
[IntrReadMem, IntrWillReturn]>;
2892+
28822893
def int_amdgcn_ds_bvh_stack_push4_pop1_rtn : IntDSBVHStackRtn;
28832894

28842895
def int_amdgcn_ds_bvh_stack_push8_pop1_rtn :

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4103,6 +4103,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
41034103
}
41044104
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
41054105
case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
4106+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
41064107
return selectBVHIntrinsic(I);
41074108
case AMDGPU::G_SBFX:
41084109
case AMDGPU::G_UBFX:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7173,8 +7173,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
71737173
return true;
71747174
}
71757175

7176-
bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
7177-
MachineIRBuilder &B) const {
7176+
bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
7177+
MachineInstr &MI, MachineIRBuilder &B) const {
71787178
const LLT S32 = LLT::scalar(32);
71797179
const LLT V2S32 = LLT::fixed_vector(2, 32);
71807180

@@ -7197,11 +7197,14 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
71977197
return false;
71987198
}
71997199

7200+
bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() ==
7201+
Intrinsic::amdgcn_image_bvh8_intersect_ray;
72007202
const unsigned NumVDataDwords = 10;
7201-
const unsigned NumVAddrDwords = 12;
7202-
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
7203-
AMDGPU::MIMGEncGfx12, NumVDataDwords,
7204-
NumVAddrDwords);
7203+
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
7204+
int Opcode = AMDGPU::getMIMGOpcode(
7205+
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
7206+
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
7207+
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
72057208
assert(Opcode != -1);
72067209

72077210
SmallVector<Register, 12> Ops;
@@ -7213,7 +7216,8 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
72137216
Ops.push_back(RayDir);
72147217
Ops.push_back(Offsets);
72157218

7216-
auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
7219+
auto MIB = B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY
7220+
: AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
72177221
.addDef(DstReg)
72187222
.addDef(DstOrigin)
72197223
.addDef(DstDir)
@@ -7582,7 +7586,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
75827586
case Intrinsic::amdgcn_image_bvh_intersect_ray:
75837587
return legalizeBVHIntrinsic(MI, B);
75847588
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
7585-
return legalizeBVHDualIntrinsic(MI, B);
7589+
case Intrinsic::amdgcn_image_bvh8_intersect_ray:
7590+
return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B);
75867591
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
75877592
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
75887593
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
205205
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
206206
Intrinsic::ID IID) const;
207207

208-
bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
208+
bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI,
209+
MachineIRBuilder &B) const;
209210

210211
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
211212
Intrinsic::ID IID) const;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3218,9 +3218,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
32183218
return;
32193219
}
32203220
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3221+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
32213222
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3222-
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
3223-
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
3223+
bool IsDualOrBVH8 =
3224+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
3225+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
3226+
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Dual/BVH8 forms have no A16 modifier
32243227
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
32253228
applyDefaultMapping(OpdMapper);
32263229
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
@@ -5017,13 +5020,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50175020
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
50185021
}
50195022
case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
5023+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
50205024
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5021-
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
5022-
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
5025+
bool IsDualOrBVH8 =
5026+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
5027+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
5028+
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Dual/BVH8 forms have no A16 modifier
50235029
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
50245030
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
50255031
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5026-
if (IsDual) {
5032+
if (IsDualOrBVH8) {
50275033
OpdsMapping[1] = AMDGPU::getValueMapping(
50285034
AMDGPU::VGPRRegBankID,
50295035
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
@@ -5041,7 +5047,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50415047
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
50425048
} else {
50435049
// NSA form
5044-
unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
5050+
unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
50455051
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
50465052
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
50475053
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,18 +1509,19 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
15091509
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
15101510
: MIMG_Gather<op, sample, 1>;
15111511

1512-
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
1513-
int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
1512+
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
1513+
int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)));
15141514
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
15151515
int VAddrDwords = !srl(RegClass.Size, 5);
15161516

15171517
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
15181518
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
15191519
list<RegisterClass> GFX11PlusAddrTypes =
1520-
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
1520+
!if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32],
1521+
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
15211522
!if(IsA16,
15221523
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
1523-
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
1524+
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])));
15241525
}
15251526

15261527
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1554,26 +1555,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
15541555
}
15551556

15561557
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
1557-
bit isDual,
1558+
bit isDual, bit isBVH8,
15581559
list<RegisterClass> addr_types>
1559-
: VIMAGE_gfx12<op.GFX12, !if(isDual,
1560+
: VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
15601561
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
15611562
VReg_96:$ray_dir_out),
15621563
(outs VReg_128:$vdata)),
15631564
num_addrs, "GFX12", addr_types> {
1564-
let Constraints = !if(isDual,
1565+
let Constraints = !if(!or(isDual, isBVH8),
15651566
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
15661567
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
1567-
!if(isDual, (ins), (ins A16:$a16)));
1568+
!if(!or(isDual, isBVH8), (ins), (ins A16:$a16)));
15681569
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
1569-
!if(isDual, "", "$a16");
1570-
let SchedRW = !if(isDual,
1570+
!if(!or(isDual, isBVH8), "", "$a16");
1571+
let SchedRW = !if(!or(isDual, isBVH8),
15711572
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
15721573
}
15731574

15741575
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
1575-
bit isDual> {
1576-
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
1576+
bit isDual, bit isBVH8 = 0> {
1577+
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>;
15771578
def "" : MIMGBaseOpcode {
15781579
let BVH = 1;
15791580
let A16 = IsA16;
@@ -1611,8 +1612,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
16111612
}
16121613
}
16131614
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
1614-
isDual, info.GFX11PlusAddrTypes> {
1615-
let VDataDwords = !if(isDual, 10, 4);
1615+
isDual, isBVH8,
1616+
info.GFX11PlusAddrTypes> {
1617+
let VDataDwords = !if(!or(isDual, isBVH8), 10, 4);
16161618
let VAddrDwords = info.num_addrs;
16171619
}
16181620
}
@@ -1791,11 +1793,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>
17911793
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
17921794

17931795
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
1796+
defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>;
17941797

17951798
let SubtargetPredicate = isGFX12Plus in {
17961799
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
17971800
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
17981801
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
1802+
def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">;
17991803
}
18001804

18011805
} // End let OtherPredicates = [HasImageInsts]

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,7 +1381,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13811381
return true;
13821382
}
13831383
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
1384-
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
1384+
case Intrinsic::amdgcn_image_bvh_intersect_ray:
1385+
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
13851386
Info.opc = ISD::INTRINSIC_W_CHAIN;
13861387
Info.memVT =
13871388
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
@@ -9435,7 +9436,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94359436
Op->getVTList(), Ops, VT,
94369437
M->getMemOperand());
94379438
}
9438-
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
9439+
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
9440+
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
94399441
MemSDNode *M = cast<MemSDNode>(Op);
94409442
SDValue NodePtr = M->getOperand(2);
94419443
SDValue RayExtent = M->getOperand(3);
@@ -9453,11 +9455,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94539455
return SDValue();
94549456
}
94559457

9458+
bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray;
94569459
const unsigned NumVDataDwords = 10;
9457-
const unsigned NumVAddrDwords = 12;
9458-
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
9459-
AMDGPU::MIMGEncGfx12, NumVDataDwords,
9460-
NumVAddrDwords);
9460+
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
9461+
int Opcode = AMDGPU::getMIMGOpcode(
9462+
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
9463+
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
9464+
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
94619465
assert(Opcode != -1);
94629466

94639467
SmallVector<SDValue, 16> Ops;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4328,6 +4328,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
43284328
let mayStore = 0;
43294329
}
43304330

4331+
// Generic instruction that the legalizer builds for the
// llvm.amdgcn.image.bvh8.intersect.ray intrinsic. It defines three results
// ($dst of type0, plus $ray_origin and $ray_dir sharing type1) and takes an
// $opcode operand followed by a variable number of source operands. It is
// marked as a load that neither stores nor has other side effects.
def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
4332+
let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
4333+
let InOperandList = (ins unknown:$opcode, variable_ops);
4334+
let hasSideEffects = 0;
4335+
let mayLoad = 1;
4336+
let mayStore = 0;
4337+
}
4338+
43314339
// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
43324340
// if necessary.
43334341
def G_SI_CALL : AMDGPUGenericInstruction {
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3+
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
4+
5+
declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>)
6+
7+
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
8+
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray:
9+
; GFX12-SDAG: ; %bb.0: ; %main_body
10+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
11+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
12+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
13+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0
14+
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
15+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
16+
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
17+
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
18+
; GFX12-SDAG-NEXT: ; return to shader part epilog
19+
;
20+
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray:
21+
; GFX12-GISEL: ; %bb.0: ; %main_body
22+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
23+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
24+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
25+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0
26+
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
27+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
28+
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
29+
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
30+
; GFX12-GISEL-NEXT: ; return to shader part epilog
31+
main_body:
32+
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
33+
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
34+
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
35+
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
36+
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
37+
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
38+
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
39+
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
40+
%r = bitcast <10 x i32> %a to <10 x float>
41+
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
42+
store <3 x float> %o, ptr addrspace(1) %origin
43+
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
44+
store <3 x float> %d, ptr addrspace(1) %dir
45+
ret <10 x float> %r
46+
}
47+
48+
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
49+
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1:
50+
; GFX12-SDAG: ; %bb.0: ; %main_body
51+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
52+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
53+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
54+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1
55+
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
56+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
57+
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
58+
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
59+
; GFX12-SDAG-NEXT: ; return to shader part epilog
60+
;
61+
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1:
62+
; GFX12-GISEL: ; %bb.0: ; %main_body
63+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
64+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
65+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
66+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1
67+
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
68+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
69+
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
70+
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
71+
; GFX12-GISEL-NEXT: ; return to shader part epilog
72+
main_body:
73+
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
74+
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
75+
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
76+
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
77+
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
78+
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
79+
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
80+
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
81+
%r = bitcast <10 x i32> %a to <10 x float>
82+
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
83+
store <3 x float> %o, ptr addrspace(1) %origin
84+
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
85+
store <3 x float> %d, ptr addrspace(1) %dir
86+
ret <10 x float> %r
87+
}

llvm/test/MC/AMDGPU/gfx12_asm_vimage.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,9 @@ image_bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17]], s[4:7] a16
10691069
image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
10701070
// GFX12: encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
10711071

1072+
image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
1073+
// GFX12: encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
1074+
10721075
image_get_resinfo v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
10731076
// GFX12: encoding: [0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00]
10741077

llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,6 @@ bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17], v[18:20]], s[4:7]
4444

4545
bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
4646
// GFX12: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] ; encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
47+
48+
bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
49+
// GFX12: image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] ; encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]

0 commit comments

Comments
 (0)