Skip to content

Commit 39baef8

Browse files
kosarev, mariusz-sikora-at-amd
authored and committed
[AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic.
1 parent ad9c9a9 commit 39baef8

12 files changed

+171
-35
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2812,6 +2812,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray :
28122812
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
28132813
[IntrReadMem, IntrWillReturn]>;
28142814

2815+
// <vdata>, <ray_origin>, <ray_dir>
2816+
// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>,
2817+
// <instance_mask>, <ray_origin>,
2818+
// <ray_dir>, <offset>,
2819+
// <texture_descr>
2820+
def int_amdgcn_image_bvh8_intersect_ray :
2821+
Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
2822+
[llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
2823+
llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
2824+
[IntrReadMem, IntrWillReturn]>;
2825+
28152826
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
28162827
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
28172828
Intrinsic<[llvm_i32_ty],

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4090,6 +4090,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
40904090
}
40914091
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
40924092
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4093+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
40934094
return selectBVHIntersectRayIntrinsic(I);
40944095
case AMDGPU::G_SBFX:
40954096
case AMDGPU::G_UBFX:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7183,8 +7183,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
71837183
return true;
71847184
}
71857185

7186-
bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
7187-
MachineIRBuilder &B) const {
7186+
bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
7187+
MachineInstr &MI, MachineIRBuilder &B) const {
71887188
const LLT S32 = LLT::scalar(32);
71897189
const LLT V2S32 = LLT::fixed_vector(2, 32);
71907190

@@ -7207,11 +7207,14 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
72077207
return false;
72087208
}
72097209

7210+
bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() ==
7211+
Intrinsic::amdgcn_image_bvh8_intersect_ray;
72107212
const unsigned NumVDataDwords = 10;
7211-
const unsigned NumVAddrDwords = 12;
7212-
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
7213-
AMDGPU::MIMGEncGfx12, NumVDataDwords,
7214-
NumVAddrDwords);
7213+
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
7214+
int Opcode = AMDGPU::getMIMGOpcode(
7215+
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
7216+
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
7217+
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
72157218
assert(Opcode != -1);
72167219

72177220
SmallVector<Register, 12> Ops;
@@ -7223,7 +7226,8 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
72237226
Ops.push_back(RayDir);
72247227
Ops.push_back(Offsets);
72257228

7226-
auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
7229+
auto MIB = B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY
7230+
: AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
72277231
.addDef(DstReg)
72287232
.addDef(DstOrigin)
72297233
.addDef(DstDir)
@@ -7587,7 +7591,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
75877591
case Intrinsic::amdgcn_image_bvh_intersect_ray:
75887592
return legalizeBVHIntersectRayIntrinsic(MI, B);
75897593
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
7590-
return legalizeBVHDualIntrinsic(MI, B);
7594+
case Intrinsic::amdgcn_image_bvh8_intersect_ray:
7595+
return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B);
75917596
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
75927597
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
75937598
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
208208
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
209209
MachineIRBuilder &B) const;
210210

211-
bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
211+
bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI,
212+
MachineIRBuilder &B) const;
212213

213214
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
214215
Intrinsic::ID IID) const;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3218,9 +3218,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
32183218
return;
32193219
}
32203220
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3221+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
32213222
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3222-
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
3223-
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
3223+
bool IsDualOrBVH8 =
3224+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
3225+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
3226+
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
32243227
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
32253228
applyDefaultMapping(OpdMapper);
32263229
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
@@ -5014,13 +5017,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50145017
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
50155018
}
50165019
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
5020+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
50175021
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5018-
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
5019-
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
5022+
bool IsDualOrBVH8 =
5023+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
5024+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
5025+
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
50205026
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
50215027
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
50225028
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5023-
if (IsDual) {
5029+
if (IsDualOrBVH8) {
50245030
OpdsMapping[1] = AMDGPU::getValueMapping(
50255031
AMDGPU::VGPRRegBankID,
50265032
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
@@ -5038,7 +5044,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50385044
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
50395045
} else {
50405046
// NSA form
5041-
unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
5047+
unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
50425048
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
50435049
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
50445050
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1509,18 +1509,19 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
15091509
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
15101510
: MIMG_Gather<op, sample, 1>;
15111511

1512-
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
1513-
int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
1512+
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
1513+
int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)));
15141514
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
15151515
int VAddrDwords = !srl(RegClass.Size, 5);
15161516

15171517
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
15181518
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
15191519
list<RegisterClass> GFX11PlusAddrTypes =
1520-
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
1520+
!if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32],
1521+
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
15211522
!if(IsA16,
15221523
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
1523-
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
1524+
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])));
15241525
}
15251526

15261527
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1554,26 +1555,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
15541555
}
15551556

15561557
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
1557-
bit isDual,
1558+
bit isDual, bit isBVH8,
15581559
list<RegisterClass> addr_types>
1559-
: VIMAGE_gfx12<op.GFX12, !if(isDual,
1560+
: VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
15601561
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
15611562
VReg_96:$ray_dir_out),
15621563
(outs VReg_128:$vdata)),
15631564
num_addrs, "GFX12", addr_types> {
1564-
let Constraints = !if(isDual,
1565+
let Constraints = !if(!or(isDual, isBVH8),
15651566
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
15661567
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
1567-
!if(isDual, (ins), (ins A16:$a16)));
1568+
!if(!or(isDual, isBVH8), (ins), (ins A16:$a16)));
15681569
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
1569-
!if(isDual, "", "$a16");
1570-
let SchedRW = !if(isDual,
1570+
!if(!or(isDual, isBVH8), "", "$a16");
1571+
let SchedRW = !if(!or(isDual, isBVH8),
15711572
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
15721573
}
15731574

15741575
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
1575-
bit isDual> {
1576-
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
1576+
bit isDual, bit isBVH8 = 0> {
1577+
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>;
15771578
def "" : MIMGBaseOpcode {
15781579
let BVH = 1;
15791580
let A16 = IsA16;
@@ -1611,8 +1612,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
16111612
}
16121613
}
16131614
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
1614-
isDual, info.GFX11PlusAddrTypes> {
1615-
let VDataDwords = !if(isDual, 10, 4);
1615+
isDual, isBVH8,
1616+
info.GFX11PlusAddrTypes> {
1617+
let VDataDwords = !if(!or(isDual, isBVH8), 10, 4);
16161618
let VAddrDwords = info.num_addrs;
16171619
}
16181620
}
@@ -1791,11 +1793,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>
17911793
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
17921794

17931795
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
1796+
defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>;
17941797

17951798
let SubtargetPredicate = isGFX12Plus in {
17961799
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
17971800
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
17981801
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
1802+
def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">;
17991803
}
18001804

18011805
} // End let OtherPredicates = [HasImageInsts]

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1387,7 +1387,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13871387
return true;
13881388
}
13891389
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
1390-
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
1390+
case Intrinsic::amdgcn_image_bvh_intersect_ray:
1391+
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
13911392
Info.opc = ISD::INTRINSIC_W_CHAIN;
13921393
Info.memVT =
13931394
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
@@ -9426,7 +9427,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94269427
Op->getVTList(), Ops, VT,
94279428
M->getMemOperand());
94289429
}
9429-
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
9430+
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
9431+
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
94309432
MemSDNode *M = cast<MemSDNode>(Op);
94319433
SDValue NodePtr = M->getOperand(2);
94329434
SDValue RayExtent = M->getOperand(3);
@@ -9444,11 +9446,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94449446
return SDValue();
94459447
}
94469448

9449+
bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray;
94479450
const unsigned NumVDataDwords = 10;
9448-
const unsigned NumVAddrDwords = 12;
9449-
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
9450-
AMDGPU::MIMGEncGfx12, NumVDataDwords,
9451-
NumVAddrDwords);
9451+
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
9452+
int Opcode = AMDGPU::getMIMGOpcode(
9453+
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
9454+
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
9455+
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
94529456
assert(Opcode != -1);
94539457

94549458
SmallVector<SDValue, 16> Ops;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4357,6 +4357,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
43574357
let mayStore = 0;
43584358
}
43594359

4360+
def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
4361+
let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
4362+
let InOperandList = (ins unknown:$opcode, variable_ops);
4363+
let hasSideEffects = 0;
4364+
let mayLoad = 1;
4365+
let mayStore = 0;
4366+
}
4367+
43604368
// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
43614369
// if necessary.
43624370
def G_SI_CALL : AMDGPUGenericInstruction {
Lines changed: 87 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3+
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
4+
5+
declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>)
6+
7+
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
8+
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray:
9+
; GFX12-SDAG: ; %bb.0: ; %main_body
10+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
11+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
12+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
13+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0
14+
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
15+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
16+
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
17+
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
18+
; GFX12-SDAG-NEXT: ; return to shader part epilog
19+
;
20+
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray:
21+
; GFX12-GISEL: ; %bb.0: ; %main_body
22+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
23+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
24+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
25+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0
26+
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
27+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
28+
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
29+
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
30+
; GFX12-GISEL-NEXT: ; return to shader part epilog
31+
main_body:
32+
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
33+
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
34+
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
35+
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
36+
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
37+
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
38+
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
39+
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
40+
%r = bitcast <10 x i32> %a to <10 x float>
41+
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
42+
store <3 x float> %o, ptr addrspace(1) %origin
43+
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
44+
store <3 x float> %d, ptr addrspace(1) %dir
45+
ret <10 x float> %r
46+
}
47+
48+
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
49+
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1:
50+
; GFX12-SDAG: ; %bb.0: ; %main_body
51+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
52+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
53+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
54+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1
55+
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
56+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
57+
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
58+
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
59+
; GFX12-SDAG-NEXT: ; return to shader part epilog
60+
;
61+
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1:
62+
; GFX12-GISEL: ; %bb.0: ; %main_body
63+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
64+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
65+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
66+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1
67+
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
68+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
69+
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
70+
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
71+
; GFX12-GISEL-NEXT: ; return to shader part epilog
72+
main_body:
73+
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
74+
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
75+
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
76+
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
77+
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
78+
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
79+
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
80+
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
81+
%r = bitcast <10 x i32> %a to <10 x float>
82+
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
83+
store <3 x float> %o, ptr addrspace(1) %origin
84+
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
85+
store <3 x float> %d, ptr addrspace(1) %dir
86+
ret <10 x float> %r
87+
}

llvm/test/MC/AMDGPU/gfx12_asm_vimage.s

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1069,6 +1069,9 @@ image_bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17]], s[4:7] a16
10691069
image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
10701070
// GFX12: encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
10711071

1072+
image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
1073+
// GFX12: encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
1074+
10721075
image_get_resinfo v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
10731076
// GFX12: encoding: [0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00]
10741077

llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,3 +44,6 @@ bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17], v[18:20]], s[4:7]
4444

4545
bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
4646
// GFX12: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] ; encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
47+
48+
bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
49+
// GFX12: image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] ; encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]

0 commit comments

Comments
 (0)