Skip to content

Commit 8e318f1

Browse files
kosarev authored and mariusz-sikora-at-amd committed
[AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic.
1 parent 1f84495 commit 8e318f1

12 files changed

+171
-35
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2824,6 +2824,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray :
28242824
llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty],
28252825
[IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
28262826

2827+
// Returns: <vdata>, <ray_origin>, <ray_dir>
2828+
// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>,
2829+
// <instance_mask>, <ray_origin>,
2830+
// <ray_dir>, <offset>,
2831+
// <texture_descr>
// The attribute list is kept identical to
// int_amdgcn_image_bvh_dual_intersect_ray, which is lowered the same way.
2832+
def int_amdgcn_image_bvh8_intersect_ray :
2833+
Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty],
2834+
[llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty,
2835+
llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty],
2836+
[IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
2837+
28272838
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
28282839
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
28292840
Intrinsic<[llvm_i32_ty],

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4118,6 +4118,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
41184118
}
41194119
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
41204120
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4121+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
41214122
return selectBVHIntersectRayIntrinsic(I);
41224123
case AMDGPU::G_SBFX:
41234124
case AMDGPU::G_UBFX:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7198,8 +7198,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
71987198
return true;
71997199
}
72007200

7201-
bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
7202-
MachineIRBuilder &B) const {
7201+
bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
7202+
MachineInstr &MI, MachineIRBuilder &B) const {
72037203
const LLT S32 = LLT::scalar(32);
72047204
const LLT V2S32 = LLT::fixed_vector(2, 32);
72057205

@@ -7222,17 +7222,21 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI,
72227222
return false;
72237223
}
72247224

7225+
bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() ==
7226+
Intrinsic::amdgcn_image_bvh8_intersect_ray;
72257227
const unsigned NumVDataDwords = 10;
7226-
const unsigned NumVAddrDwords = 12;
7227-
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
7228-
AMDGPU::MIMGEncGfx12, NumVDataDwords,
7229-
NumVAddrDwords);
7228+
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
7229+
int Opcode = AMDGPU::getMIMGOpcode(
7230+
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
7231+
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
7232+
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
72307233
assert(Opcode != -1);
72317234

72327235
auto RayExtentInstanceMaskVec = B.buildMergeLikeInstr(
72337236
V2S32, {RayExtent, B.buildAnyExt(S32, InstanceMask)});
72347237

7235-
B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
7238+
B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY
7239+
: AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY)
72367240
.addDef(DstReg)
72377241
.addDef(DstOrigin)
72387242
.addDef(DstDir)
@@ -7598,7 +7602,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
75987602
case Intrinsic::amdgcn_image_bvh_intersect_ray:
75997603
return legalizeBVHIntersectRayIntrinsic(MI, B);
76007604
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
7601-
return legalizeBVHDualIntrinsic(MI, B);
7605+
case Intrinsic::amdgcn_image_bvh8_intersect_ray:
7606+
return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B);
76027607
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
76037608
case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
76047609
case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
208208
bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI,
209209
MachineIRBuilder &B) const;
210210

211-
bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
211+
bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI,
212+
MachineIRBuilder &B) const;
212213

213214
bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI,
214215
Intrinsic::ID IID) const;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3240,9 +3240,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
32403240
return;
32413241
}
32423242
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3243+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
32433244
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3244-
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
3245-
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
3245+
bool IsDualOrBVH8 =
3246+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
3247+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
3248+
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
32463249
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
32473250
applyDefaultMapping(OpdMapper);
32483251
executeInWaterfallLoop(B, MI, {LastRegOpIdx});
@@ -5036,13 +5039,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50365039
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
50375040
}
50385041
case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
5042+
case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
50395043
case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5040-
bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY;
5041-
unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier
5044+
bool IsDualOrBVH8 =
5045+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
5046+
MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
5047+
unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier
50425048
unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
50435049
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
50445050
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5045-
if (IsDual) {
5051+
if (IsDualOrBVH8) {
50465052
OpdsMapping[1] = AMDGPU::getValueMapping(
50475053
AMDGPU::VGPRRegBankID,
50485054
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
@@ -5060,7 +5066,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50605066
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
50615067
} else {
50625068
// NSA form
5063-
unsigned FirstSrcOpIdx = IsDual ? 4 : 2;
5069+
unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
50645070
for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
50655071
unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
50665072
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,18 +1509,19 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
15091509
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
15101510
: MIMG_Gather<op, sample, 1>;
15111511

1512-
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> {
1513-
int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11));
1512+
class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
1513+
int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)));
15141514
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
15151515
int VAddrDwords = !srl(RegClass.Size, 5);
15161516

15171517
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
15181518
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
15191519
list<RegisterClass> GFX11PlusAddrTypes =
1520-
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
1520+
!if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32],
1521+
!if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64],
15211522
!if(IsA16,
15221523
[node_ptr_type, VGPR_32, VReg_96, VReg_96],
1523-
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]));
1524+
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])));
15241525
}
15251526

15261527
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1554,26 +1555,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
15541555
}
15551556

15561557
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
1557-
bit isDual,
1558+
bit isDual, bit isBVH8,
15581559
list<RegisterClass> addr_types>
1559-
: VIMAGE_gfx12<op.GFX12, !if(isDual,
1560+
: VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
15601561
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
15611562
VReg_96:$ray_dir_out),
15621563
(outs VReg_128:$vdata)),
15631564
num_addrs, "GFX12", addr_types> {
1564-
let Constraints = !if(isDual,
1565+
let Constraints = !if(!or(isDual, isBVH8),
15651566
"$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", "");
15661567
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc),
1567-
!if(isDual, (ins), (ins A16:$a16)));
1568+
!if(!or(isDual, isBVH8), (ins), (ins A16:$a16)));
15681569
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"#
1569-
!if(isDual, "", "$a16");
1570-
let SchedRW = !if(isDual,
1570+
!if(!or(isDual, isBVH8), "", "$a16");
1571+
let SchedRW = !if(!or(isDual, isBVH8),
15711572
[WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]);
15721573
}
15731574

15741575
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
1575-
bit isDual> {
1576-
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>;
1576+
bit isDual, bit isBVH8 = 0> {
1577+
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>;
15771578
def "" : MIMGBaseOpcode {
15781579
let BVH = 1;
15791580
let A16 = IsA16;
@@ -1611,8 +1612,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16,
16111612
}
16121613
}
16131614
def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
1614-
isDual, info.GFX11PlusAddrTypes> {
1615-
let VDataDwords = !if(isDual, 10, 4);
1615+
isDual, isBVH8,
1616+
info.GFX11PlusAddrTypes> {
1617+
let VDataDwords = !if(!or(isDual, isBVH8), 10, 4);
16161618
let VAddrDwords = info.num_addrs;
16171619
}
16181620
}
@@ -1791,11 +1793,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>
17911793
} // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding]
17921794

17931795
defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>;
1796+
defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>;
17941797

17951798
let SubtargetPredicate = isGFX12Plus in {
17961799
def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">;
17971800
def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">;
17981801
def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">;
1802+
def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">;
17991803
}
18001804

18011805
} // End let OtherPredicates = [HasImageInsts]

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,7 +1387,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13871387
return true;
13881388
}
13891389
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
1390-
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
1390+
case Intrinsic::amdgcn_image_bvh_intersect_ray:
1391+
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
13911392
Info.opc = ISD::INTRINSIC_W_CHAIN;
13921393
Info.memVT =
13931394
MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray
@@ -9443,7 +9444,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94439444
Op->getVTList(), Ops, VT,
94449445
M->getMemOperand());
94459446
}
9446-
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: {
9447+
case Intrinsic::amdgcn_image_bvh_dual_intersect_ray:
9448+
case Intrinsic::amdgcn_image_bvh8_intersect_ray: {
94479449
MemSDNode *M = cast<MemSDNode>(Op);
94489450
SDValue NodePtr = M->getOperand(2);
94499451
SDValue RayExtent = M->getOperand(3);
@@ -9461,11 +9463,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
94619463
return SDValue();
94629464
}
94639465

9466+
bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray;
94649467
const unsigned NumVDataDwords = 10;
9465-
const unsigned NumVAddrDwords = 12;
9466-
int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
9467-
AMDGPU::MIMGEncGfx12, NumVDataDwords,
9468-
NumVAddrDwords);
9468+
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
9469+
int Opcode = AMDGPU::getMIMGOpcode(
9470+
IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
9471+
: AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
9472+
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
94699473
assert(Opcode != -1);
94709474

94719475
SmallVector<SDValue, 7> Ops;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4376,6 +4376,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction {
43764376
let mayStore = 0;
43774377
}
43784378

4379+
// Generic instruction for image_bvh8_intersect_ray. The GlobalISel legalizer
// builds it when legalizing llvm.amdgcn.image.bvh8.intersect.ray. It defines
// three results: $dst (type0) plus $ray_origin and $ray_dir (both type1).
// Inputs are the $opcode operand followed by a variable number of operands.
// NOTE(review): $opcode is presumably the MIMG opcode chosen by the legalizer
// -- confirm against legalizeBVHDualOrBVH8IntersectRayIntrinsic.
// Modeled as a load (mayLoad) that neither stores nor has other side effects.
def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
4380+
let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir);
4381+
let InOperandList = (ins unknown:$opcode, variable_ops);
4382+
let hasSideEffects = 0;
4383+
let mayLoad = 1;
4384+
let mayStore = 0;
4385+
}
4386+
43794387
// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
43804388
// if necessary.
43814389
def G_SI_CALL : AMDGPUGenericInstruction {
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3+
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
4+
5+
declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>)
6+
7+
; Calls llvm.amdgcn.image.bvh8.intersect.ray with a constant instance mask of 0.
; The <10 x i32> result is returned bitcast to <10 x float>; the ray origin and
; ray direction results are stored to %origin and %dir respectively.
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
8+
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray:
9+
; GFX12-SDAG: ; %bb.0: ; %main_body
10+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
11+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
12+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
13+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0
14+
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
15+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
16+
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
17+
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
18+
; GFX12-SDAG-NEXT: ; return to shader part epilog
19+
;
20+
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray:
21+
; GFX12-GISEL: ; %bb.0: ; %main_body
22+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
23+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
24+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
25+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0
26+
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
27+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
28+
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
29+
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
30+
; GFX12-GISEL-NEXT: ; return to shader part epilog
31+
main_body:
32+
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
33+
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
34+
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
35+
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
36+
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
37+
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
38+
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
39+
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
40+
%r = bitcast <10 x i32> %a to <10 x float>
41+
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
42+
store <3 x float> %o, ptr addrspace(1) %origin
43+
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
44+
store <3 x float> %d, ptr addrspace(1) %dir
45+
ret <10 x float> %r
46+
}
47+
48+
; Same call as @image_bvh8_intersect_ray, but with a constant instance mask of
; 1 instead of 0. The expected output differs only in the immediate moved into
; v3 ahead of the image_bvh8_intersect_ray instruction.
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
49+
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1:
50+
; GFX12-SDAG: ; %bb.0: ; %main_body
51+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
52+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
53+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
54+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1
55+
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
56+
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
57+
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
58+
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
59+
; GFX12-SDAG-NEXT: ; return to shader part epilog
60+
;
61+
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1:
62+
; GFX12-GISEL: ; %bb.0: ; %main_body
63+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
64+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
65+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
66+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1
67+
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
68+
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
69+
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
70+
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
71+
; GFX12-GISEL-NEXT: ; return to shader part epilog
72+
main_body:
73+
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
74+
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
75+
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
76+
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
77+
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
78+
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
79+
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
80+
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
81+
%r = bitcast <10 x i32> %a to <10 x float>
82+
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
83+
store <3 x float> %o, ptr addrspace(1) %origin
84+
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
85+
store <3 x float> %d, ptr addrspace(1) %dir
86+
ret <10 x float> %r
87+
}

llvm/test/MC/AMDGPU/gfx12_asm_vimage.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,9 @@ image_bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17]], s[4:7] a16
10691069
image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
10701070
// GFX12: encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
10711071

1072+
image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
1073+
// GFX12: encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
1074+
10721075
image_get_resinfo v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
10731076
// GFX12: encoding: [0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00]
10741077

llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,6 @@ bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17], v[18:20]], s[4:7]
4444

4545
bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3]
4646
// GFX12: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] ; encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]
47+
48+
bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3]
49+
// GFX12: image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] ; encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06]

0 commit comments

Comments
 (0)