Skip to content

Commit 2774bad

Browse files
committed
[AMDGPU] Change llvm.amdgcn.image.bvh.intersect.ray to take vec3 args
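The ray_origin, ray_dir and ray_inv_dir arguments should all be vec3 to match how the hardware instruction works. The API of the corresponding OpenCL builtins is not changed: they still take these arguments as vec4, and the ignored last element is dropped when the call to the intrinsic is emitted. Differential Revision: https://reviews.llvm.org/D115032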
1 parent c8e84c7 commit 2774bad

File tree

7 files changed

+377
-398
lines changed

7 files changed

+377
-398
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16592,6 +16592,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1659216592
llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
1659316593
llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
1659416594

16595+
// The builtins take these arguments as vec4 where the last element is
16596+
// ignored. The intrinsic takes them as vec3.
16597+
RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
16598+
ArrayRef<int>{0, 1, 2});
16599+
RayDir =
16600+
Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
16601+
RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
16602+
ArrayRef<int>{0, 1, 2});
16603+
1659516604
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
1659616605
{NodePtr->getType(), RayDir->getType()});
1659716606
return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,

clang/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ typedef double double4 __attribute__((ext_vector_type(4)));
1919
typedef half half4 __attribute__((ext_vector_type(4)));
2020
typedef uint uint4 __attribute__((ext_vector_type(4)));
2121

22-
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32
22+
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v3f32
2323
// ISA: image_bvh_intersect_ray
2424
void test_image_bvh_intersect_ray(global uint4* out, uint node_ptr,
2525
float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir,
@@ -29,7 +29,7 @@ void test_image_bvh_intersect_ray(global uint4* out, uint node_ptr,
2929
ray_origin, ray_dir, ray_inv_dir, texture_descr);
3030
}
3131

32-
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16
32+
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v3f16
3333
// ISA: image_bvh_intersect_ray
3434
void test_image_bvh_intersect_ray_h(global uint4* out, uint node_ptr,
3535
float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir,
@@ -39,7 +39,7 @@ void test_image_bvh_intersect_ray_h(global uint4* out, uint node_ptr,
3939
ray_origin, ray_dir, ray_inv_dir, texture_descr);
4040
}
4141

42-
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32
42+
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v3f32
4343
// ISA: image_bvh_intersect_ray
4444
void test_image_bvh_intersect_ray_l(global uint4* out, ulong node_ptr,
4545
float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir,
@@ -49,7 +49,7 @@ void test_image_bvh_intersect_ray_l(global uint4* out, ulong node_ptr,
4949
ray_origin, ray_dir, ray_inv_dir, texture_descr);
5050
}
5151

52-
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16
52+
// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v3f16
5353
// ISA: image_bvh_intersect_ray
5454
void test_image_bvh_intersect_ray_lh(global uint4* out, ulong node_ptr,
5555
float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir,

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1789,9 +1789,11 @@ def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
17891789

17901790
// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
17911791
// <ray_dir>, <ray_inv_dir>, <texture_descr>
1792+
// <node_ptr> is i32 or i64.
1793+
// <ray_dir> and <ray_inv_dir> are both v3f16 or both v3f32.
17921794
def int_amdgcn_image_bvh_intersect_ray :
17931795
Intrinsic<[llvm_v4i32_ty],
1794-
[llvm_anyint_ty, llvm_float_ty, llvm_v4f32_ty, llvm_anyvector_ty,
1796+
[llvm_anyint_ty, llvm_float_ty, llvm_v3f32_ty, llvm_anyvector_ty,
17951797
LLVMMatchType<1>, llvm_v4i32_ty],
17961798
[IntrReadMem, IntrWillReturn]>;
17971799

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4869,17 +4869,17 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
48694869
}
48704870
Ops.push_back(RayExtent);
48714871

4872-
auto packLanes = [&Ops, &S32, &B] (Register Src) {
4873-
auto Unmerge = B.buildUnmerge({S32, S32, S32, S32}, Src);
4872+
auto packLanes = [&Ops, &S32, &B](Register Src) {
4873+
auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
48744874
Ops.push_back(Unmerge.getReg(0));
48754875
Ops.push_back(Unmerge.getReg(1));
48764876
Ops.push_back(Unmerge.getReg(2));
48774877
};
48784878

48794879
packLanes(RayOrigin);
48804880
if (IsA16) {
4881-
auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16, S16}, RayDir);
4882-
auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16, S16}, RayInvDir);
4881+
auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir);
4882+
auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir);
48834883
Register R1 = MRI.createGenericVirtualRegister(S32);
48844884
Register R2 = MRI.createGenericVirtualRegister(S32);
48854885
Register R3 = MRI.createGenericVirtualRegister(S32);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7503,8 +7503,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
75037503

75047504
assert(NodePtr.getValueType() == MVT::i32 ||
75057505
NodePtr.getValueType() == MVT::i64);
7506-
assert(RayDir.getValueType() == MVT::v4f16 ||
7507-
RayDir.getValueType() == MVT::v4f32);
7506+
assert(RayDir.getValueType() == MVT::v3f16 ||
7507+
RayDir.getValueType() == MVT::v3f32);
75087508

75097509
if (!Subtarget->hasGFX10_AEncoding()) {
75107510
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());

0 commit comments

Comments
 (0)