|
3 | 3 | // RUN: -emit-llvm -cl-std=CL2.0 -o - %s | FileCheck %s
|
4 | 4 | // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 -S \
|
5 | 5 | // RUN: -cl-std=CL2.0 -o - %s | FileCheck -check-prefix=ISA %s
|
| 6 | +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm \ |
| 7 | +// RUN: -cl-std=CL2.0 -o - %s | FileCheck -check-prefix=GFX12 %s |
| 8 | +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S \ |
| 9 | +// RUN: -cl-std=CL2.0 -o - %s | FileCheck -check-prefix=GFX12ISA %s |
6 | 10 |
|
7 | 11 | // Test llvm.amdgcn.image.bvh.intersect.ray intrinsic.
|
8 | 12 |
|
|
12 | 16 | // Postfix l indicates the 1st argument is i64 and postfix h indicates
|
13 | 17 | // the 4th and 5th arguments are half4.
|
14 | 18 |
|
// Scalar aliases and ext_vector_type vector aliases used by the test
// functions in this file. Each vector alias names an N-element vector of the
// given element type (float3 = 3 floats, uint10 = 10 uints, and so on).
// In the tests below: uchar is the type of instance_mask, float3 carries the
// ray origin/direction, uint2 is both the dual-intersect offset and the
// push*_pop1 result, uint8 is the push8 data operand, uint10 is the type
// stored through ret_vdata, and ulong2 holds the push8_pop2 result.
| 19 | +typedef unsigned char uchar; |
15 | 20 | typedef unsigned int uint;
|
16 | 21 | typedef unsigned long ulong;
|
| 22 | +typedef float float3 __attribute__((ext_vector_type(3))); |
17 | 23 | typedef float float4 __attribute__((ext_vector_type(4)));
|
18 | 24 | typedef double double4 __attribute__((ext_vector_type(4)));
|
19 | 25 | typedef half half4 __attribute__((ext_vector_type(4)));
|
| 26 | +typedef uint uint2 __attribute__((ext_vector_type(2))); |
20 | 27 | typedef uint uint4 __attribute__((ext_vector_type(4)));
|
| 28 | +typedef uint uint8 __attribute__((ext_vector_type(8))); |
| 29 | +typedef uint uint10 __attribute__((ext_vector_type(10))); |
| 30 | +typedef ulong ulong2 __attribute__((ext_vector_type(2))); |
21 | 31 |
|
22 | 32 | // CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v3f32
|
23 | 33 | // ISA: image_bvh_intersect_ray
|
@@ -59,3 +69,71 @@ void test_image_bvh_intersect_ray_lh(global uint4* out, ulong node_ptr,
|
59 | 69 | ray_origin, ray_dir, ray_inv_dir, texture_descr);
|
60 | 70 | }
|
61 | 71 |
|
// Codegen test for __builtin_amdgcn_image_bvh8_intersect_ray; compiled only
// when the selected target reports the builtin via __has_builtin.
// The GFX12-prefixed directives expect a single call to
// llvm.amdgcn.image.bvh8.intersect.ray whose operands are the function
// parameters by name, with a scalar i32 offset; the GFX12ISA-prefixed
// directive expects the image_bvh8_intersect_ray mnemonic in the assembly.
// The parameter names are matched by those directives, so keep them in sync.
| 72 | +#if __has_builtin(__builtin_amdgcn_image_bvh8_intersect_ray) |
| 73 | +// GFX12: call { <10 x i32>, <3 x float>, <3 x float> } @llvm.amdgcn.image.bvh8.intersect.ray( |
| 74 | +// GFX12: i64 %node_ptr, float %ray_extent, i8 %instance_mask, <3 x float> %ray_origin, |
| 75 | +// GFX12: <3 x float> %ray_dir, i32 %offset, <4 x i32> %texture_descr) |
| 76 | +// GFX12ISA: image_bvh8_intersect_ray |
| 77 | +void test_image_bvh8_intersect_ray(global uint10* ret_vdata, float3* ret_ray_origin, |
| 78 | + float3* ret_ray_dir, ulong node_ptr, float ray_extent, uchar instance_mask, |
| 79 | + float3 ray_origin, float3 ray_dir, uint offset, uint4 texture_descr) |
| 80 | +{ |
// The builtin's value is stored through ret_vdata. The two trailing pointer
// arguments presumably receive the <3 x float> members of the intrinsic's
// aggregate result -- TODO confirm against the builtin's definition.
| 81 | + *ret_vdata = __builtin_amdgcn_image_bvh8_intersect_ray(node_ptr, ray_extent, |
| 82 | + instance_mask, ray_origin, ray_dir, offset, texture_descr, |
| 83 | + ret_ray_origin, ret_ray_dir); |
| 84 | +} |
| 85 | +#endif |
| 86 | + |
// Codegen test for __builtin_amdgcn_image_bvh_dual_intersect_ray; compiled
// only when the selected target reports the builtin via __has_builtin.
// Same shape as the bvh8 test in this file, except that offset is a uint2 and
// the GFX12-prefixed directives therefore expect a <2 x i32> %offset operand
// on the call to llvm.amdgcn.image.bvh.dual.intersect.ray. The
// GFX12ISA-prefixed directive expects the image_bvh_dual_intersect_ray
// mnemonic. Parameter names are matched by the directives.
| 87 | +#if __has_builtin(__builtin_amdgcn_image_bvh_dual_intersect_ray) |
| 88 | +// GFX12: call { <10 x i32>, <3 x float>, <3 x float> } @llvm.amdgcn.image.bvh.dual.intersect.ray( |
| 89 | +// GFX12: i64 %node_ptr, float %ray_extent, i8 %instance_mask, <3 x float> %ray_origin, |
| 90 | +// GFX12: <3 x float> %ray_dir, <2 x i32> %offset, <4 x i32> %texture_descr) |
| 91 | +// GFX12ISA: image_bvh_dual_intersect_ray |
| 92 | +void test_builtin_amdgcn_image_bvh_dual_intersect_ray(global uint10* ret_vdata, float3* ret_ray_origin, |
| 93 | + float3* ret_ray_dir, ulong node_ptr, float ray_extent, uchar instance_mask, |
| 94 | + float3 ray_origin, float3 ray_dir, uint2 offset, uint4 texture_descr) |
| 95 | +{ |
// The builtin's value is stored through ret_vdata. The two trailing pointer
// arguments presumably receive the <3 x float> members of the intrinsic's
// aggregate result -- TODO confirm against the builtin's definition.
| 96 | + *ret_vdata = __builtin_amdgcn_image_bvh_dual_intersect_ray(node_ptr, ray_extent, |
| 97 | + instance_mask, ray_origin, ray_dir, offset, texture_descr, |
| 98 | + ret_ray_origin, ret_ray_dir); |
| 99 | +} |
| 100 | +#endif |
| 101 | + |
// Codegen test for __builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn; compiled
// only when the selected target reports the builtin via __has_builtin.
// The GFX12-prefixed directives expect a call to
// llvm.amdgcn.ds.bvh.stack.push4.pop1.rtn returning { i32, i32 }, taking
// addr, data0, a <4 x i32> data1 and the literal offset 0. The
// GFX12ISA-prefixed directive expects the ds_bvh_stack_push4_pop1_rtn
// mnemonic. Parameter names and the literal 0 are matched by the directives.
| 102 | +#if __has_builtin(__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn) |
| 103 | +// GFX12: call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push4.pop1.rtn( |
| 104 | +// GFX12: i32 %addr, i32 %data0, <4 x i32> %data1, i32 0) |
| 105 | +// GFX12ISA: ds_bvh_stack_push4_pop1_rtn |
| 106 | +void test_builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn(uint* ret_vdst, uint* ret_addr, |
| 107 | + uint addr, uint data0, uint4 data1) |
| 108 | +{ |
// The builtin yields a two-element uint vector; its elements are written out
// separately so that both results are used.
| 109 | + uint2 ret = __builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn(addr, data0, data1, /*constant offset=*/0); |
| 110 | + *ret_vdst = ret.x; |
| 111 | + *ret_addr = ret.y; |
| 112 | +} |
| 113 | +#endif |
| 114 | + |
// Codegen test for __builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn; compiled
// only when the selected target reports the builtin via __has_builtin.
// Same shape as the push4_pop1 test in this file, except that data1 is a
// uint8, so the GFX12-prefixed directives expect a <8 x i32> %data1 operand
// on the call to llvm.amdgcn.ds.bvh.stack.push8.pop1.rtn. The
// GFX12ISA-prefixed directive expects the ds_bvh_stack_push8_pop1_rtn
// mnemonic. Parameter names and the literal 0 are matched by the directives.
| 115 | +#if __has_builtin(__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn) |
| 116 | +// GFX12: call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop1.rtn( |
| 117 | +// GFX12: i32 %addr, i32 %data0, <8 x i32> %data1, i32 0) |
| 118 | +// GFX12ISA: ds_bvh_stack_push8_pop1_rtn |
| 119 | +void test_builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn(uint* ret_vdst, uint* ret_addr, |
| 120 | + uint addr, uint data0, uint8 data1) |
| 121 | +{ |
// The builtin yields a two-element uint vector; its elements are written out
// separately so that both results are used.
| 122 | + uint2 ret = __builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn(addr, data0, data1, /*constant offset=*/0); |
| 123 | + *ret_vdst = ret.x; |
| 124 | + *ret_addr = ret.y; |
| 125 | +} |
| 126 | +#endif |
| 127 | + |
// Codegen test for __builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn; compiled
// only when the selected target reports the builtin via __has_builtin.
// The GFX12-prefixed directives expect a call to
// llvm.amdgcn.ds.bvh.stack.push8.pop2.rtn returning { i64, i32 }, taking
// addr, data0, a <8 x i32> data1 and the literal offset 0. The
// GFX12ISA-prefixed directive expects the ds_bvh_stack_push8_pop2_rtn
// mnemonic. Parameter names and the literal 0 are matched by the directives.
| 128 | +#if __has_builtin(__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn) |
| 129 | +// GFX12: call { i64, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop2.rtn( |
| 130 | +// GFX12: i32 %addr, i32 %data0, <8 x i32> %data1, i32 0) |
| 131 | +// GFX12ISA: ds_bvh_stack_push8_pop2_rtn |
| 132 | +void test_builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn(ulong* ret_vdst, uint* ret_addr, |
| 133 | + uint addr, uint data0, uint8 data1) |
| 134 | +{ |
// The result is received as a two-element ulong vector here, while the
// expected intrinsic result above is { i64, i32 }.
| 135 | + ulong2 ret = __builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn(addr, data0, data1, /*constant offset=*/0); |
| 136 | + *ret_vdst = ret.x; |
// ret.y is a ulong element stored through a uint pointer, so it is
// implicitly narrowed on this assignment.
| 137 | + *ret_addr = ret.y; |
| 138 | +} |
| 139 | +#endif |
0 commit comments