Skip to content

Commit e3d9f75

Browse files
ficol authored and igcbot committed
Refactor OCL raytracing builtins
refactor OCL raytracing builtins by adding helpers to access RTStack fields.
1 parent dc26dac commit e3d9f75

File tree

3 files changed

+99
-78
lines changed

3 files changed

+99
-78
lines changed

IGC/BiFModule/Languages/OpenCL/Raytracing/IBiF_intel_rt_production.cl

Lines changed: 85 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ SPDX-License-Identifier: MIT
1212
#if defined(cl_intel_rt_production)
1313

1414
void __basic_rtstack_init(
15-
global RTStack* rtStack,
15+
global void* rtStack,
1616
global HWAccel* hwaccel,
1717
intel_float3 origin,
1818
intel_float3 direction,
@@ -23,38 +23,41 @@ void __basic_rtstack_init(
2323
{
2424
unsigned int bvh_level = 0;
2525
/* init ray */
26-
rtStack->ray[bvh_level].org[0] = origin.x;
27-
rtStack->ray[bvh_level].org[1] = origin.y;
28-
rtStack->ray[bvh_level].org[2] = origin.z;
29-
rtStack->ray[bvh_level].dir[0] = direction.x;
30-
rtStack->ray[bvh_level].dir[1] = direction.y;
31-
rtStack->ray[bvh_level].dir[2] = direction.z;
32-
rtStack->ray[bvh_level].tnear = tmin;
33-
rtStack->ray[bvh_level].tfar = tmax;
34-
35-
rtStack->ray[bvh_level].data[1] = 0;
36-
rtStack->ray[bvh_level].data[2] = 0;
37-
rtStack->ray[bvh_level].data[3] = 0;
38-
39-
MemRay_setRootNodePtr(&rtStack->ray[bvh_level], (ulong)hwaccel + 128);
40-
MemRay_setRayFlags(&rtStack->ray[bvh_level], flags);
41-
MemRay_setRayMask(&rtStack->ray[bvh_level], mask);
42-
43-
MemHit_clearUV(&rtStack->hit[COMMITTED]);
44-
rtStack->hit[COMMITTED].t = INFINITY;
45-
rtStack->hit[COMMITTED].data0 = 0;
46-
MemHit_setValid(&rtStack->hit[COMMITTED], 0);
47-
MemHit_setDone(&rtStack->hit[COMMITTED], 0);
48-
49-
MemHit_clearUV(&rtStack->hit[POTENTIAL]);
50-
rtStack->hit[POTENTIAL].t = INFINITY;
51-
rtStack->hit[POTENTIAL].data0 = 0;
52-
MemHit_setValid(&rtStack->hit[POTENTIAL], 1);
53-
MemHit_setDone(&rtStack->hit[POTENTIAL], 1);
26+
MemRay* memRay = get_rt_stack_ray(rtStack, bvh_level);
27+
memRay->org[0] = origin.x;
28+
memRay->org[1] = origin.y;
29+
memRay->org[2] = origin.z;
30+
memRay->dir[0] = direction.x;
31+
memRay->dir[1] = direction.y;
32+
memRay->dir[2] = direction.z;
33+
memRay->tnear = tmin;
34+
memRay->tfar = tmax;
35+
36+
memRay->data[1] = 0;
37+
memRay->data[2] = 0;
38+
memRay->data[3] = 0;
39+
40+
MemRay_setRootNodePtr(memRay, (ulong)hwaccel + 128);
41+
MemRay_setRayFlags(memRay, flags);
42+
MemRay_setRayMask(memRay, mask);
43+
44+
MemHit* commitedHit = get_rt_stack_hit(rtStack, intel_hit_type_committed_hit);
45+
MemHit_clearUV(commitedHit);
46+
commitedHit->t = INFINITY;
47+
commitedHit->data0 = 0;
48+
MemHit_setValid(commitedHit, 0);
49+
MemHit_setDone(commitedHit, 0);
50+
51+
MemHit* potentialHit = get_rt_stack_hit(rtStack, intel_hit_type_potential_hit);
52+
MemHit_clearUV(potentialHit);
53+
potentialHit->t = INFINITY;
54+
potentialHit->data0 = 0;
55+
MemHit_setValid(potentialHit, 1);
56+
MemHit_setDone(potentialHit, 1);
5457
}
5558

5659
void __basic_ray_forward(
57-
global RTStack* rtStack,
60+
global void* rtStack,
5861
HWAccel* hwaccel,
5962
uint bvhLevel,
6063
intel_float3 origin,
@@ -64,22 +67,23 @@ void __basic_ray_forward(
6467
uint mask,
6568
intel_ray_flags_t flags)
6669
{
67-
rtStack->ray[bvhLevel].org[0] = origin.x;
68-
rtStack->ray[bvhLevel].org[1] = origin.y;
69-
rtStack->ray[bvhLevel].org[2] = origin.z;
70-
rtStack->ray[bvhLevel].dir[0] = direction.x;
71-
rtStack->ray[bvhLevel].dir[1] = direction.y;
72-
rtStack->ray[bvhLevel].dir[2] = direction.z;
73-
rtStack->ray[bvhLevel].tnear = tmin;
74-
rtStack->ray[bvhLevel].tfar = tmax;
75-
76-
rtStack->ray[bvhLevel].data[1] = 0;
77-
rtStack->ray[bvhLevel].data[2] = 0;
78-
rtStack->ray[bvhLevel].data[3] = 0;
79-
80-
MemRay_setRootNodePtr(&rtStack->ray[bvhLevel], (ulong)hwaccel + 128);
81-
MemRay_setRayFlags(&rtStack->ray[bvhLevel], flags);
82-
MemRay_setRayMask(&rtStack->ray[bvhLevel], mask);
70+
MemRay* memRay = get_rt_stack_ray(rtStack, bvhLevel);
71+
memRay->org[0] = origin.x;
72+
memRay->org[1] = origin.y;
73+
memRay->org[2] = origin.z;
74+
memRay->dir[0] = direction.x;
75+
memRay->dir[1] = direction.y;
76+
memRay->dir[2] = direction.z;
77+
memRay->tnear = tmin;
78+
memRay->tfar = tmax;
79+
80+
memRay->data[1] = 0;
81+
memRay->data[2] = 0;
82+
memRay->data[3] = 0;
83+
84+
MemRay_setRootNodePtr(memRay, (ulong)hwaccel + 128);
85+
MemRay_setRayFlags(memRay, flags);
86+
MemRay_setRayMask(memRay, mask);
8387
}
8488

8589
typedef enum
@@ -97,8 +101,7 @@ intel_ray_query_t intel_ray_query_init(
97101
{
98102
global HWAccel* hwaccel = to_global((HWAccel*)accel);
99103
rtglobals_t dispatchGlobalsPtr = (rtglobals_t) __getImplicitDispatchGlobals();
100-
global RTStack* rtStack =
101-
to_global((RTStack*)__builtin_IB_intel_get_rt_stack(dispatchGlobalsPtr));
104+
global void* rtStack = to_global(__builtin_IB_intel_get_rt_stack(dispatchGlobalsPtr));
102105

103106
__basic_rtstack_init(rtStack, hwaccel, ray.origin, ray.direction, ray.tmin, ray.tmax, ray.mask, ray.flags);
104107

@@ -119,7 +122,7 @@ void intel_ray_query_forward_ray(
119122
intel_raytracing_acceleration_structure_t accel_i)
120123
{
121124
HWAccel* hwaccel = (HWAccel*)accel_i;
122-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
125+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
123126

124127
/* init ray */
125128
uint bvh_level = __builtin_IB_intel_query_bvh_level(rayquery) + 1;
@@ -139,15 +142,18 @@ void intel_ray_query_forward_ray(
139142

140143
void intel_ray_query_commit_potential_hit(intel_ray_query_t rayquery)
141144
{
142-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
145+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
143146

144147
uint bvh_level = __builtin_IB_intel_query_bvh_level(rayquery);
145-
uint rflags = MemRay_getRayFlags(&rtStack->ray[bvh_level]);
148+
MemRay* memRay = get_rt_stack_ray(rtStack, bvh_level);
149+
uint rflags = MemRay_getRayFlags(memRay);
146150

151+
MemHit* commitedHit = get_rt_stack_hit(rtStack, intel_hit_type_committed_hit);
152+
MemHit* potentialHit = get_rt_stack_hit(rtStack, intel_hit_type_potential_hit);
147153
if (rflags & intel_ray_flags_accept_first_hit_and_end_search)
148154
{
149-
rtStack->hit[COMMITTED] = rtStack->hit[POTENTIAL];
150-
MemHit_setValid(&rtStack->hit[COMMITTED], 1);
155+
*commitedHit = *potentialHit;
156+
MemHit_setValid(commitedHit, 1);
151157

152158
__builtin_IB_intel_update_ray_query(
153159
rayquery,
@@ -160,7 +166,7 @@ void intel_ray_query_commit_potential_hit(intel_ray_query_t rayquery)
160166
}
161167
else
162168
{
163-
MemHit_setValid(&rtStack->hit[POTENTIAL], 1); // FIXME: is this required?
169+
MemHit_setValid(potentialHit, 1); // FIXME: is this required?
164170

165171
__builtin_IB_intel_update_ray_query(
166172
rayquery,
@@ -176,19 +182,20 @@ void intel_ray_query_commit_potential_hit(intel_ray_query_t rayquery)
176182
void intel_ray_query_commit_potential_hit_override(
177183
intel_ray_query_t rayquery, float override_hit_distance, intel_float2 override_uv)
178184
{
179-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
180-
rtStack->hit[POTENTIAL].t = override_hit_distance;
181-
MemHit_setUV(&rtStack->hit[POTENTIAL], override_uv.x, override_uv.y);
185+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
186+
MemHit* potentialHit = get_rt_stack_hit(rtStack, intel_hit_type_potential_hit);
187+
potentialHit->t = override_hit_distance;
188+
MemHit_setUV(potentialHit, override_uv.x, override_uv.y);
182189
intel_ray_query_commit_potential_hit(rayquery);
183190
}
184191

185192
void intel_ray_query_start_traversal(intel_ray_query_t rayquery)
186193
{
187194
rtglobals_t dispatchGlobalsPtr = __builtin_IB_intel_query_rt_globals(rayquery);
188-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
189-
190-
MemHit_setDone(&rtStack->hit[POTENTIAL], 1);
191-
MemHit_setValid(&rtStack->hit[POTENTIAL], 1);
195+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
196+
MemHit* potentialHit = get_rt_stack_hit(rtStack, intel_hit_type_potential_hit);
197+
MemHit_setDone(potentialHit, 1);
198+
MemHit_setValid(potentialHit, 1);
192199

193200
TraceRayCtrl ctrl = __builtin_IB_intel_query_ctrl(rayquery);
194201

@@ -214,9 +221,9 @@ void intel_ray_query_sync(intel_ray_query_t rayquery)
214221
rtfence_t fence = __builtin_IB_intel_query_rt_fence(rayquery);
215222
__builtin_IB_intel_rt_sync(fence);
216223

217-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
218-
219-
uint bvh_level = MemHit_getBvhLevel(&rtStack->hit[POTENTIAL]);
224+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
225+
MemHit* potentialHit = get_rt_stack_hit(rtStack, intel_hit_type_potential_hit);
226+
uint bvh_level = MemHit_getBvhLevel(potentialHit);
220227

221228
__builtin_IB_intel_update_ray_query(
222229
rayquery,
@@ -397,39 +404,42 @@ void intel_get_hit_triangle_vertices(
397404
// during any-hit or intersection shader execution.
398405
intel_float3 intel_get_ray_origin(intel_ray_query_t rayquery, uint bvh_level)
399406
{
400-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
407+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
401408

402-
global MemRay* ray = &rtStack->ray[bvh_level];
403-
return (intel_float3){ray->org[0], ray->org[1], ray->org[2]};
409+
MemRay* memRay = get_rt_stack_ray(rtStack, bvh_level);
410+
return (intel_float3){memRay->org[0], memRay->org[1], memRay->org[2]};
404411
}
405412

406413
intel_float3 intel_get_ray_direction(intel_ray_query_t rayquery, uint bvh_level)
407414
{
408415
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
409416

410-
global MemRay* ray = &rtStack->ray[bvh_level];
411-
return (intel_float3){ray->dir[0], ray->dir[1], ray->dir[2]};
417+
MemRay* memRay = get_rt_stack_ray(rtStack, bvh_level);
418+
return (intel_float3){memRay->dir[0], memRay->dir[1], memRay->dir[2]};
412419
}
413420

414421
float intel_get_ray_tmin(intel_ray_query_t rayquery, uint bvh_level)
415422
{
416-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
423+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
417424

418-
return rtStack->ray[bvh_level].tnear;
425+
MemRay* memRay = get_rt_stack_ray(rtStack, bvh_level);
426+
return memRay->tnear;
419427
}
420428

421429
intel_ray_flags_t intel_get_ray_flags(intel_ray_query_t rayquery, uint bvh_level)
422430
{
423-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
431+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
424432

425-
return (intel_ray_flags_t)MemRay_getRayFlags(&rtStack->ray[bvh_level]);
433+
MemRay* memRay = get_rt_stack_ray(rtStack, bvh_level);
434+
return (intel_ray_flags_t)MemRay_getRayFlags(memRay);
426435
}
427436

428437
int intel_get_ray_mask(intel_ray_query_t rayquery, uint bvh_level)
429438
{
430-
global RTStack* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
439+
global void* rtStack = __builtin_IB_intel_query_rt_stack(rayquery);
431440

432-
return MemRay_getRayMask(&rtStack->ray[bvh_level]);
441+
MemRay* memRay = get_rt_stack_ray(rtStack, bvh_level);
442+
return MemRay_getRayMask(memRay);
433443
}
434444

435445
// Test whether traversal has terminated. If false, the ray has reached

IGC/BiFModule/Languages/OpenCL/Raytracing/IBiF_intel_rt_struct_defs.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -259,6 +259,17 @@ inline MemHit* get_query_hit(intel_ray_query_t rayquery, intel_hit_type_t ty)
259259
return &rtStack->hit[ty];
260260
}
261261

262+
inline MemHit* get_rt_stack_hit(void* rtstack, intel_hit_type_t ty)
263+
{
264+
RTStack* rtStack = rtstack;
265+
return &rtStack->hit[ty];
266+
}
267+
268+
inline MemRay* get_rt_stack_ray(void* rtstack, uchar raynum)
269+
{
270+
RTStack* rtStack = rtstack;
271+
return &rtStack->ray[raynum];
272+
}
262273

263274
// === --------------------------------------------------------------------===
264275
// === PrimLeafDesc

IGC/Compiler/Optimizer/OpenCLPasses/RayTracing/ResolveOCLRaytracingBuiltins.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -184,8 +184,8 @@ void* __builtin_IB_intel_get_rt_stack( rtglobals_t rt_dispatch_globals );
184184
Returns a pointer to the data structure which the RT hardware operates on.
185185
The RT Stack address is computed as:
186186
syncStackSize = sizeof(HitInfo)*2 + (sizeof(Ray) + sizeof(TravStack))*RTDispatchGlobals.maxBVHLevels;
187-
syncBase = RTDispatchGlobals.rtMemBasePtr - (DSSID * NUM_SIMD_LANES_PER_DSS + StackID + 1)*syncStackSize;
188-
Where DSSID is an index which uniquely identifies the DSS in the machine (across tiles), and StackID is computed as below:
187+
syncBase = RTDispatchGlobals.rtMemBasePtr - (DSSID * NUM_SIMD_LANES_PER_DSS + StackID + 1)*syncStackSize; */
188+
/* Where DSSID is an index which uniquely identifies the DSS in the machine (across tiles), and StackID is computed as below:
189189
With fused EUs (e.g. in DG2) :
190190
StackID[10:0] (msb to lsb) = (EUID[3:0]<<7) | (THREAD_ID[2:0]<<4) | SIMD_LANE_ID[3:0]
191191
@@ -426,7 +426,7 @@ void ResolveOCLRaytracingBuiltins::handleUpdateRayQuery(llvm::CallInst& callInst
426426
Handler for the following builtins:
427427
rtfence_t __builtin_IB_intel_query_rt_fence(intel_ray_query_t);
428428
rtglobals_t __builtin_IB_intel_query_rt_globals(intel_ray_query_t);
429-
global RTStack* __builtin_IB_intel_query_rt_stack(intel_ray_query_t);
429+
global void* __builtin_IB_intel_query_rt_stack(intel_ray_query_t);
430430
TraceRayCtrl __builtin_IB_intel_query_ctrl(intel_ray_query_t);
431431
uint __builtin_IB_intel_query_bvh_level(intel_ray_query_t);
432432

0 commit comments

Comments
 (0)