Skip to content

Commit 00132b9

Browse files
MaciejKalinski, igcbot
authored and committed
Ray Tracing minor refactoring
Some adjustments in the RT stack handling code.
1 parent d0e33bb commit 00132b9

File tree

2 files changed

+19
-13
lines changed

2 files changed

+19
-13
lines changed

IGC/AdaptorCommon/RayTracing/RTBuilder.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,7 @@ Value* RTBuilder::getGlobalSyncStackID()
229229
return val;
230230
}
231231

232+
232233
uint32_t RTBuilder::getRTStack2Size() const
233234
{
234235
switch (getMemoryStyle())
@@ -265,11 +266,13 @@ Value* RTBuilder::getSyncRTStackSize()
265266

266267
Value* RTBuilder::getSyncStackOffset(bool rtMemBasePtr)
267268
{
268-
// Per thread Synchronous RTStack is calculated using the following formula:
269+
// Per thread Synchronous RTStack address/ptr is calculated using the following formula
270+
// (note that offset is calculated here so RTDispatchGlobals.rtMemBasePtr is not taken into account):
269271
// syncBase = RTDispatchGlobals.rtMemBasePtr - (GlobalSyncStackID + 1) * syncStackSize;
270-
// If we start from syncStack, then, offset should be:
272+
// If we start from syncStack, the address/ptr should be:
271273
// syncBase = syncStack + (NumSyncStackSlots - (GlobalSyncStackID + 1)) * syncStackSize
272-
// Where:
274+
275+
273276
Value* globalStackID = this->getGlobalSyncStackID();
274277
Value* OffsetID = this->CreateAdd(globalStackID, this->getInt32(1));
275278

IGC/AdaptorCommon/RayTracing/RTStackFormat.h

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -953,17 +953,17 @@ static_assert(IGC::RTStackAlign % LSC_WRITE_GRANULARITY == 0, "not aligned to wr
953953

954954
// This is the structure of memory that will be allocated by the UMD:
955955
template <typename HotZoneTy, typename SyncStackTy, typename AsyncStackTy, typename SWStackTy,
956-
uint64_t DSS_COUNT, uint64_t NumDSSRTStacks, uint64_t SIMD_LANES_PER_DSS>
956+
uint32_t RTStackAlign, uint64_t DSS_COUNT, uint64_t NumDSSRTStacks, uint64_t SIMD_LANES_PER_DSS>
957957
struct RTMemory
958958
{
959959
// Packed SW hot-zones
960960
alignas(IGC::RTStackAlign) HotZoneTy HotZones[DSS_COUNT * NumDSSRTStacks];
961961

962962
// sync stacks for synchronous ray tracing
963-
alignas(IGC::RTStackAlign) SyncStackTy SyncStacks[DSS_COUNT * SIMD_LANES_PER_DSS];
963+
alignas(RTStackAlign) SyncStackTy SyncStacks[DSS_COUNT * SIMD_LANES_PER_DSS];
964964

965965
// RTMemBasePointer points here <----
966-
alignas(IGC::RTStackAlign) AsyncStackTy AsyncStacks[DSS_COUNT * NumDSSRTStacks];
966+
alignas(RTStackAlign) AsyncStackTy AsyncStacks[DSS_COUNT * NumDSSRTStacks];
967967

968968
// Align to L3 sector size, or LSC sector size if stack is LSC-cached
969969
alignas(IGC::RTStackAlign) SWStackTy SWStacks[DSS_COUNT * NumDSSRTStacks];
@@ -980,12 +980,13 @@ constexpr uint64_t calcRTMemoryAllocSize(
980980
uint64_t SIMD_LANES_PER_DSS)
981981
{
982982
// SIMD_LANES_PER_DSS = EUCount * ThreadCount * SIMD16
983-
return IGC::Align(SWHotZoneSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
984-
IGC::Align(SyncStackSize * DSS_COUNT * SIMD_LANES_PER_DSS, IGC::RTStackAlign) +
985-
IGC::Align(AsyncStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
986-
IGC::Align(SWStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign);
983+
return IGC::Align(SWHotZoneSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
984+
IGC::Align(SyncStackSize * DSS_COUNT * SIMD_LANES_PER_DSS, IGC::RTStackAlign) +
985+
IGC::Align(AsyncStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
986+
IGC::Align(SWStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign);
987987
}
988988

989+
989990
constexpr uint32_t getSyncStackSize() {
990991
#define STYLE(X) static_assert(sizeofRTStack2<RTStack2<Xe>>() == sizeofRTStack2<RTStack2<X>>());
991992
#include "RayTracingMemoryStyle.h"
@@ -995,7 +996,7 @@ constexpr uint32_t getSyncStackSize() {
995996

996997
// As per this:
997998
// The rtMemBasePtr points to the top of the async stacks. After having computed
998-
// the full allocation with 'calcRTMemoryAllocSize()', This returns the offset
999+
// the full allocation with 'calcRTMemoryAllocSize()', this returns the offset
9991000
// of the async stacks, hot zone, and sw stack from the base of the allocation.
10001001
constexpr uint64_t calcRTMemoryOffsets(
10011002
uint64_t SWHotZoneSize,
@@ -1020,21 +1021,23 @@ constexpr uint64_t calcRTMemoryOffsets(
10201021
return AsyncOffset;
10211022
}
10221023

1024+
10231025
// unit tests:
10241026
static_assert(
10251027
calcRTMemoryAllocSize(
10261028
sizeof(SWHotZone_v1), sizeof(RTStack2<Xe>), sizeof(RTStack2<Xe>), 128,
10271029
32, 2048, 16 * 8 * 16) ==
1028-
sizeof(RTMemory<SWHotZone_v1, RTStack2<Xe>, RTStack2<Xe>, uint8_t[128],
1030+
sizeof(RTMemory<SWHotZone_v1, RTStack2<Xe>, RTStack2<Xe>, uint8_t[128], IGC::RTStackAlign,
10291031
32, 2048, 16 * 8 * 16>), "mismatch?");
10301032

10311033
static_assert(
10321034
calcRTMemoryAllocSize(
10331035
8, sizeof(RTStack2<Xe>), sizeof(RTStack2<Xe>), 136,
10341036
32, 2048, 16 * 8 * 16) ==
1035-
sizeof(RTMemory<uint8_t[8], RTStack2<Xe>, RTStack2<Xe>, uint8_t[136],
1037+
sizeof(RTMemory<uint8_t[8], RTStack2<Xe>, RTStack2<Xe>, uint8_t[136], IGC::RTStackAlign,
10361038
32, 2048, 16 * 8 * 16>), "mismatch?");
10371039

1040+
10381041
/* a list of commands for the ray tracing hardware */
10391042
enum class TraceRayCtrl : uint8_t
10401043
{

0 commit comments

Comments
 (0)