@@ -953,17 +953,17 @@ static_assert(IGC::RTStackAlign % LSC_WRITE_GRANULARITY == 0, "not aligned to wr
953
953
954
954
// This is the structure of memory that will be allocated by the UMD:
955
955
template <typename HotZoneTy, typename SyncStackTy, typename AsyncStackTy, typename SWStackTy,
956
- uint64_t DSS_COUNT, uint64_t NumDSSRTStacks, uint64_t SIMD_LANES_PER_DSS>
956
+ uint32_t RTStackAlign, uint64_t DSS_COUNT, uint64_t NumDSSRTStacks, uint64_t SIMD_LANES_PER_DSS>
957
957
struct RTMemory
958
958
{
959
959
// Packed SW hot-zones
960
960
alignas (IGC::RTStackAlign) HotZoneTy HotZones[DSS_COUNT * NumDSSRTStacks];
961
961
962
962
// sync stacks for synchronous ray tracing
963
- alignas (IGC:: RTStackAlign) SyncStackTy SyncStacks[DSS_COUNT * SIMD_LANES_PER_DSS];
963
+ alignas (RTStackAlign) SyncStackTy SyncStacks[DSS_COUNT * SIMD_LANES_PER_DSS];
964
964
965
965
// RTMemBasePointer points here <----
966
- alignas (IGC:: RTStackAlign) AsyncStackTy AsyncStacks[DSS_COUNT * NumDSSRTStacks];
966
+ alignas (RTStackAlign) AsyncStackTy AsyncStacks[DSS_COUNT * NumDSSRTStacks];
967
967
968
968
// Align to L3 sector size, or LSC sector size if stack is LSC-cached
969
969
alignas (IGC::RTStackAlign) SWStackTy SWStacks[DSS_COUNT * NumDSSRTStacks];
@@ -980,12 +980,13 @@ constexpr uint64_t calcRTMemoryAllocSize(
980
980
uint64_t SIMD_LANES_PER_DSS)
981
981
{
982
982
// SIMD_LANES_PER_DSS = EUCount * ThreadCount * SIMD16
983
- return IGC::Align (SWHotZoneSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
984
- IGC::Align (SyncStackSize * DSS_COUNT * SIMD_LANES_PER_DSS, IGC::RTStackAlign) +
985
- IGC::Align (AsyncStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
986
- IGC::Align (SWStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign);
983
+ return IGC::Align (SWHotZoneSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
984
+ IGC::Align (SyncStackSize * DSS_COUNT * SIMD_LANES_PER_DSS, IGC::RTStackAlign) +
985
+ IGC::Align (AsyncStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign) +
986
+ IGC::Align (SWStackSize * DSS_COUNT * NumDSSRTStacks, IGC::RTStackAlign);
987
987
}
988
988
989
+
989
990
constexpr uint32_t getSyncStackSize () {
990
991
#define STYLE (X ) static_assert (sizeofRTStack2<RTStack2<Xe>>() == sizeofRTStack2<RTStack2<X>>());
991
992
#include " RayTracingMemoryStyle.h"
@@ -995,7 +996,7 @@ constexpr uint32_t getSyncStackSize() {
995
996
996
997
// As per this:
997
998
// The rtMemBasePtr points to the top of the async stacks. After having computed
998
- // the full allocation with 'calcRTMemoryAllocSize()', This returns the offset
999
+ // the full allocation with 'calcRTMemoryAllocSize()', this returns the offset
999
1000
// of the async stacks, hot zone, and sw stack from the base of the allocation.
1000
1001
constexpr uint64_t calcRTMemoryOffsets (
1001
1002
uint64_t SWHotZoneSize,
@@ -1020,21 +1021,23 @@ constexpr uint64_t calcRTMemoryOffsets(
1020
1021
return AsyncOffset;
1021
1022
}
1022
1023
1024
+
1023
1025
// unit tests:
1024
1026
// Compile-time consistency check: the byte count returned by
// calcRTMemoryAllocSize() must equal sizeof() of the RTMemory layout
// instantiated with the same parameters. Here the hot zone is SWHotZone_v1,
// both the sync and async stacks are RTStack2<Xe>, the SW stack is 128 bytes,
// and the three count arguments are 32, 2048 and 16 * 8 * 16 (DSS_COUNT,
// NumDSSRTStacks and SIMD_LANES_PER_DSS in RTMemory's template parameter order).
// The updated RTMemory instantiation also passes IGC::RTStackAlign explicitly
// as the stack-alignment template argument.
static_assert (
1025
1027
calcRTMemoryAllocSize (
1026
1028
sizeof (SWHotZone_v1), sizeof(RTStack2<Xe>), sizeof(RTStack2<Xe>), 128,
1027
1029
32, 2048, 16 * 8 * 16) ==
1028
- sizeof(RTMemory<SWHotZone_v1, RTStack2<Xe>, RTStack2<Xe>, uint8_t [128 ],
1030
+ sizeof(RTMemory<SWHotZone_v1, RTStack2<Xe>, RTStack2<Xe>, uint8_t [128 ], IGC::RTStackAlign,
1029
1031
32 , 2048 , 16 * 8 * 16 >), "mismatch?");
1030
1032
1031
1033
// Second compile-time consistency check between calcRTMemoryAllocSize() and
// sizeof(RTMemory<...>), this time using raw byte arrays for the hot zone
// (8 bytes) and the SW stack (136 bytes) instead of named structures. The
// sync/async stack type (RTStack2<Xe>) and the three count arguments
// (32, 2048, 16 * 8 * 16) are the same as in the previous assertion.
static_assert (
1032
1034
calcRTMemoryAllocSize (
1033
1035
8 , sizeof (RTStack2<Xe>), sizeof(RTStack2<Xe>), 136,
1034
1036
32, 2048, 16 * 8 * 16) ==
1035
- sizeof(RTMemory<uint8_t [8 ], RTStack2<Xe>, RTStack2<Xe>, uint8_t [136 ],
1037
+ sizeof(RTMemory<uint8_t [8 ], RTStack2<Xe>, RTStack2<Xe>, uint8_t [136 ], IGC::RTStackAlign,
1036
1038
32 , 2048 , 16 * 8 * 16 >), "mismatch?");
1037
1039
1040
+
1038
1041
/* a list of commands for the ray tracing hardware */
1039
1042
enum class TraceRayCtrl : uint8_t
1040
1043
{
0 commit comments