Skip to content

Commit 9959896

Browse files
Add RT calls support to Kernel descriptor
Signed-off-by: Bartosz Dunajski <[email protected]>
1 parent 6043294 commit 9959896

15 files changed

+92
-100
lines changed

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ struct KernelImp : Kernel {
138138

139139
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
140140
bool usesRayTracing() {
141-
return kernelImmData->getDescriptor().hasRTCalls();
141+
return kernelImmData->getDescriptor().kernelAttributes.flags.hasRTCalls;
142142
}
143143

144144
ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) override {

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1241,8 +1241,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
12411241
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
12421242

12431243
{
1244-
VariableBackup<uint32_t> usesSyncBuffer{&kernelAttributes.flags.packed};
1245-
usesSyncBuffer = false;
1244+
VariableBackup<std::array<bool, 3>> usesSyncBuffer{&kernelAttributes.flags.packed};
1245+
usesSyncBuffer = {};
12461246
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
12471247
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
12481248
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative);

level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp

Lines changed: 14 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -676,20 +676,9 @@ TEST_F(KernelImmutableDataTests, givenKernelWithPrivateMemoryBiggerThanGlobalMem
676676
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
677677
}
678678

679-
class KernelDescriptorRTCallsTrue : public NEO::KernelDescriptor {
680-
bool hasRTCalls() const override {
681-
return true;
682-
}
683-
};
684-
685-
class KernelDescriptorRTCallsFalse : public NEO::KernelDescriptor {
686-
bool hasRTCalls() const override {
687-
return false;
688-
}
689-
};
690-
691679
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized) {
692-
KernelDescriptorRTCallsTrue mockDescriptor = {};
680+
KernelDescriptor mockDescriptor = {};
681+
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
693682
mockDescriptor.kernelMetadata.kernelName = "rt_test";
694683
for (auto i = 0u; i < 3u; i++) {
695684
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
@@ -735,7 +724,8 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
735724
}
736725

737726
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAllocationTokenThenRayTracingStillEnabledWithoutAllocation) {
738-
KernelDescriptorRTCallsTrue mockDescriptor = {};
727+
KernelDescriptor mockDescriptor = {};
728+
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
739729
mockDescriptor.kernelMetadata.kernelName = "rt_test";
740730
for (auto i = 0u; i < 3u; i++) {
741731
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
@@ -773,7 +763,8 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAll
773763
}
774764

775765
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized) {
776-
KernelDescriptorRTCallsTrue mockDescriptor = {};
766+
KernelDescriptor mockDescriptor = {};
767+
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
777768
mockDescriptor.kernelMetadata.kernelName = "rt_test";
778769
for (auto i = 0u; i < 3u; i++) {
779770
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
@@ -811,7 +802,8 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAll
811802
}
812803

813804
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitialized) {
814-
KernelDescriptorRTCallsFalse mockDescriptor = {};
805+
KernelDescriptor mockDescriptor = {};
806+
mockDescriptor.kernelAttributes.flags.hasRTCalls = false;
815807
mockDescriptor.kernelMetadata.kernelName = "rt_test";
816808
for (auto i = 0u; i < 3u; i++) {
817809
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
@@ -845,7 +837,8 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
845837
}
846838

847839
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) {
848-
KernelDescriptorRTCallsTrue mockDescriptor = {};
840+
KernelDescriptor mockDescriptor = {};
841+
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
849842
mockDescriptor.kernelMetadata.kernelName = "rt_test";
850843
for (auto i = 0u; i < 3u; i++) {
851844
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
@@ -1211,7 +1204,8 @@ TEST_F(KernelPropertiesTests, givenValidKernelWithIndirectAccessFlagsAndDisableI
12111204

12121205
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsTrueThenUsesRayTracingIsTrue, MatchAny) {
12131206
WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
1214-
KernelDescriptorRTCallsTrue mockDescriptor = {};
1207+
KernelDescriptor mockDescriptor = {};
1208+
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
12151209
WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
12161210

12171211
mockKernelImmutableData.kernelDescriptor = &mockDescriptor;
@@ -1222,7 +1216,8 @@ HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsTrueThenUsesRayTracingIsTrue, M
12221216

12231217
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsFalseThenUsesRayTracingIsFalse, MatchAny) {
12241218
WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
1225-
KernelDescriptorRTCallsFalse mockDescriptor = {};
1219+
KernelDescriptor mockDescriptor = {};
1220+
mockDescriptor.kernelAttributes.flags.hasRTCalls = false;
12261221
WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
12271222

12281223
mockKernelImmutableData.kernelDescriptor = &mockDescriptor;

shared/offline_compiler/source/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,6 @@ set(CLOC_LIB_SRCS_LIB
4141
${NEO_SHARED_DIRECTORY}/helpers/hw_info.cpp
4242
${NEO_SHARED_DIRECTORY}/helpers/hw_info.h
4343
${NEO_SHARED_DIRECTORY}/helpers${BRANCH_DIR_SUFFIX}hw_info_extended.cpp
44-
${NEO_SHARED_DIRECTORY}/kernel${BRANCH_DIR_SUFFIX}kernel_descriptor.cpp
45-
${NEO_SHARED_DIRECTORY}/kernel${BRANCH_DIR_SUFFIX}kernel_descriptor.h
4644
${NEO_SHARED_DIRECTORY}/os_interface/os_library.h
4745
${NEO_SHARED_DIRECTORY}/compiler_interface/oclc_extensions.cpp
4846
${NEO_SHARED_DIRECTORY}/compiler_interface/oclc_extensions.h

shared/source/kernel/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,11 @@ set(NEO_CORE_KERNEL
1515
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_device_side_enqueue.h
1616
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_vme.h
1717
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata.h
18-
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor.cpp
1918
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h
2019
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp
2120
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h
2221
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
2322
${CMAKE_CURRENT_SOURCE_DIR}/kernel_properties.h
24-
${CMAKE_CURRENT_SOURCE_DIR}/read_extended_info.h
25-
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}read_extended_info.cpp
26-
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor_from_patchtokens_extended.cpp
2723
)
2824

2925
set_property(GLOBAL PROPERTY NEO_CORE_KERNEL ${NEO_CORE_KERNEL})

shared/source/kernel/kernel_descriptor.cpp

Lines changed: 0 additions & 14 deletions
This file was deleted.

shared/source/kernel/kernel_descriptor.h

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include "shared/source/utilities/arrayref.h"
1717
#include "shared/source/utilities/stackvec.h"
1818

19+
#include <array>
1920
#include <cinttypes>
2021
#include <cstddef>
2122
#include <limits>
@@ -28,10 +29,6 @@ namespace NEO {
2829
using StringMap = std::unordered_map<uint32_t, std::string>;
2930
using InstructionsSegmentOffset = uint16_t;
3031

31-
struct ExtendedInfoBase {
32-
virtual ~ExtendedInfoBase() = default;
33-
};
34-
3532
struct KernelDescriptor {
3633
enum AddressingMode : uint8_t {
3734
AddrNone,
@@ -44,7 +41,6 @@ struct KernelDescriptor {
4441

4542
KernelDescriptor() = default;
4643
virtual ~KernelDescriptor() = default;
47-
virtual bool hasRTCalls() const;
4844

4945
void updateCrossThreadDataSize() {
5046
uint32_t crossThreadDataSize = 0;
@@ -144,12 +140,11 @@ struct KernelDescriptor {
144140
}
145141

146142
struct KernelAttributes {
147-
KernelAttributes() { flags.packed = 0U; }
148-
149143
uint32_t slmInlineSize = 0U;
150144
uint32_t perThreadScratchSize[2] = {0U, 0U};
151145
uint32_t perHwThreadPrivateMemorySize = 0U;
152146
uint32_t perThreadSystemThreadSurfaceSize = 0U;
147+
uint32_t numThreadsRequired = 0u;
153148
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
154149
uint16_t crossThreadDataSize = 0U;
155150
uint16_t inlineDataPayloadSize = 0U;
@@ -185,14 +180,16 @@ struct KernelDescriptor {
185180

186181
union {
187182
struct {
183+
// 0
188184
bool usesSpecialPipelineSelectMode : 1;
189185
bool usesStringMapForPrintf : 1;
190186
bool usesPrintf : 1;
191187
bool usesFencesForReadWriteImages : 1;
192-
bool usesFlattenedLocalIds;
188+
bool usesFlattenedLocalIds : 1;
193189
bool usesPrivateMemory : 1;
194190
bool usesVme : 1;
195191
bool usesImages : 1;
192+
// 1
196193
bool usesSamplers : 1;
197194
bool usesSyncBuffer : 1;
198195
bool useGlobalAtomics : 1;
@@ -201,14 +198,17 @@ struct KernelDescriptor {
201198
bool perThreadDataHeaderIsPresent : 1;
202199
bool perThreadDataUnusedGrfIsPresent : 1;
203200
bool requiresDisabledEUFusion : 1;
201+
// 2
204202
bool requiresDisabledMidThreadPreemption : 1;
205203
bool requiresSubgroupIndependentForwardProgress : 1;
206204
bool requiresWorkgroupWalkOrder : 1;
207205
bool requiresImplicitArgs : 1;
208206
bool useStackCalls : 1;
207+
bool hasRTCalls : 1;
208+
bool reserved : 2;
209209
};
210-
uint32_t packed;
211-
} flags;
210+
std::array<bool, 3> packed;
211+
} flags = {};
212212
static_assert(sizeof(KernelAttributes::flags) == sizeof(KernelAttributes::flags.packed), "");
213213

214214
bool usesStringMap() const {
@@ -297,7 +297,6 @@ struct KernelDescriptor {
297297
} external;
298298

299299
std::vector<uint8_t> generatedHeaps;
300-
std::unique_ptr<ExtendedInfoBase> extendedInfo;
301300
};
302301

303302
} // namespace NEO

shared/source/kernel/kernel_descriptor_from_patchtokens.cpp

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
#include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h"
1212
#include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h"
1313
#include "shared/source/kernel/kernel_descriptor.h"
14-
#include "shared/source/kernel/read_extended_info.h"
1514

1615
#include <sstream>
1716
#include <string>
@@ -54,6 +53,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
5453
dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
5554
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
5655
dst.kernelAttributes.barrierCount = execEnv.HasBarriers;
56+
dst.kernelAttributes.numThreadsRequired = execEnv.NumThreadsRequired;
5757

5858
dst.kernelAttributes.flags.requiresDisabledEUFusion = (0 != execEnv.RequireDisableEUFusion);
5959
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
@@ -63,9 +63,9 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
6363
dst.kernelAttributes.flags.usesSpecialPipelineSelectMode = (0 != execEnv.HasDPAS);
6464
dst.kernelAttributes.flags.usesStatelessWrites = (0 != execEnv.StatelessWritesCount);
6565
dst.kernelAttributes.flags.useStackCalls = (0 != execEnv.HasStackCalls);
66+
dst.kernelAttributes.flags.hasRTCalls = (0 != execEnv.HasRTCalls);
6667

6768
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
68-
readExtendedInfo(dst.extendedInfo, execEnv);
6969
}
7070

7171
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) {
@@ -477,7 +477,24 @@ void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::Ker
477477
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface);
478478
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface);
479479
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateSyncBuffer);
480-
populateKernelDescriptorRtDispatchGlobals(dst, src);
480+
481+
{
482+
uint32_t heapOffset = 0;
483+
uint32_t paramOffset = 0;
484+
uint32_t paramSize = 0;
485+
486+
if (src.tokens.allocateRTGlobalBuffer != nullptr) {
487+
auto allocateRTGlobalBuffer = static_cast<const struct iOpenCL::SPatchAllocateRTGlobalBuffer *>(src.tokens.allocateRTGlobalBuffer);
488+
heapOffset = allocateRTGlobalBuffer->SurfaceStateHeapOffset;
489+
paramOffset = allocateRTGlobalBuffer->DataParamOffset;
490+
paramSize = allocateRTGlobalBuffer->DataParamSize;
491+
}
492+
493+
populatePointerKernelArg(dst.payloadMappings.implicitArgs.rtDispatchGlobals,
494+
paramOffset, paramSize, heapOffset, heapOffset,
495+
dst.kernelAttributes.bufferAddressingMode);
496+
}
497+
481498
dst.payloadMappings.explicitArgs.resize(src.tokens.kernelArgs.size());
482499
dst.explicitArgsExtendedMetadata.resize(src.tokens.kernelArgs.size());
483500

shared/source/kernel/kernel_descriptor_from_patchtokens.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2021 Intel Corporation
2+
* Copyright (C) 2020-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -16,7 +16,6 @@ namespace PatchTokenBinary {
1616
struct KernelFromPatchtokens;
1717
}
1818

19-
void populateKernelDescriptorRtDispatchGlobals(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src);
2019
void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes);
2120

2221
} // namespace NEO

shared/source/kernel/kernel_descriptor_from_patchtokens_extended.cpp

Lines changed: 0 additions & 14 deletions
This file was deleted.

shared/source/kernel/read_extended_info.cpp

Lines changed: 0 additions & 12 deletions
This file was deleted.

shared/source/kernel/read_extended_info.h

Lines changed: 0 additions & 15 deletions
This file was deleted.

shared/test/unit_test/kernel/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ target_sources(${TARGET_NAME} PRIVATE
1111
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata_tests.cpp
1212
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens_tests.cpp
1313
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_tests.cpp
14+
${CMAKE_CURRENT_SOURCE_DIR}/kernel_raytracing_tests.cpp
1415
)
1516

1617
add_subdirectories()

shared/test/unit_test/kernel/kernel_descriptor_tests.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,9 @@
1111

1212
TEST(KernelDescriptor, WhenDefaultInitializedThenValuesAreCleared) {
1313
NEO::KernelDescriptor desc;
14-
EXPECT_EQ(0U, desc.kernelAttributes.flags.packed);
14+
for (auto &element : desc.kernelAttributes.flags.packed) {
15+
EXPECT_EQ(0U, element);
16+
}
1517
EXPECT_EQ(0U, desc.kernelAttributes.slmInlineSize);
1618
EXPECT_EQ(0U, desc.kernelAttributes.perThreadScratchSize[0]);
1719
EXPECT_EQ(0U, desc.kernelAttributes.perThreadScratchSize[1]);

0 commit comments

Comments
 (0)