Skip to content

Commit faea791

Browse files
Revert "Fail build program in shared system USM + statefull access case OCL"
This reverts commit 9dabc2d. Signed-off-by: Compute-Runtime-Validation <[email protected]>
1 parent a0243eb commit faea791

File tree

12 files changed

+8
-155
lines changed

12 files changed

+8
-155
lines changed

opencl/source/program/build.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ cl_int Program::build(
3434
const char *buildOptions,
3535
bool enableCaching) {
3636
cl_int retVal = CL_SUCCESS;
37-
37+
std::string internalOptions;
38+
initInternalOptions(internalOptions);
3839
auto defaultClDevice = deviceVector[0];
3940
UNRECOVERABLE_IF(defaultClDevice == nullptr);
4041
auto &defaultDevice = defaultClDevice->getDevice();
@@ -68,9 +69,6 @@ cl_int Program::build(
6869
} else if (this->createdFrom != CreatedFrom::BINARY) {
6970
options = "";
7071
}
71-
std::string internalOptions;
72-
initInternalOptions(internalOptions);
73-
7472
extractInternalOptions(options, internalOptions);
7573
applyAdditionalOptions(internalOptions);
7674

@@ -168,10 +166,6 @@ cl_int Program::build(
168166
phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing;
169167
}
170168

171-
if (containsStatefulAccess(defaultDevice.getRootDeviceIndex()) && forceToStatelessNeeded() && !isBuiltIn) {
172-
retVal = CL_BUILD_PROGRAM_FAILURE;
173-
}
174-
175169
if (retVal != CL_SUCCESS) {
176170
break;
177171
}

opencl/source/program/program.cpp

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic
6666
kernelDebugEnabled = clDevices[0]->isDebuggerActive();
6767
}
6868
void Program::initInternalOptions(std::string &internalOptions) const {
69-
7069
auto pClDevice = clDevices[0];
7170
auto force32BitAddressess = pClDevice->getSharedDeviceInfo().force32BitAddressess;
7271
internalOptions = getOclVersionCompilerInternalOption(pClDevice->getEnabledClVersion());
@@ -75,7 +74,7 @@ void Program::initInternalOptions(std::string &internalOptions) const {
7574
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit);
7675
}
7776

78-
if ((isBuiltIn && is32bit) || forceToStatelessNeeded() ||
77+
if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() ||
7978
DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
8079
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired);
8180
}
@@ -137,20 +136,6 @@ Program::~Program() {
137136
}
138137
}
139138

140-
bool Program::forceToStatelessNeeded() const {
141-
auto preferStateful = false;
142-
if (auto it = options.find(NEO::CompilerOptions::smallerThan4gbBuffersOnly.data()); it != std::string::npos) {
143-
preferStateful = true;
144-
}
145-
146-
if (DebugManager.flags.UseSmallerThan4gbBuffersOnly.get() != -1) {
147-
preferStateful = static_cast<bool>(DebugManager.flags.UseSmallerThan4gbBuffersOnly.get());
148-
}
149-
150-
auto forceStateless = !preferStateful && clDevices[0]->areSharedSystemAllocationsAllowed();
151-
return forceStateless;
152-
}
153-
154139
cl_int Program::createProgramFromBinary(
155140
const void *pBinary,
156141
size_t binarySize, ClDevice &clDevice) {
@@ -504,22 +489,6 @@ cl_int Program::packDeviceBinary(ClDevice &clDevice) {
504489
return CL_SUCCESS;
505490
}
506491

507-
bool Program::containsStatefulAccess(uint32_t rootDeviceIndex) const {
508-
auto &buildInfo = buildInfos[rootDeviceIndex];
509-
for (const auto &kernelInfo : buildInfo.kernelInfoArray) {
510-
for (const auto &arg : kernelInfo->kernelDescriptor.payloadMappings.explicitArgs) {
511-
auto isStatefulAccess = arg.is<ArgDescriptor::ArgTPointer>() &&
512-
(isValidOffset(arg.as<ArgDescPointer>().bindless) ||
513-
isValidOffset(arg.as<ArgDescPointer>().bindful));
514-
if (isStatefulAccess) {
515-
return true;
516-
}
517-
}
518-
}
519-
520-
return false;
521-
}
522-
523492
void Program::setBuildStatus(cl_build_status status) {
524493
for (auto &deviceBuildInfo : deviceBuildInfos) {
525494
deviceBuildInfo.second.buildStatus = status;

opencl/source/program/program.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,6 @@ class Program : public BaseObject<_cl_program> {
283283
}
284284

285285
protected:
286-
bool forceToStatelessNeeded() const;
287-
288-
MOCKABLE_VIRTUAL bool containsStatefulAccess(uint32_t rootDeviceIndex) const;
289-
290286
MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice);
291287

292288
cl_int packDeviceBinary(ClDevice &clDevice);
@@ -370,7 +366,6 @@ class Program : public BaseObject<_cl_program> {
370366

371367
bool isBuiltIn = false;
372368
bool kernelDebugEnabled = false;
373-
bool containsStatefulAccesses = false;
374369
uint32_t maxRootDeviceIndex = std::numeric_limits<uint32_t>::max();
375370
std::mutex lockMutex;
376371
uint32_t exposedKernels = 0;

opencl/test/unit_test/mocks/mock_program.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ class MockProgram : public Program {
4040
using Program::debugDataSize;
4141
using Program::deviceBuildInfos;
4242
using Program::extractInternalOptions;
43-
using Program::forceToStatelessNeeded;
4443
using Program::getKernelInfo;
4544
using Program::internalOptionsToExtract;
4645
using Program::irBinary;

opencl/test/unit_test/program/program_tests.cpp

Lines changed: 0 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,105 +1653,6 @@ TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) {
16531653
}
16541654
}
16551655

1656-
TEST_F(ProgramTests, whenForceToStatelessNeededIsCalledThenCorrectResultIsReturned) {
1657-
DebugManagerStateRestore restorer;
1658-
1659-
class MyMockProgram : public Program {
1660-
public:
1661-
using Program::forceToStatelessNeeded;
1662-
using Program::options;
1663-
using Program::Program;
1664-
};
1665-
1666-
MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
1667-
1668-
{
1669-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
1670-
program.options = "";
1671-
EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed());
1672-
}
1673-
{
1674-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
1675-
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
1676-
EXPECT_FALSE(program.forceToStatelessNeeded());
1677-
}
1678-
{
1679-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
1680-
program.options = "";
1681-
EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed());
1682-
}
1683-
{
1684-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
1685-
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
1686-
EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed());
1687-
}
1688-
{
1689-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
1690-
program.options = "";
1691-
EXPECT_FALSE(program.forceToStatelessNeeded());
1692-
}
1693-
{
1694-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
1695-
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
1696-
EXPECT_FALSE(program.forceToStatelessNeeded());
1697-
}
1698-
}
1699-
1700-
TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) {
1701-
DebugManagerStateRestore restorer;
1702-
1703-
class MyMockProgram : public Program {
1704-
public:
1705-
using Program::containsStatefulAccess;
1706-
using Program::createdFrom;
1707-
using Program::irBinary;
1708-
using Program::irBinarySize;
1709-
using Program::isBuiltIn;
1710-
using Program::options;
1711-
using Program::Program;
1712-
using Program::sourceCode;
1713-
1714-
bool containsStatefulAccess(uint32_t rootDeviceIndex) const override {
1715-
return hasStatefulAccess;
1716-
}
1717-
1718-
bool hasStatefulAccess = false;
1719-
};
1720-
1721-
MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
1722-
program.isBuiltIn = false;
1723-
program.sourceCode = "test_kernel";
1724-
program.createdFrom = Program::CreatedFrom::SOURCE;
1725-
1726-
{
1727-
EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false));
1728-
}
1729-
{
1730-
program.hasStatefulAccess = true;
1731-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
1732-
EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false));
1733-
}
1734-
{
1735-
program.hasStatefulAccess = true;
1736-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
1737-
if (pClDevice->areSharedSystemAllocationsAllowed()) {
1738-
EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, program.build(toClDeviceVector(*pClDevice), nullptr, false));
1739-
} else {
1740-
EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false));
1741-
}
1742-
}
1743-
{
1744-
MyMockProgram programWithBuiltIn(pContext, true, toClDeviceVector(*pClDevice));
1745-
programWithBuiltIn.irBinary.reset(new char[16]);
1746-
programWithBuiltIn.irBinarySize = 16;
1747-
1748-
programWithBuiltIn.isBuiltIn = true;
1749-
programWithBuiltIn.hasStatefulAccess = true;
1750-
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
1751-
EXPECT_EQ(CL_SUCCESS, programWithBuiltIn.build(toClDeviceVector(*pClDevice), nullptr, false));
1752-
}
1753-
}
1754-
17551656
TEST_F(ProgramTests, GivenForcedClVersionWhenProgramIsCreatedThenCorrectOclOptionIsPresent) {
17561657
std::pair<unsigned int, std::string> testedValues[] = {
17571658
{0, "-ocl-version=120"},

opencl/test/unit_test/test_files/igdrcl.config

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ ForceCsrFlushing = 0
110110
ForceCsrReprogramming = 0
111111
OmitTimestampPacketDependencies = 0
112112
DisableStatelessToStatefulOptimization = 0
113-
UseSmallerThan4gbBuffersOnly = -1
114113
DisableConcurrentBlockExecution = 0
115114
UseNoRingFlushesKmdMode = 1
116115
DisableZeroCopyForUseHostPtr = 0

opencl/test/unit_test/test_files/kernel_num_args.cl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,4 @@
66
*/
77

88
__kernel void test(__global float *argGlobal, __read_only image3d_t argImg3D, __constant float *argConst) {
9-
argGlobal[0] = argConst[0];
109
}

opencl/test/unit_test/test_files/test_constant_memory.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ __constant uint constant_a[2] = {0xabcd5432u, 0xaabb5533u};
1010
__kernel void test(__global uint *in, __global uint *out) {
1111
int i = get_global_id(0);
1212
int j = get_global_id(0) % (sizeof(constant_a) / sizeof(constant_a[0]));
13-
in[0] = 0;
13+
1414
out[i] = constant_a[j];
1515
}

shared/source/compiler_interface/compiler_options/compiler_options_base.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
namespace NEO {
1414
namespace CompilerOptions {
1515
static constexpr ConstStringRef greaterThan4gbBuffersRequired = "-cl-intel-greater-than-4GB-buffer-required";
16-
static constexpr ConstStringRef smallerThan4gbBuffersOnly = "-cl-opt-smaller-than-4GB-buffers-only";
1716
static constexpr ConstStringRef hasBufferOffsetArg = "-cl-intel-has-buffer-offset-arg";
1817
static constexpr ConstStringRef kernelDebugEnable = "-cl-kernel-debug-enable";
1918
static constexpr ConstStringRef arch32bit = "-m32";

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,6 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
207207
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
208208
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
209209
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
210-
DECLARE_DEBUG_VARIABLE(int32_t, UseSmallerThan4gbBuffersOnly, -1, " -1: default, 0: disabled, 1: enabled. When enabled driver will not force stateless accesses when shared system USM is active")
211210
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
212211
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
213212
DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied")

shared/source/helpers/compiler_hw_info_config_base.inl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#include "shared/source/helpers/compiler_hw_info_config.h"
1111

1212
namespace NEO {
13+
template <PRODUCT_FAMILY gfxProduct>
14+
bool CompilerHwInfoConfigHw<gfxProduct>::isForceToStatelessRequired() const {
15+
return false;
16+
}
1317

1418
template <PRODUCT_FAMILY gfxProduct>
1519
bool CompilerHwInfoConfigHw<gfxProduct>::isForceEmuInt32DivRemSPRequired() const {

shared/source/helpers/compiler_hw_info_config_bdw_and_later.inl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,4 @@ bool CompilerHwInfoConfigHw<gfxProduct>::isMidThreadPreemptionSupported(const Ha
1616
return hwInfo.featureTable.ftrGpGpuMidThreadLevelPreempt;
1717
}
1818

19-
template <PRODUCT_FAMILY gfxProduct>
20-
bool CompilerHwInfoConfigHw<gfxProduct>::isForceToStatelessRequired() const {
21-
return false;
22-
}
23-
2419
} // namespace NEO

0 commit comments

Comments
 (0)