Skip to content

Commit fcfd6f6

Browse files
jgu222sys_zuul
authored and
sys_zuul
committed
If a ptr arg is misaligned (ptr to 8bit/16bit, etc), we must
use buffer offset to convert them to stateful; otherwise, they must remain stateless. This is due to the fact that surface state's base must be DW-aligned. This bug was not exposed in ocl tests (probably due to less coverage). This code is off by default now. Change-Id: I4721642454ad74559a47ff8afbf115100efc2e34
1 parent 350842a commit fcfd6f6

File tree

2 files changed

+45
-9
lines changed

2 files changed

+45
-9
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStatefull/StatelessToStatefull.cpp

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,20 @@ bool StatelessToStatefull::getOffsetFromGEP(
338338
bool StatelessToStatefull::pointerIsPositiveOffsetFromKernelArgument(
339339
Function* F, Value* V, Value*& offset, unsigned int& argNumber)
340340
{
341+
auto getPointeeAlign = [](const DataLayout* DL, Value* ptrVal)-> unsigned {
342+
if (PointerType* PTy = dyn_cast<PointerType>(ptrVal->getType()))
343+
{
344+
Type* pointeeTy = PTy->getElementType();
345+
if (!pointeeTy->isSized()) {
346+
return 0;
347+
}
348+
return DL->getABITypeAlignment(pointeeTy);
349+
}
350+
return 0;
351+
};
352+
353+
const DataLayout* DL = &F->getParent()->getDataLayout();
354+
341355
AssumptionCache* AC = getAC(F);
342356

343357
PointerType* ptrType = dyn_cast<PointerType>(V->getType());
@@ -370,18 +384,38 @@ bool StatelessToStatefull::pointerIsPositiveOffsetFromKernelArgument(
370384
argNumber = arg->getAssociatedArgNo();
371385
bool gepProducesPositivePointer = true;
372386

387+
// An address needs to be DW-aligned in order to be a base
388+
// in a surface state. In other words, an unaligned argument
389+
// cannot be used as a surface base unless buffer_offset is
390+
// used, in which "argument + buffer_offset" is instead used
391+
// as a surface base. (argument + buffer_offset is the original
392+
// base of buffer created on host side, the original buffer is
393+
// guaranteed to be DW-aligned.)
394+
//
395+
// Note that implicit arg is always aligned.
396+
bool isAlignedPointee =
397+
(IGC_IS_FLAG_DISABLED(UseSubDWAlignedPtrArg) || arg->isImplicitArg())
398+
? true
399+
: (getPointeeAlign(DL, base) >= 4);
400+
373401
// If m_hasBufferOffsetArg is true, the offset argument is added to
374-
// the final offset, and the final offset must be positive. Thus
375-
// skip checking if an offset is positive.
402+
// the final offset to make it definitely positive. Thus skip checking
403+
// if an offset is positive.
404+
//
405+
// However, if m_hasOptionalBufferOffsetArg is true, the buffer offset
406+
// is not generated if all offsets can be proven positive (this has
407+
// performance benefit as adding buffer offset is an additional add).
408+
// Also, if an argument is unaligned, buffer offset must be ON and used;
409+
// otherwise, no stateful conversion for the argument can be carried out.
376410
//
377-
// Note that offset should be positive for any implicit ptr argument
411+
// Note that offset should be positive for any implicit ptr argument,
412+
// so no need to prove it!
378413
if (!arg->isImplicitArg() &&
414+
isAlignedPointee &&
379415
(!m_hasBufferOffsetArg || m_hasOptionalBufferOffsetArg) &&
380416
IGC_IS_FLAG_DISABLED(SToSProducesPositivePointer))
381417
{
382-
// [This is conservative path]
383-
// Need to verify if there is a negative offset,
384-
// If so, no stateful message is generated.
418+
// This is for proving that the offset is positive.
385419
for (int i = 0, sz = GEPs.size(); i < sz; ++i)
386420
{
387421
GetElementPtrInst* tgep = GEPs[i];
@@ -398,7 +432,8 @@ bool StatelessToStatefull::pointerIsPositiveOffsetFromKernelArgument(
398432
updateArgInfo(arg, gepProducesPositivePointer);
399433
}
400434
}
401-
if ((gepProducesPositivePointer || m_hasBufferOffsetArg) &&
435+
if ((m_hasBufferOffsetArg ||
436+
(gepProducesPositivePointer && isAlignedPointee)) &&
402437
getOffsetFromGEP(F, GEPs, argNumber, arg->isImplicitArg(), offset))
403438
{
404439
return true;

IGC/common/igc_flags.def

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,9 @@ DECLARE_IGC_REGKEY(debugString, SIPOverrideFilePath, 0, "This key when en
196196
DECLARE_IGC_REGKEY(bool, DumpPayloadToScratch, false, "Setting this to 1/true dumps thread payload to scartch space. Used for workloads which doesnt use scartch space for other purposes", false)
197197
DECLARE_IGC_REGKEY(DWORD, DebugInternalSwitch, 0, "Code pass selection, debug only", false)
198198
DECLARE_IGC_REGKEY(bool, SToSProducesPositivePointer, false, "This key is for StatelessToStatefull optimization if the user knows the pointer offset is postive to the kernel argument.", false)
199-
DECLARE_IGC_REGKEY(bool, EnableSupportBufferOffset, false, "[Temporary]For StatelessToStatefull optimization [OCL], support implicit buffer offset argument (same as -cl-intel-has-buffer-offset-arg).", false)
200-
DECLARE_IGC_REGKEY(bool, EnableOptionalBufferOffset, true, "[Temporary]For StatelessToStatefull optimization [OCL], if true, make buffer offset optional. Valid only if buffer offset is supported.", true)
199+
DECLARE_IGC_REGKEY(bool, EnableSupportBufferOffset, false, "[debugging]For StatelessToStatefull optimization [OCL], support implicit buffer offset argument (same as -cl-intel-has-buffer-offset-arg).", false)
200+
DECLARE_IGC_REGKEY(bool, EnableOptionalBufferOffset, true, "For StatelessToStatefull optimization [OCL], if true, make buffer offset optional. Valid only if buffer offset is supported.", true)
201+
DECLARE_IGC_REGKEY(bool, UseSubDWAlignedPtrArg, false, "[OCL]If set, for kernel pointer arg such as ptr to char or short, the arg is not necessarily DW aligned", false)
201202
DECLARE_IGC_REGKEY(bool, EnableTestIGCBuiltin, false, "Enable testing igc builtin (precompiled kernels) using OCL.", false)
202203
DECLARE_IGC_REGKEY(bool, EnableCSSIMD32, false, "Enable computer shader SIMD32 mode, and fall back to lower SIMD when spill", false)
203204
DECLARE_IGC_REGKEY(bool, ForceCSSIMD32, false, "Force computer shader SIMD32 mode", false)

0 commit comments

Comments
 (0)