Skip to content

Commit 2b085bb

Browse files
aratajew authored and pszymich committed
Always use bufferOffset for unaligned pointers
When a pointer to SVM is not aligned to 4 bytes, Neo aligns it and stores the difference between the aligned and unaligned pointers in the `bufferOffset` argument, so that IGC can correctly access the memory. Neo must align the pointer because the hardware expects the surface state base address to be aligned to 4 bytes. This change prevents the use of the `bufferOffset` argument from being optimized out when IGC cannot guarantee that a pointer is 4-byte aligned. (cherry picked from commit 37ea744)
1 parent 6d5e619 commit 2b085bb

File tree

10 files changed

+70
-31
lines changed

10 files changed

+70
-31
lines changed

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -290,11 +290,6 @@ namespace IGC
290290
IntelHasPositivePointerOffset = true;
291291
}
292292

293-
if (internalOptions.hasArg(OPT_has_subdw_aligned_ptr_arg_common))
294-
{
295-
IntelHasSubDWAlignedPtrArg = true;
296-
}
297-
298293
if (internalOptions.hasArg(OPT_disable_a64wa_common))
299294
{
300295
IntelDisableA64WA = true;

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,6 @@ namespace IGC
4848
bool IntelHasPositivePointerOffset = false;
4949
bool IntelHasBufferOffsetArg = false;
5050
bool IntelBufferOffsetArgOptional = true;
51-
bool IntelHasSubDWAlignedPtrArg = false;
52-
// default: false, meaning kernel's sub-DW ptrArgs (char*, short*) are DW-aligned.
53-
// This default is stronger than the natural alignment implied by char*/short*. But
54-
// for historical reason, we have this.
5551

5652
bool replaceGlobalOffsetsByZero = false;
5753
bool IntelEnablePreRAScheduling = true;

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.cpp

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ SPDX-License-Identifier: MIT
1919
#include <llvm/IR/Instructions.h>
2020
#include <llvm/IR/GetElementPtrTypeIterator.h>
2121
#include <llvm/Analysis/ValueTracking.h>
22+
#include <llvm/Transforms/Utils/Local.h>
2223
#include "common/LLVMWarningsPop.hpp"
2324
#include <string>
2425
#include "Probe/Assertion.h"
@@ -132,7 +133,6 @@ StatelessToStateful::StatelessToStateful()
132133
: FunctionPass(ID),
133134
m_hasBufferOffsetArg(false),
134135
m_hasOptionalBufferOffsetArg(false),
135-
m_hasSubDWAlignedPtrArg(false),
136136
m_hasPositivePointerOffset(false),
137137
m_ACT(nullptr),
138138
m_pImplicitArgs(nullptr),
@@ -175,13 +175,11 @@ bool StatelessToStateful::runOnFunction(llvm::Function& F)
175175
m_hasOptionalBufferOffsetArg = (m_hasBufferOffsetArg &&
176176
(IGC_IS_FLAG_ENABLED(EnableOptionalBufferOffset) || modMD->compOpt.BufferOffsetArgOptional));
177177

178-
m_hasSubDWAlignedPtrArg = (IGC_IS_FLAG_ENABLED(UseSubDWAlignedPtrArg) || modMD->compOpt.HasSubDWAlignedPtrArg);
179-
180178
m_hasPositivePointerOffset = (IGC_IS_FLAG_ENABLED(SToSProducesPositivePointer) || modMD->compOpt.HasPositivePointerOffset);
181179

182180
m_pImplicitArgs = new ImplicitArgs(F, pMdUtils);
183-
CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
184-
m_pKernelArgs = new KernelArgs(F, &(F.getParent()->getDataLayout()), pMdUtils, modMD, ctx->platform.getGRFSize());
181+
m_ctx = static_cast<OpenCLProgramContext*>(getAnalysis<CodeGenContextWrapper>().getCodeGenContext());
182+
m_pKernelArgs = new KernelArgs(F, &(F.getParent()->getDataLayout()), pMdUtils, modMD, m_ctx->platform.getGRFSize());
185183

186184
findPromotableInstructions();
187185
promote();
@@ -428,10 +426,28 @@ bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(
428426
// guaranteed to be DW-aligned.)
429427
//
430428
// Note that implicit arg is always aligned.
431-
bool isAlignedPointee =
432-
(!m_hasSubDWAlignedPtrArg || arg->isImplicitArg())
433-
? true
434-
: (getPointeeAlign(DL, base) >= 4);
429+
bool isAlignedPointee = false;
430+
if (arg->isImplicitArg())
431+
{
432+
isAlignedPointee = true;
433+
}
434+
else
435+
{
436+
isAlignedPointee = getPointeeAlign(DL, base) >= 4 ||
437+
// The intent of getKnownAlignment below is to check if any llvm.assume intrinsic provides
438+
// a hint about the base pointer alignment
439+
getKnownAlignment((Value*)arg->getArg(), *DL, F->getEntryBlock().getFirstNonPHI(), AC) >= 4;
440+
}
441+
442+
// When compiling with patch tokens, always assume that the address
443+
// is aligned. This is a workaround for old OneMKL Releases. Assuming
444+
// that the address is not aligned leads to using bufferOffset implicit
445+
// argument. The additional argument confuses compatibility check in OneMKL
446+
// and forces it to fall back to different kernels.
447+
// TODO: Remove below if statement as soon as support for old OneMKL
448+
// versions is dropped.
449+
if (!m_ctx->enableZEBinary())
450+
isAlignedPointee = true;
435451

436452
// special handling
437453
if (m_supportNonGEPPtr && gep == nullptr && !arg->isImplicitArg())

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.hpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ SPDX-License-Identifier: MIT
1010

1111
#include "AdaptorCommon/ImplicitArgs.hpp"
1212
#include "Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp"
13+
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
1314
#include "Compiler/MetaDataUtilsWrapper.h"
1415
#include "common/LLVMWarningsPush.hpp"
1516
#include <llvm/Pass.h>
@@ -151,11 +152,6 @@ namespace IGC
151152
// can be on or off, which is indicated by this boolean flag.
152153
bool m_hasOptionalBufferOffsetArg;
153154

154-
// For historic reason, kernel ptrArgs, such as char*, short*, are assumed to
155-
// be aligned on DW (which is stronger than what OCL's natural alignment) in this
156-
// stateful optimization. If this is not a case, this arg should be set to true!
157-
bool m_hasSubDWAlignedPtrArg;
158-
159155
// When true, every messages that are in ptrArg + offset will have offset >= 0.
160156
bool m_hasPositivePointerOffset;
161157

@@ -171,6 +167,7 @@ namespace IGC
171167
: nullptr);
172168
}
173169

170+
OpenCLProgramContext* m_ctx;
174171
ImplicitArgs* m_pImplicitArgs;
175172
KernelArgs* m_pKernelArgs;
176173
ArgInfoMap m_argsInfo;
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt %s -S -o - -serialize-igc-metadata -igc-stateless-to-stateful-resolution -platformpvc | FileCheck %s
10+
11+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
12+
13+
define spir_kernel void @test(i8 addrspace(1)* %dst, i32 %bufferOffset) {
14+
entry:
15+
; CHECK: %[[OFFSET:[0-9]+]] = add i32 %bufferOffset, 3
16+
; CHECK: %[[PTR:[0-9]+]] = inttoptr i32 %[[OFFSET]] to i8 addrspace(131072)*
17+
; CHECK: store i8 0, i8 addrspace(131072)* %[[PTR]], align 1
18+
%ptr = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 3
19+
store i8 0, i8 addrspace(1)* %ptr, align 1
20+
ret void
21+
}
22+
23+
!IGCMetadata = !{!0}
24+
!igc.functions = !{!1}
25+
26+
!0 = !{!"ModuleMD", !5, !16}
27+
!1 = !{void (i8 addrspace(1)*, i32)* @test, !2}
28+
!2 = !{!3, !14}
29+
!3 = !{!"function_type", i32 0}
30+
!4 = !{}
31+
!5 = !{!"FuncMD", !6, !7}
32+
!6 = !{!"FuncMDMap[0]", void (i8 addrspace(1)*, i32)* @test}
33+
!7 = !{!"FuncMDValue[0]", !8}
34+
!8 = !{!"resAllocMD", !9}
35+
!9 = !{!"argAllocMDList", !10}
36+
!10 = !{!"argAllocMDListVec[0]", !11}
37+
!11 = !{!"type", i32 1}
38+
!12 = !{!"extensionType", i32 -1}
39+
!13 = !{!"indexType", i32 0}
40+
!14 = !{!"implicit_arg_desc", !15}
41+
!15 = !{i32 14}
42+
!16 = !{!"compOpt", !17}
43+
!17 = !{!"HasBufferOffsetArg", i1 true}

IGC/OCLFE/igd_fcl_mcl/source/clang_tb.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,8 +1347,6 @@ namespace TC
13471347
(strcmp(pParam, "-ze-opt-has-buffer-offset-arg") == 0) || //temporary options
13481348
(strcmp(pParam, "-cl-intel-buffer-offset-arg-required") == 0) || //temporary options
13491349
(strcmp(pParam, "-ze-opt-buffer-offset-arg-required") == 0) || //temporary options
1350-
(strcmp(pParam, "-cl-intel-has-subDW-aligned-ptr-arg") == 0) || //temporary options
1351-
(strcmp(pParam, "-ze-opt-has-subDW-aligned-ptr-arg") == 0) || //temporary options
13521350
(strcmp(pParam, "-cl-force-global-mem-allocation") == 0) || // temp
13531351
(strcmp(pParam, "-ze-force-global-mem-allocation") == 0) || // temp
13541352
(strcmp(pParam, "-cl-no-local-to-generic") == 0) || // temp

IGC/Options/include/igc/Options/IGCInternalOptions.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,6 @@ defm buffer_offset_arg_required : CommonFlag<"buffer-offset-arg-required">;
3939
// -cl-intel-has-positive-pointer-offset, -ze-opt-has-positive-pointer-offset
4040
defm has_positive_pointer_offset : CommonFlag<"has-positive-pointer-offset">;
4141

42-
// -cl-intel-has-subDW-aligned-ptr-arg, -ze-opt-has-subDW-aligned-ptr-arg
43-
defm has_subdw_aligned_ptr_arg : CommonFlag<"has-subDW-aligned-ptr-arg">;
44-
4542
// -cl-intel-disable-a64WA
4643
defm disable_a64wa : CommonFlag<"disable-a64WA">;
4744

IGC/common/MDFrameWork.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,6 @@ namespace IGC
397397
bool HasPositivePointerOffset = false;
398398
bool HasBufferOffsetArg = false;
399399
bool BufferOffsetArgOptional = true;
400-
bool HasSubDWAlignedPtrArg = false;
401400
bool replaceGlobalOffsetsByZero = false;
402401
unsigned forcePixelShaderSIMDMode = 0;
403402
bool pixelShaderDoNotAbortOnSpill = false;

IGC/common/igc_flags.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,6 @@ DECLARE_IGC_REGKEY(DWORD, DebugInternalSwitch, 0, "Code pass select
292292
DECLARE_IGC_REGKEY(bool, SToSProducesPositivePointer, false, "This key is for StatelessToStateful optimization if the user knows the pointer offset is postive to the kernel argument.", false)
293293
DECLARE_IGC_REGKEY(bool, EnableSupportBufferOffset, false, "[debugging]For StatelessToStateful optimization [OCL], support implicit buffer offset argument (same as -cl-intel-has-buffer-offset-arg).", false)
294294
DECLARE_IGC_REGKEY(bool, EnableOptionalBufferOffset, true, "For StatelessToStateful optimization [OCL], if true, make buffer offset optional. Valid only if buffer offset is supported.", true)
295-
DECLARE_IGC_REGKEY(bool, UseSubDWAlignedPtrArg, false, "[OCL]If set, for kernel pointer arg such as ptr to char or short, the arg is not necessarily DW aligned", false)
296295
DECLARE_IGC_REGKEY(bool, EnableTestIGCBuiltin, false, "Enable testing igc builtin (precompiled kernels) using OCL.", false)
297296
DECLARE_IGC_REGKEY(bool, TestIGCPreCompiledFunctions, false, "Enable testing for precompiled kernels. [TEST ONLY]", false)
298297
DECLARE_IGC_REGKEY(bool, EnableCSSIMD32, false, "Enable computer shader SIMD32 mode, and fall back to lower SIMD when spill", false)

documentation/configuration_flags.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ $ export IGC_ShaderDumpEnable=1
307307
| `ShaderOverride` | Will override any LLVM shader with matching name in c:\\Intel\\IGC\\ShaderOverride | - |
308308
| `ShaderPassDisable` | Disable specific passes eg. '9;17-19;239-;Error Check;ResolveOCLAtomics:2;Dead Code Elimination:3-5;BreakConstantExprPass:7-'<br/> disable pass 9, disable passes from 17 to 19, disable all passes after 238, disable all occurrences of pass Error Check,<br/> disable second occurrence of ResolveOCLAtomics, disable pass Dead Code Elimination occurrences from 3 to 5,<br/> disable all BreakConstantExprPass after his 6 occurrence<br/> To show a list of pass names and their occurrence set ShaderDisplayAllPassesNames.<br/> Must be used with ShaderDumpEnableAll flag. | - |
309309
| `SystemThreadEnable` | This key forces software to create a system thread. The system thread may still be created by software even<br/> if this control is set to false.The system thread is invoked if either the software requires<br/> exception handling or if kernel debugging is active and a breakpoint is hit. | - |
310-
| `UseSubDWAlignedPtrArg` | [OCL]If set, for kernel pointer arg such as ptr to char or short, the arg is not necessarily DW aligned | - |
311310
| `ld2dmsInstsClubbingThreshold` | Do not club more than these ld2dms insts into the new BB during MCSOpt | - |
312311
| `manualEnableRSWA` | Enable read suppression WA for the send and indirect access | - |
313312
## Shader dumping

0 commit comments

Comments
 (0)