Skip to content

Commit a746105

Browse files
aratajewigcbot
authored and committed
Avoid generating private branch for generic accesses
This change fixes an incorrect condition that caused a branch for private memory to be generated even when allocatePrivateAsGlobalBuffer was enabled. It also restores the `-cl-intel-no-local-to-generic` option, which was removed in ee325d492f12192a8fc58f440469d2204da1eeca.
1 parent 8247f94 commit a746105

File tree

10 files changed

+72
-39
lines changed

10 files changed

+72
-39
lines changed

IGC/Compiler/CISACodeGen/CastToGASAnalysis.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ SPDX-License-Identifier: MIT
77
============================= end_copyright_notice ===========================*/
88

99
#include "Compiler/CISACodeGen/CastToGASAnalysis.h"
10+
#include "Compiler/CodeGenPublic.h"
1011
#include "Compiler/CodeGenPublicEnums.h"
1112
#include "Compiler/IGCPassSupport.h"
1213
#include "Probe/Assertion.h"
@@ -127,6 +128,13 @@ void CastToGASWrapperPass::setInfoForGroup(
127128

128129
bool CastToGASWrapperPass::runOnModule(Module& M)
129130
{
131+
m_ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
132+
GI.noLocalToGenericOptionEnabled = m_ctx->noLocalToGenericOptionEnabled();
133+
GI.allocatePrivateAsGlobalBuffer = m_ctx->allocatePrivateAsGlobalBuffer();
134+
135+
if (GI.noLocalToGenericOptionEnabled && GI.allocatePrivateAsGlobalBuffer)
136+
return false;
137+
130138
castInfoCache.clear();
131139
CallGraph& CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
132140
for (auto& F : M.functions()) {

IGC/Compiler/CISACodeGen/CastToGASAnalysis.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ SPDX-License-Identifier: MIT
1414
#include <llvm/Analysis/CallGraph.h>
1515
#include "common/LLVMWarningsPop.hpp"
1616

17+
#include "Compiler/CodeGenContextWrapper.hpp"
18+
1719
namespace IGC
1820
{
1921
enum {
@@ -24,6 +26,9 @@ namespace IGC
2426
class GASInfo {
2527
public:
2628
bool canGenericPointToPrivate(llvm::Function& F) const {
29+
if (allocatePrivateAsGlobalBuffer)
30+
return false;
31+
2732
auto E = FunctionMap.find(&F);
2833
if (E == FunctionMap.end())
2934
return true;
@@ -32,6 +37,9 @@ namespace IGC
3237
}
3338

3439
bool canGenericPointToLocal(llvm::Function& F) const {
40+
if (noLocalToGenericOptionEnabled)
41+
return false;
42+
3543
auto E = FunctionMap.find(&F);
3644
if (E == FunctionMap.end())
3745
return true;
@@ -42,6 +50,10 @@ namespace IGC
4250
using FunctionMapTy = llvm::DenseMap<const llvm::Function*, unsigned>;
4351
FunctionMapTy FunctionMap;
4452

53+
// True when -cl-intel-no-local-to-generic is enabled
54+
bool noLocalToGenericOptionEnabled = false;
55+
bool allocatePrivateAsGlobalBuffer = false;
56+
4557
friend class CastToGASWrapperPass;
4658
};
4759

@@ -63,11 +75,13 @@ namespace IGC
6375
void getAnalysisUsage(llvm::AnalysisUsage & AU) const override {
6476
AU.setPreservesAll();
6577
AU.addRequired<llvm::CallGraphWrapperPass>();
78+
AU.addRequired<CodeGenContextWrapper>();
6679
}
6780

6881
GASInfo& getGASInfo() { return GI; }
6982

7083
private:
84+
CodeGenContext* m_ctx = nullptr;
7185
GASInfo GI;
7286
llvm::DenseMap<const llvm::Function*, unsigned> castInfoCache;
7387

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8383,10 +8383,9 @@ void EmitPass::emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast)
83838383

83848384
CVariable* srcV = GetSymbol(addrSpaceCast->getOperand(0));
83858385

8386-
if ((m_pCtx->allocatePrivateAsGlobalBuffer() || !m_canGenericPointToPrivate) &&
8387-
!m_canGenericPointToLocal)
8386+
if (!m_canGenericPointToPrivate && !m_canGenericPointToLocal)
83888387
{
8389-
// If forcing global memory allocacion and there are no generic pointers to local AS,
8388+
// If forcing global memory allocation and there are no generic pointers to local AS,
83908389
// there is no need to tag generic pointers.
83918390
m_encoder->Cast(m_destination, srcV);
83928391
m_encoder->Push();

IGC/Compiler/CISACodeGen/ResolveGAS.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,6 +1430,7 @@ void LowerGPCallArg::processGASInst(Module& M)
14301430
return true;
14311431
};
14321432

1433+
bool changed = false;
14331434
IRBuilder<> IRB(M.getContext());
14341435
// Change GAS inst, such as ld/st, etc to global ld/st, etc.
14351436
for (Function& F : M)
@@ -1462,27 +1463,31 @@ void LowerGPCallArg::processGASInst(Module& M)
14621463
tI->setDebugLoc(I->getDebugLoc());
14631464
}
14641465

1465-
m_changed = true;
1466-
}
1467-
}
1468-
else if (CallInst* CallI = dyn_cast<CallInst>(I))
1469-
{
1470-
Function* Callee = CallI->getCalledFunction();
1471-
if (Callee &&
1472-
(Callee->getName().equals("__builtin_IB_to_local") ||
1473-
Callee->getName().equals("__builtin_IB_to_private")) &&
1474-
!toSkip(CallI->getOperand(0)))
1475-
{
1476-
Type* DstTy = I->getType();
1477-
Value* NewPtr = Constant::getNullValue(DstTy);
1478-
I->replaceAllUsesWith(NewPtr);
1479-
I->eraseFromParent();
1480-
1481-
m_changed = true;
1466+
changed = true;
14821467
}
14831468
}
14841469
}
14851470
}
1471+
1472+
if (changed)
1473+
{
1474+
// Above optimization changes an addrspace of load and store instructions
1475+
// operating on a generic pointer. One of the cases when optimization may
1476+
// be applied is when private memory is allocated in a global buffer. Together
1477+
// with an information that local pointers are not casted to generic addrspace,
1478+
// compiler is assured that all generic pointers point to a global memory.
1479+
// If the optimization happens, usage of generic pointer in any load and store
1480+
// instructions disappear.
1481+
//
1482+
// m_mustAllocatePrivateAsGlobalBuffer variable is necessary to point out that
1483+
// above optimization has been applied. Since PrivateMemoryResolution pass
1484+
// allocates private memory in a global buffer only if there is any load or
1485+
// store operating on a generic addrspace, the above optimization could mislead
1486+
// the logic in PrivateMemoryResolution causing private memory not being
1487+
// allocated in a global buffer.
1488+
m_ctx->m_mustAllocatePrivateAsGlobalBuffer = true;
1489+
m_changed = true;
1490+
}
14861491
}
14871492

14881493
std::vector<Function*> LowerGPCallArg::findCandidates(CallGraph& CG)

IGC/Compiler/CodeGenContext.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,15 @@ namespace IGC
567567

568568
bool OpenCLProgramContext::allocatePrivateAsGlobalBuffer() const
569569
{
570-
return forceGlobalMemoryAllocation() || (m_instrTypes.hasDynamicGenericLoadStore && platform.canForcePrivateToGlobal());
570+
return forceGlobalMemoryAllocation() ||
571+
(m_instrTypes.hasDynamicGenericLoadStore &&
572+
platform.canForcePrivateToGlobal()) ||
573+
m_mustAllocatePrivateAsGlobalBuffer;
574+
}
575+
576+
bool OpenCLProgramContext::noLocalToGenericOptionEnabled() const
577+
{
578+
return m_InternalOptions.NoLocalToGeneric;
571579
}
572580

573581
bool OpenCLProgramContext::enableTakeGlobalAddress() const
@@ -778,6 +786,11 @@ namespace IGC
778786
{
779787
IntelEnablePreRAScheduling = false;
780788
}
789+
// -cl-intel-no-local-to-generic
790+
else if (suffix.equals("-no-local-to-generic"))
791+
{
792+
NoLocalToGeneric = true;
793+
}
781794
// -cl-intel-force-global-mem-allocation
782795
else if (suffix.equals("-force-global-mem-allocation"))
783796
{
@@ -1421,6 +1434,11 @@ namespace IGC
14211434
return false;
14221435
}
14231436

1437+
bool CodeGenContext::noLocalToGenericOptionEnabled() const
1438+
{
1439+
return false;
1440+
}
1441+
14241442
bool CodeGenContext::enableTakeGlobalAddress() const
14251443
{
14261444
return false;

IGC/Compiler/CodeGenPublic.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,8 @@ namespace IGC
10901090
// When this is true, the flag "ForceGlobalMemoryAllocation" is enabled as a WA
10911091
bool m_hasGlobalInPrivateAddressSpace = false;
10921092

1093+
bool m_mustAllocatePrivateAsGlobalBuffer = false;
1094+
10931095
///// used for instruction statistic before/after pass
10941096
int instrStat[TOTAL_TYPES][TOTAL_STAGE];
10951097

@@ -1264,6 +1266,7 @@ namespace IGC
12641266
virtual uint32_t getNumGRFPerThread() const;
12651267
virtual bool forceGlobalMemoryAllocation() const;
12661268
virtual bool allocatePrivateAsGlobalBuffer() const;
1269+
virtual bool noLocalToGenericOptionEnabled() const;
12671270
virtual bool enableTakeGlobalAddress() const;
12681271
virtual int16_t getVectorCoalescingControl() const;
12691272
virtual uint32_t getPrivateMemoryMinimalSizePerThread() const;
@@ -1765,6 +1768,7 @@ namespace IGC
17651768
bool CompileOneKernelAtTime = false;
17661769

17671770
// Generic address related
1771+
bool NoLocalToGeneric = false;
17681772
bool ForceGlobalMemoryAllocation = false;
17691773

17701774
// -1 : initial value that means it is not set from cmdline
@@ -1915,6 +1919,7 @@ namespace IGC
19151919
uint32_t getNumThreadsPerEU() const override;
19161920
bool forceGlobalMemoryAllocation() const override;
19171921
bool allocatePrivateAsGlobalBuffer() const override;
1922+
bool noLocalToGenericOptionEnabled() const override;
19181923
bool enableTakeGlobalAddress() const override;
19191924
int16_t getVectorCoalescingControl() const override;
19201925
uint32_t getPrivateMemoryMinimalSizePerThread() const override;

IGC/Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,7 @@ bool GenericAddressDynamicResolution::visitLoadStoreInst(Instruction& I)
220220
}
221221

222222
if (pointerAddressSpace == ADDRESS_SPACE_GENERIC) {
223-
if((m_ctx->allocatePrivateAsGlobalBuffer() || !m_needPrivateBranches) &&
224-
!m_needLocalBranches)
223+
if(!m_needPrivateBranches && !m_needLocalBranches)
225224
{
226225
resolveGASWithoutBranches(I, pointerOperand);
227226
}
@@ -294,7 +293,7 @@ void GenericAddressDynamicResolution::resolveGAS(Instruction& I, Value* pointerO
294293
// GAS needs to resolve to private only if
295294
// 1) private is NOT allocated in global space; and
296295
// 2) there is a cast from private to GAS.
297-
bool needPrivateBranch = !(m_ctx->allocatePrivateAsGlobalBuffer()) || m_needPrivateBranches;
296+
bool needPrivateBranch = m_needPrivateBranches;
298297
bool needLocalBranch = m_needLocalBranches;
299298

300299
auto createBlock = [&](const Twine& BlockName, const Twine& LoadName, IGC::ADDRESS_SPACE addressSpace, Value*& load)

IGC/Compiler/tests/GenericAddressDynamicResolution/CastToGASAnalysis-cyclic-call-graph.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,9 @@ define spir_func void @f2(i32 addrspace(4)* %ptr) {
3131
; CHECK: %[[PTI:.*]] = ptrtoint i32 addrspace(4)* %ptr to i64
3232
; CHECK: %[[TAG:.*]] = lshr i64 %1, 61
3333
; CHECK: switch i64 %2, label %GlobalBlock [
34-
; CHECK: i64 1, label %PrivateBlock
3534
; CHECK: i64 2, label %LocalBlock
3635
; CHECK: ]
3736

38-
; CHECK: PrivateBlock:
39-
; CHECK: %[[PRIVATE_PTR:.*]] = addrspacecast i32 addrspace(4)* %ptr to i32*
40-
; CHECK: store i32 123, i32* %[[PRIVATE_PTR]], align 4
41-
4237
; CHECK: LocalBlock:
4338
; CHECK: %[[LOCAL_PTR:.*]] = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
4439
; CHECK: store i32 123, i32 addrspace(3)* %[[LOCAL_PTR]], align 4

IGC/Compiler/tests/GenericAddressDynamicResolution/CastToGASAnalysis-referenced-indirectly.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,9 @@ define spir_kernel void @kernel(i32 addrspace(1)* %global_buffer) {
2727
; CHECK: %[[PTI:.*]] = ptrtoint i32 addrspace(4)* %generic_ptr to i64
2828
; CHECK: %[[TAG:.*]] = lshr i64 %[[PTI]], 61
2929
; CHECK: switch i64 %[[TAG]], label %GlobalBlock [
30-
; CHECK: i64 1, label %PrivateBlock
3130
; CHECK: i64 2, label %LocalBlock
3231
; CHECK: ]
3332

34-
; CHECK: PrivateBlock:
35-
; CHECK: %[[PRIVATE_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32*
36-
; CHECK: store i32 5, i32* %[[PRIVATE_PTR]], align 4
37-
3833
; CHECK: LocalBlock:
3934
; CHECK: %[[LOCAL_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32 addrspace(3)*
4035
; CHECK: store i32 5, i32 addrspace(3)* %[[LOCAL_PTR]], align 4

IGC/Compiler/tests/GenericAddressDynamicResolution/CastToGASAnalysis-simple.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,9 @@ return:
4141
; CHECK: %[[PTI:.*]] = ptrtoint i32 addrspace(4)* %generic_ptr to i64
4242
; CHECK: %[[TAG:.*]] = lshr i64 %[[PTI]], 61
4343
; CHECK: switch i64 %[[TAG]], label %GlobalBlock [
44-
; CHECK: i64 1, label %PrivateBlock
4544
; CHECK: i64 2, label %LocalBlock
4645
; CHECK: ]
4746

48-
; CHECK: PrivateBlock:
49-
; CHECK: %[[PRIVATE_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32*
50-
; CHECK: store i32 5, i32* %[[PRIVATE_PTR]], align 4
51-
5247
; CHECK: LocalBlock:
5348
; CHECK: %[[LOCAL_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32 addrspace(3)*
5449
; CHECK: store i32 5, i32 addrspace(3)* %[[LOCAL_PTR]], align 4

0 commit comments

Comments
 (0)