Skip to content

Commit ff6d318

Browse files
aratajew authored and igcbot committed
[Autobackout][FuncReg]Revert of change: a746105
Avoid generating private branch for generic accesses. This change fixes an incorrect condition which was causing generation of a branch for private memory even if allocatePrivateAsGlobalBuffer was enabled. This change also brings back the `-cl-intel-no-local-to-generic` option, which was removed in ee325d492f12192a8fc58f440469d2204da1eeca.
1 parent 8979657 commit ff6d318

File tree

10 files changed

+39
-72
lines changed

10 files changed

+39
-72
lines changed

IGC/Compiler/CISACodeGen/CastToGASAnalysis.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ SPDX-License-Identifier: MIT
77
============================= end_copyright_notice ===========================*/
88

99
#include "Compiler/CISACodeGen/CastToGASAnalysis.h"
10-
#include "Compiler/CodeGenPublic.h"
1110
#include "Compiler/CodeGenPublicEnums.h"
1211
#include "Compiler/IGCPassSupport.h"
1312
#include "Probe/Assertion.h"
@@ -128,13 +127,6 @@ void CastToGASWrapperPass::setInfoForGroup(
128127

129128
bool CastToGASWrapperPass::runOnModule(Module& M)
130129
{
131-
m_ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
132-
GI.noLocalToGenericOptionEnabled = m_ctx->noLocalToGenericOptionEnabled();
133-
GI.allocatePrivateAsGlobalBuffer = m_ctx->allocatePrivateAsGlobalBuffer();
134-
135-
if (GI.noLocalToGenericOptionEnabled && GI.allocatePrivateAsGlobalBuffer)
136-
return false;
137-
138130
castInfoCache.clear();
139131
CallGraph& CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
140132
for (auto& F : M.functions()) {

IGC/Compiler/CISACodeGen/CastToGASAnalysis.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ SPDX-License-Identifier: MIT
1414
#include <llvm/Analysis/CallGraph.h>
1515
#include "common/LLVMWarningsPop.hpp"
1616

17-
#include "Compiler/CodeGenContextWrapper.hpp"
18-
1917
namespace IGC
2018
{
2119
enum {
@@ -26,9 +24,6 @@ namespace IGC
2624
class GASInfo {
2725
public:
2826
bool canGenericPointToPrivate(llvm::Function& F) const {
29-
if (allocatePrivateAsGlobalBuffer)
30-
return false;
31-
3227
auto E = FunctionMap.find(&F);
3328
if (E == FunctionMap.end())
3429
return true;
@@ -37,9 +32,6 @@ namespace IGC
3732
}
3833

3934
bool canGenericPointToLocal(llvm::Function& F) const {
40-
if (noLocalToGenericOptionEnabled)
41-
return false;
42-
4335
auto E = FunctionMap.find(&F);
4436
if (E == FunctionMap.end())
4537
return true;
@@ -50,10 +42,6 @@ namespace IGC
5042
using FunctionMapTy = llvm::DenseMap<const llvm::Function*, unsigned>;
5143
FunctionMapTy FunctionMap;
5244

53-
// True when -cl-intel-no-local-to-generic is enabled
54-
bool noLocalToGenericOptionEnabled = false;
55-
bool allocatePrivateAsGlobalBuffer = false;
56-
5745
friend class CastToGASWrapperPass;
5846
};
5947

@@ -75,13 +63,11 @@ namespace IGC
7563
void getAnalysisUsage(llvm::AnalysisUsage & AU) const override {
7664
AU.setPreservesAll();
7765
AU.addRequired<llvm::CallGraphWrapperPass>();
78-
AU.addRequired<CodeGenContextWrapper>();
7966
}
8067

8168
GASInfo& getGASInfo() { return GI; }
8269

8370
private:
84-
CodeGenContext* m_ctx = nullptr;
8571
GASInfo GI;
8672
llvm::DenseMap<const llvm::Function*, unsigned> castInfoCache;
8773

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8383,9 +8383,10 @@ void EmitPass::emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast)
83838383

83848384
CVariable* srcV = GetSymbol(addrSpaceCast->getOperand(0));
83858385

8386-
if (!m_canGenericPointToPrivate && !m_canGenericPointToLocal)
8386+
if ((m_pCtx->allocatePrivateAsGlobalBuffer() || !m_canGenericPointToPrivate) &&
8387+
!m_canGenericPointToLocal)
83878388
{
8388-
// If forcing global memory allocation and there are no generic pointers to local AS,
8389+
// If forcing global memory allocation and there are no generic pointers to local AS,
83898390
// there is no need to tag generic pointers.
83908391
m_encoder->Cast(m_destination, srcV);
83918392
m_encoder->Push();

IGC/Compiler/CISACodeGen/ResolveGAS.cpp

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,7 +1430,6 @@ void LowerGPCallArg::processGASInst(Module& M)
14301430
return true;
14311431
};
14321432

1433-
bool changed = false;
14341433
IRBuilder<> IRB(M.getContext());
14351434
// Change GAS inst, such as ld/st, etc to global ld/st, etc.
14361435
for (Function& F : M)
@@ -1463,30 +1462,26 @@ void LowerGPCallArg::processGASInst(Module& M)
14631462
tI->setDebugLoc(I->getDebugLoc());
14641463
}
14651464

1466-
changed = true;
1465+
m_changed = true;
14671466
}
14681467
}
1469-
}
1470-
}
1468+
else if (CallInst* CallI = dyn_cast<CallInst>(I))
1469+
{
1470+
Function* Callee = CallI->getCalledFunction();
1471+
if (Callee &&
1472+
(Callee->getName().equals("__builtin_IB_to_local") ||
1473+
Callee->getName().equals("__builtin_IB_to_private")) &&
1474+
!toSkip(CallI->getOperand(0)))
1475+
{
1476+
Type* DstTy = I->getType();
1477+
Value* NewPtr = Constant::getNullValue(DstTy);
1478+
I->replaceAllUsesWith(NewPtr);
1479+
I->eraseFromParent();
14711480

1472-
if (changed)
1473-
{
1474-
// Above optimization changes an addrspace of load and store instructions
1475-
// operating on a generic pointer. One of the cases when optimization may
1476-
// be applied is when private memory is allocated in a global buffer. Together
1477-
// with an information that local pointers are not casted to generic addrspace,
1478-
// compiler is assured that all generic pointers point to a global memory.
1479-
// If the optimization happens, usage of generic pointer in any load and store
1480-
// instructions disappear.
1481-
//
1482-
// m_mustAllocatePrivateAsGlobalBuffer variable is necessary to point out that
1483-
// above optimization has been applied. Since PrivateMemoryResolution pass
1484-
// allocates private memory in a global buffer only if there is any load or
1485-
// store operating on a generic addrspace, the above optimization could mislead
1486-
// the logic in PrivateMemoryResolution causing private memory not being
1487-
// allocated in a global buffer.
1488-
m_ctx->m_mustAllocatePrivateAsGlobalBuffer = true;
1489-
m_changed = true;
1481+
m_changed = true;
1482+
}
1483+
}
1484+
}
14901485
}
14911486
}
14921487

IGC/Compiler/CodeGenContext.cpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -567,15 +567,7 @@ namespace IGC
567567

568568
bool OpenCLProgramContext::allocatePrivateAsGlobalBuffer() const
569569
{
570-
return forceGlobalMemoryAllocation() ||
571-
(m_instrTypes.hasDynamicGenericLoadStore &&
572-
platform.canForcePrivateToGlobal()) ||
573-
m_mustAllocatePrivateAsGlobalBuffer;
574-
}
575-
576-
bool OpenCLProgramContext::noLocalToGenericOptionEnabled() const
577-
{
578-
return m_InternalOptions.NoLocalToGeneric;
570+
return forceGlobalMemoryAllocation() || (m_instrTypes.hasDynamicGenericLoadStore && platform.canForcePrivateToGlobal());
579571
}
580572

581573
bool OpenCLProgramContext::enableTakeGlobalAddress() const
@@ -786,11 +778,6 @@ namespace IGC
786778
{
787779
IntelEnablePreRAScheduling = false;
788780
}
789-
// -cl-intel-no-local-to-generic
790-
else if (suffix.equals("-no-local-to-generic"))
791-
{
792-
NoLocalToGeneric = true;
793-
}
794781
// -cl-intel-force-global-mem-allocation
795782
else if (suffix.equals("-force-global-mem-allocation"))
796783
{
@@ -1434,11 +1421,6 @@ namespace IGC
14341421
return false;
14351422
}
14361423

1437-
bool CodeGenContext::noLocalToGenericOptionEnabled() const
1438-
{
1439-
return false;
1440-
}
1441-
14421424
bool CodeGenContext::enableTakeGlobalAddress() const
14431425
{
14441426
return false;

IGC/Compiler/CodeGenPublic.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,8 +1090,6 @@ namespace IGC
10901090
// When this is true, the flag "ForceGlobalMemoryAllocation" is enabled as a WA
10911091
bool m_hasGlobalInPrivateAddressSpace = false;
10921092

1093-
bool m_mustAllocatePrivateAsGlobalBuffer = false;
1094-
10951093
///// used for instruction statistic before/after pass
10961094
int instrStat[TOTAL_TYPES][TOTAL_STAGE];
10971095

@@ -1266,7 +1264,6 @@ namespace IGC
12661264
virtual uint32_t getNumGRFPerThread() const;
12671265
virtual bool forceGlobalMemoryAllocation() const;
12681266
virtual bool allocatePrivateAsGlobalBuffer() const;
1269-
virtual bool noLocalToGenericOptionEnabled() const;
12701267
virtual bool enableTakeGlobalAddress() const;
12711268
virtual int16_t getVectorCoalescingControl() const;
12721269
virtual uint32_t getPrivateMemoryMinimalSizePerThread() const;
@@ -1768,7 +1765,6 @@ namespace IGC
17681765
bool CompileOneKernelAtTime = false;
17691766

17701767
// Generic address related
1771-
bool NoLocalToGeneric = false;
17721768
bool ForceGlobalMemoryAllocation = false;
17731769

17741770
// -1 : initial value that means it is not set from cmdline
@@ -1919,7 +1915,6 @@ namespace IGC
19191915
uint32_t getNumThreadsPerEU() const override;
19201916
bool forceGlobalMemoryAllocation() const override;
19211917
bool allocatePrivateAsGlobalBuffer() const override;
1922-
bool noLocalToGenericOptionEnabled() const override;
19231918
bool enableTakeGlobalAddress() const override;
19241919
int16_t getVectorCoalescingControl() const override;
19251920
uint32_t getPrivateMemoryMinimalSizePerThread() const override;

IGC/Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,8 @@ bool GenericAddressDynamicResolution::visitLoadStoreInst(Instruction& I)
220220
}
221221

222222
if (pointerAddressSpace == ADDRESS_SPACE_GENERIC) {
223-
if(!m_needPrivateBranches && !m_needLocalBranches)
223+
if((m_ctx->allocatePrivateAsGlobalBuffer() || !m_needPrivateBranches) &&
224+
!m_needLocalBranches)
224225
{
225226
resolveGASWithoutBranches(I, pointerOperand);
226227
}
@@ -293,7 +294,7 @@ void GenericAddressDynamicResolution::resolveGAS(Instruction& I, Value* pointerO
293294
// GAS needs to resolve to private only if
294295
// 1) private is NOT allocated in global space; and
295296
// 2) there is a cast from private to GAS.
296-
bool needPrivateBranch = m_needPrivateBranches;
297+
bool needPrivateBranch = !(m_ctx->allocatePrivateAsGlobalBuffer()) || m_needPrivateBranches;
297298
bool needLocalBranch = m_needLocalBranches;
298299

299300
auto createBlock = [&](const Twine& BlockName, const Twine& LoadName, IGC::ADDRESS_SPACE addressSpace, Value*& load)

IGC/Compiler/tests/GenericAddressDynamicResolution/CastToGASAnalysis-cyclic-call-graph.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,14 @@ define spir_func void @f2(i32 addrspace(4)* %ptr) {
3131
; CHECK: %[[PTI:.*]] = ptrtoint i32 addrspace(4)* %ptr to i64
3232
; CHECK: %[[TAG:.*]] = lshr i64 %1, 61
3333
; CHECK: switch i64 %2, label %GlobalBlock [
34+
; CHECK: i64 1, label %PrivateBlock
3435
; CHECK: i64 2, label %LocalBlock
3536
; CHECK: ]
3637

38+
; CHECK: PrivateBlock:
39+
; CHECK: %[[PRIVATE_PTR:.*]] = addrspacecast i32 addrspace(4)* %ptr to i32*
40+
; CHECK: store i32 123, i32* %[[PRIVATE_PTR]], align 4
41+
3742
; CHECK: LocalBlock:
3843
; CHECK: %[[LOCAL_PTR:.*]] = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
3944
; CHECK: store i32 123, i32 addrspace(3)* %[[LOCAL_PTR]], align 4

IGC/Compiler/tests/GenericAddressDynamicResolution/CastToGASAnalysis-referenced-indirectly.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,14 @@ define spir_kernel void @kernel(i32 addrspace(1)* %global_buffer) {
2727
; CHECK: %[[PTI:.*]] = ptrtoint i32 addrspace(4)* %generic_ptr to i64
2828
; CHECK: %[[TAG:.*]] = lshr i64 %[[PTI]], 61
2929
; CHECK: switch i64 %[[TAG]], label %GlobalBlock [
30+
; CHECK: i64 1, label %PrivateBlock
3031
; CHECK: i64 2, label %LocalBlock
3132
; CHECK: ]
3233

34+
; CHECK: PrivateBlock:
35+
; CHECK: %[[PRIVATE_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32*
36+
; CHECK: store i32 5, i32* %[[PRIVATE_PTR]], align 4
37+
3338
; CHECK: LocalBlock:
3439
; CHECK: %[[LOCAL_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32 addrspace(3)*
3540
; CHECK: store i32 5, i32 addrspace(3)* %[[LOCAL_PTR]], align 4

IGC/Compiler/tests/GenericAddressDynamicResolution/CastToGASAnalysis-simple.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@ return:
4141
; CHECK: %[[PTI:.*]] = ptrtoint i32 addrspace(4)* %generic_ptr to i64
4242
; CHECK: %[[TAG:.*]] = lshr i64 %[[PTI]], 61
4343
; CHECK: switch i64 %[[TAG]], label %GlobalBlock [
44+
; CHECK: i64 1, label %PrivateBlock
4445
; CHECK: i64 2, label %LocalBlock
4546
; CHECK: ]
4647

48+
; CHECK: PrivateBlock:
49+
; CHECK: %[[PRIVATE_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32*
50+
; CHECK: store i32 5, i32* %[[PRIVATE_PTR]], align 4
51+
4752
; CHECK: LocalBlock:
4853
; CHECK: %[[LOCAL_PTR:.*]] = addrspacecast i32 addrspace(4)* %generic_ptr to i32 addrspace(3)*
4954
; CHECK: store i32 5, i32 addrspace(3)* %[[LOCAL_PTR]], align 4

0 commit comments

Comments
 (0)