Skip to content

Commit b662d85

Browse files
sys-d3djenkinsgfxbot
authored and committed
Backout of f6b3af5 due to Functional Regression
Change-Id: I26fcbe398f839e67a8172cf84be3e80833e7810b
1 parent c31fbdd commit b662d85

File tree

2 files changed

+11
-20
lines changed

2 files changed

+11
-20
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -199,14 +199,6 @@ unsigned int LowerGEPForPrivMem::extractAllocaSize(llvm::AllocaInst* pAlloca)
199199

200200
bool LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst* pAlloca)
201201
{
202-
auto WI = &getAnalysis<WIAnalysis>();
203-
bool isUniformAlloca = WI->whichDepend(pAlloca) == WIAnalysis::UNIFORM;
204-
if(isUniformAlloca)
205-
{
206-
IRBuilder<> builder(pAlloca);
207-
MDNode* node = MDNode::get(pAlloca->getContext(), ConstantAsMetadata::get(builder.getInt1(true)));
208-
pAlloca->setMetadata("uniform", node);
209-
}
210202
unsigned int allocaSize = extractAllocaSize(pAlloca);
211203
unsigned int allowedAllocaSizeInBytes = MAX_ALLOCA_PROMOTE_GRF_NUM * 4;
212204

@@ -227,6 +219,8 @@ bool LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst* pAlloca)
227219
{
228220
return false;
229221
}
222+
auto WI = &getAnalysis<WIAnalysis>();
223+
bool isUniformAlloca = WI->whichDepend(pAlloca) == WIAnalysis::UNIFORM;
230224
if(isUniformAlloca)
231225
{
232226
// Heuristic: for uniform alloca we divide the size by 8 to adjust the pressure

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -750,12 +750,12 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
750750
// Creates intrinsics that will be lowered in the CodeGen and will handle the stack-pointer
751751
Function *stackAllocaFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_StackAlloca);
752752
Instruction *simdLaneId16 = CallInst::Create(simdLaneIdFunc, VALUE_NAME("simdLaneId16"), pEntryPoint);
753-
Value *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
753+
Instruction *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
754754
Instruction *simdSize = CallInst::Create(simdSizeFunc, VALUE_NAME("simdSize"), pEntryPoint);
755755
for (auto pAI : allocaInsts)
756756
{
757757
assert(!pAI->use_empty() && "Should not reach here with alloca instruction that has no usage!");
758-
bool isUniform = pAI->getMetadata("uniform") != nullptr;
758+
759759
llvm::IRBuilder<> builder(pAI);
760760
IF_DEBUG_INFO(builder.SetCurrentDebugLocation(emptyDebugLoc));
761761

@@ -764,8 +764,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
764764
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize(pAI);
765765

766766
Value* bufferOffset = builder.CreateMul(simdSize, ConstantInt::get(typeInt32, scalarBufferOffset), VALUE_NAME(pAI->getName() + ".SIMDBufferOffset"));
767-
Value* increment = isUniform ? builder.getInt32(0) : simdLaneId;
768-
Value* perLaneOffset = builder.CreateMul(increment, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
767+
Value* perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
769768
Value* totalOffset = builder.CreateAdd(bufferOffset, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
770769
Value* stackAlloca = builder.CreateCall(stackAllocaFunc, totalOffset, VALUE_NAME("stackAlloca"));
771770
Value* privateBuffer = builder.CreatePointerCast(stackAlloca, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));
@@ -787,7 +786,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
787786
// PrivateMemoryUsageAnalysis pass, no need to run AddImplicitArgs pass.
788787

789788
Instruction *simdLaneId16 = CallInst::Create(simdLaneIdFunc, VALUE_NAME("simdLaneId16"), pEntryPoint);
790-
Value *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
789+
Instruction *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
791790
Instruction *simdSize = CallInst::Create(simdSizeFunc, VALUE_NAME("simdSize"), pEntryPoint);
792791

793792
Argument* r0Arg = implicitArgs.getArgInFunc(*m_currFunction, ImplicitArg::R0);
@@ -798,7 +797,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
798797
for (auto pAI : allocaInsts)
799798
{
800799
assert(!pAI->use_empty() && "Should not reach here with alloca instruction that has no usage!");
801-
bool isUniform = pAI->getMetadata("uniform") != nullptr;
800+
802801
llvm::IRBuilder<> builder(pAI);
803802
// Post upgrade to LLVM 3.5.1, it was found that inliner propagates debug info of callee
804803
// in to the alloca. Further, those allocas are somehow hoisted to the top of program.
@@ -858,8 +857,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
858857

859858

860859
Value* bufferOffset = builder.CreateMul(simdSize, ConstantInt::get(typeInt32, scalarBufferOffset), VALUE_NAME(pAI->getName() + ".SIMDBufferOffset"));
861-
Value* perLaneOffset = isUniform ? builder.getInt32(0) : simdLaneId;
862-
perLaneOffset = builder.CreateMul(perLaneOffset, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
860+
Value* perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
863861
Value* totalOffset = builder.CreateAdd(bufferOffset, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
864862
Value* threadOffset = builder.CreateAdd(privateBase, totalOffset, VALUE_NAME(pAI->getName() + ".threadOffset"));
865863
Value* privateBufferPTR = builder.CreateIntToPtr(threadOffset, Type::getInt8Ty(C)->getPointerTo(ADDRESS_SPACE_PRIVATE), VALUE_NAME(pAI->getName() + ".privateBufferPTR"));
@@ -905,7 +903,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
905903
ConstantInt *totalPrivateMemPerWIValue = ConstantInt::get(typeInt32, totalPrivateMemPerWI);
906904

907905
Instruction *simdLaneId16 = CallInst::Create(simdLaneIdFunc, VALUE_NAME("simdLaneId16"), pEntryPoint);
908-
Value* simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
906+
Instruction *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
909907
Instruction *simdSize = CallInst::Create(simdSizeFunc, VALUE_NAME("simdSize"), pEntryPoint);
910908
BinaryOperator* totalPrivateMemPerThread = BinaryOperator::CreateMul(simdSize, totalPrivateMemPerWIValue, VALUE_NAME("totalPrivateMemPerThread"), pEntryPoint);
911909
ExtractElementInst* r0_5 = ExtractElementInst::Create(r0Arg, ConstantInt::get(typeInt32, 5), VALUE_NAME("r0.5"), pEntryPoint);
@@ -938,15 +936,14 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
938936

939937
llvm::IRBuilder<> builder(pAI);
940938
IF_DEBUG_INFO(builder.SetCurrentDebugLocation(emptyDebugLoc));
941-
bool isUniform = pAI->getMetadata("uniform") != nullptr;
939+
942940
// Get buffer information from the analysis
943941
unsigned int scalarBufferOffset = m_ModAllocaInfo->getBufferOffset(pAI);
944942
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize(pAI);
945943

946944
Value* bufferOffset = builder.CreateMul(simdSize, ConstantInt::get(typeInt32, scalarBufferOffset), VALUE_NAME(pAI->getName() + ".SIMDBufferOffset"));
947945
Value* bufferOffsetForThread = builder.CreateAdd(perThreadOffset, bufferOffset, VALUE_NAME(pAI->getName() + ".bufferOffsetForThread"));
948-
Value* perLaneOffset = isUniform ? builder.getInt32(0) : simdLaneId;
949-
perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
946+
Value* perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
950947
Value* totalOffset = builder.CreateAdd(bufferOffsetForThread, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
951948
Value* privateBufferGEP = builder.CreateGEP(privateMemArg, totalOffset, VALUE_NAME(pAI->getName() + ".privateBufferGEP"));
952949
Value* privateBuffer = builder.CreatePointerCast(privateBufferGEP, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));

0 commit comments

Comments
 (0)