@@ -750,12 +750,12 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
750
750
// Creates intrinsics that will be lowered in the CodeGen and will handle the stack-pointer
751
751
Function *stackAllocaFunc = GenISAIntrinsic::getDeclaration (m_currFunction->getParent (), GenISAIntrinsic::GenISA_StackAlloca);
752
752
Instruction *simdLaneId16 = CallInst::Create (simdLaneIdFunc, VALUE_NAME (" simdLaneId16" ), pEntryPoint);
753
- Value *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
753
+ Instruction *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
754
754
Instruction *simdSize = CallInst::Create (simdSizeFunc, VALUE_NAME (" simdSize" ), pEntryPoint);
755
755
for (auto pAI : allocaInsts)
756
756
{
757
757
assert (!pAI->use_empty () && " Should not reach here with alloca instruction that has no usage!" );
758
- bool isUniform = pAI-> getMetadata ( " uniform " ) != nullptr ;
758
+
759
759
llvm::IRBuilder<> builder (pAI);
760
760
IF_DEBUG_INFO (builder.SetCurrentDebugLocation (emptyDebugLoc));
761
761
@@ -764,8 +764,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
764
764
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize (pAI);
765
765
766
766
Value* bufferOffset = builder.CreateMul (simdSize, ConstantInt::get (typeInt32, scalarBufferOffset), VALUE_NAME (pAI->getName () + " .SIMDBufferOffset" ));
767
- Value* increment = isUniform ? builder.getInt32 (0 ) : simdLaneId;
768
- Value* perLaneOffset = builder.CreateMul (increment, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
767
+ Value* perLaneOffset = builder.CreateMul (simdLaneId, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
769
768
Value* totalOffset = builder.CreateAdd (bufferOffset, perLaneOffset, VALUE_NAME (pAI->getName () + " .totalOffset" ));
770
769
Value* stackAlloca = builder.CreateCall (stackAllocaFunc, totalOffset, VALUE_NAME (" stackAlloca" ));
771
770
Value* privateBuffer = builder.CreatePointerCast (stackAlloca, pAI->getType (), VALUE_NAME (pAI->getName () + " .privateBuffer" ));
@@ -787,7 +786,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
787
786
// PrivateMemoryUsageAnalysis pass, no need to run AddImplicitArgs pass.
788
787
789
788
Instruction *simdLaneId16 = CallInst::Create (simdLaneIdFunc, VALUE_NAME (" simdLaneId16" ), pEntryPoint);
790
- Value *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
789
+ Instruction *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
791
790
Instruction *simdSize = CallInst::Create (simdSizeFunc, VALUE_NAME (" simdSize" ), pEntryPoint);
792
791
793
792
Argument* r0Arg = implicitArgs.getArgInFunc (*m_currFunction, ImplicitArg::R0);
@@ -798,7 +797,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
798
797
for (auto pAI : allocaInsts)
799
798
{
800
799
assert (!pAI->use_empty () && " Should not reach here with alloca instruction that has no usage!" );
801
- bool isUniform = pAI-> getMetadata ( " uniform " ) != nullptr ;
800
+
802
801
llvm::IRBuilder<> builder (pAI);
803
802
// Post upgrade to LLVM 3.5.1, it was found that the inliner propagates debug info of the callee
804
803
// into the alloca. Further, those allocas are somehow hoisted to the top of the program.
@@ -858,8 +857,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
858
857
859
858
860
859
Value* bufferOffset = builder.CreateMul (simdSize, ConstantInt::get (typeInt32, scalarBufferOffset), VALUE_NAME (pAI->getName () + " .SIMDBufferOffset" ));
861
- Value* perLaneOffset = isUniform ? builder.getInt32 (0 ) : simdLaneId;
862
- perLaneOffset = builder.CreateMul (perLaneOffset, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
860
+ Value* perLaneOffset = builder.CreateMul (simdLaneId, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
863
861
Value* totalOffset = builder.CreateAdd (bufferOffset, perLaneOffset, VALUE_NAME (pAI->getName () + " .totalOffset" ));
864
862
Value* threadOffset = builder.CreateAdd (privateBase, totalOffset, VALUE_NAME (pAI->getName () + " .threadOffset" ));
865
863
Value* privateBufferPTR = builder.CreateIntToPtr (threadOffset, Type::getInt8Ty (C)->getPointerTo (ADDRESS_SPACE_PRIVATE), VALUE_NAME (pAI->getName () + " .privateBufferPTR" ));
@@ -905,7 +903,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
905
903
ConstantInt *totalPrivateMemPerWIValue = ConstantInt::get (typeInt32, totalPrivateMemPerWI);
906
904
907
905
Instruction *simdLaneId16 = CallInst::Create (simdLaneIdFunc, VALUE_NAME (" simdLaneId16" ), pEntryPoint);
908
- Value* simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
906
+ Instruction * simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
909
907
Instruction *simdSize = CallInst::Create (simdSizeFunc, VALUE_NAME (" simdSize" ), pEntryPoint);
910
908
BinaryOperator* totalPrivateMemPerThread = BinaryOperator::CreateMul (simdSize, totalPrivateMemPerWIValue, VALUE_NAME (" totalPrivateMemPerThread" ), pEntryPoint);
911
909
ExtractElementInst* r0_5 = ExtractElementInst::Create (r0Arg, ConstantInt::get (typeInt32, 5 ), VALUE_NAME (" r0.5" ), pEntryPoint);
@@ -938,15 +936,14 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
938
936
939
937
llvm::IRBuilder<> builder (pAI);
940
938
IF_DEBUG_INFO (builder.SetCurrentDebugLocation (emptyDebugLoc));
941
- bool isUniform = pAI-> getMetadata ( " uniform " ) != nullptr ;
939
+
942
940
// Get buffer information from the analysis
943
941
unsigned int scalarBufferOffset = m_ModAllocaInfo->getBufferOffset (pAI);
944
942
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize (pAI);
945
943
946
944
Value* bufferOffset = builder.CreateMul (simdSize, ConstantInt::get (typeInt32, scalarBufferOffset), VALUE_NAME (pAI->getName () + " .SIMDBufferOffset" ));
947
945
Value* bufferOffsetForThread = builder.CreateAdd (perThreadOffset, bufferOffset, VALUE_NAME (pAI->getName () + " .bufferOffsetForThread" ));
948
- Value* perLaneOffset = isUniform ? builder.getInt32 (0 ) : simdLaneId;
949
- perLaneOffset = builder.CreateMul (simdLaneId, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
946
+ Value* perLaneOffset = builder.CreateMul (simdLaneId, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
950
947
Value* totalOffset = builder.CreateAdd (bufferOffsetForThread, perLaneOffset, VALUE_NAME (pAI->getName () + " .totalOffset" ));
951
948
Value* privateBufferGEP = builder.CreateGEP (privateMemArg, totalOffset, VALUE_NAME (pAI->getName () + " .privateBufferGEP" ));
952
949
Value* privateBuffer = builder.CreatePointerCast (privateBufferGEP, pAI->getType (), VALUE_NAME (pAI->getName () + " .privateBuffer" ));
0 commit comments