@@ -750,12 +750,12 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
750
750
// Creates intrinsics that will be lowered in the CodeGen and will handle the stack-pointer
751
751
Function *stackAllocaFunc = GenISAIntrinsic::getDeclaration (m_currFunction->getParent (), GenISAIntrinsic::GenISA_StackAlloca);
752
752
Instruction *simdLaneId16 = CallInst::Create (simdLaneIdFunc, VALUE_NAME (" simdLaneId16" ), pEntryPoint);
753
- Instruction *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
753
+ Value *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
754
754
Instruction *simdSize = CallInst::Create (simdSizeFunc, VALUE_NAME (" simdSize" ), pEntryPoint);
755
755
for (auto pAI : allocaInsts)
756
756
{
757
757
assert (!pAI->use_empty () && " Should not reach here with alloca instruction that has no usage!" );
758
-
758
+ bool isUniform = pAI-> getMetadata ( " uniform " ) != nullptr ;
759
759
llvm::IRBuilder<> builder (pAI);
760
760
IF_DEBUG_INFO (builder.SetCurrentDebugLocation (emptyDebugLoc));
761
761
@@ -764,7 +764,8 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
764
764
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize (pAI);
765
765
766
766
Value* bufferOffset = builder.CreateMul (simdSize, ConstantInt::get (typeInt32, scalarBufferOffset), VALUE_NAME (pAI->getName () + " .SIMDBufferOffset" ));
767
- Value* perLaneOffset = builder.CreateMul (simdLaneId, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
767
+ Value* increment = isUniform ? builder.getInt32 (0 ) : simdLaneId;
768
+ Value* perLaneOffset = builder.CreateMul (increment, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
768
769
Value* totalOffset = builder.CreateAdd (bufferOffset, perLaneOffset, VALUE_NAME (pAI->getName () + " .totalOffset" ));
769
770
Value* stackAlloca = builder.CreateCall (stackAllocaFunc, totalOffset, VALUE_NAME (" stackAlloca" ));
770
771
Value* privateBuffer = builder.CreatePointerCast (stackAlloca, pAI->getType (), VALUE_NAME (pAI->getName () + " .privateBuffer" ));
@@ -786,7 +787,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
786
787
// PrivateMemoryUsageAnalysis pass, no need to run AddImplicitArgs pass.
787
788
788
789
Instruction *simdLaneId16 = CallInst::Create (simdLaneIdFunc, VALUE_NAME (" simdLaneId16" ), pEntryPoint);
789
- Instruction *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
790
+ Value *simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
790
791
Instruction *simdSize = CallInst::Create (simdSizeFunc, VALUE_NAME (" simdSize" ), pEntryPoint);
791
792
792
793
Argument* r0Arg = implicitArgs.getArgInFunc (*m_currFunction, ImplicitArg::R0);
@@ -797,7 +798,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
797
798
for (auto pAI : allocaInsts)
798
799
{
799
800
assert (!pAI->use_empty () && " Should not reach here with alloca instruction that has no usage!" );
800
-
801
+ bool isUniform = pAI-> getMetadata ( " uniform " ) != nullptr ;
801
802
llvm::IRBuilder<> builder (pAI);
802
803
// Post upgrade to LLVM 3.5.1, it was found that inliner propagates debug info of callee
803
804
// in to the alloca. Further, those allocas are somehow hoisted to the top of program.
@@ -857,7 +858,8 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
857
858
858
859
859
860
Value* bufferOffset = builder.CreateMul (simdSize, ConstantInt::get (typeInt32, scalarBufferOffset), VALUE_NAME (pAI->getName () + " .SIMDBufferOffset" ));
860
- Value* perLaneOffset = builder.CreateMul (simdLaneId, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
861
+ Value* perLaneOffset = isUniform ? builder.getInt32 (0 ) : simdLaneId;
862
+ perLaneOffset = builder.CreateMul (perLaneOffset, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
861
863
Value* totalOffset = builder.CreateAdd (bufferOffset, perLaneOffset, VALUE_NAME (pAI->getName () + " .totalOffset" ));
862
864
Value* threadOffset = builder.CreateAdd (privateBase, totalOffset, VALUE_NAME (pAI->getName () + " .threadOffset" ));
863
865
Value* privateBufferPTR = builder.CreateIntToPtr (threadOffset, Type::getInt8Ty (C)->getPointerTo (ADDRESS_SPACE_PRIVATE), VALUE_NAME (pAI->getName () + " .privateBufferPTR" ));
@@ -903,7 +905,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
903
905
ConstantInt *totalPrivateMemPerWIValue = ConstantInt::get (typeInt32, totalPrivateMemPerWI);
904
906
905
907
Instruction *simdLaneId16 = CallInst::Create (simdLaneIdFunc, VALUE_NAME (" simdLaneId16" ), pEntryPoint);
906
- Instruction * simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
908
+ Value* simdLaneId = ZExtInst::CreateIntegerCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ), pEntryPoint);
907
909
Instruction *simdSize = CallInst::Create (simdSizeFunc, VALUE_NAME (" simdSize" ), pEntryPoint);
908
910
BinaryOperator* totalPrivateMemPerThread = BinaryOperator::CreateMul (simdSize, totalPrivateMemPerWIValue, VALUE_NAME (" totalPrivateMemPerThread" ), pEntryPoint);
909
911
ExtractElementInst* r0_5 = ExtractElementInst::Create (r0Arg, ConstantInt::get (typeInt32, 5 ), VALUE_NAME (" r0.5" ), pEntryPoint);
@@ -936,14 +938,15 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
936
938
937
939
llvm::IRBuilder<> builder (pAI);
938
940
IF_DEBUG_INFO (builder.SetCurrentDebugLocation (emptyDebugLoc));
939
-
941
+ bool isUniform = pAI-> getMetadata ( " uniform " ) != nullptr ;
940
942
// Get buffer information from the analysis
941
943
unsigned int scalarBufferOffset = m_ModAllocaInfo->getBufferOffset (pAI);
942
944
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize (pAI);
943
945
944
946
Value* bufferOffset = builder.CreateMul (simdSize, ConstantInt::get (typeInt32, scalarBufferOffset), VALUE_NAME (pAI->getName () + " .SIMDBufferOffset" ));
945
947
Value* bufferOffsetForThread = builder.CreateAdd (perThreadOffset, bufferOffset, VALUE_NAME (pAI->getName () + " .bufferOffsetForThread" ));
946
- Value* perLaneOffset = builder.CreateMul (simdLaneId, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
948
+ // Pick the lane factor first (0 when isUniform is set, otherwise simdLaneId), then scale
+ // that selected value by bufferSize. The multiply must consume perLaneOffset, not simdLaneId,
+ // otherwise the isUniform selection above is discarded.
+ Value* perLaneOffset = isUniform ? builder.getInt32 (0 ) : simdLaneId;
949
+ perLaneOffset = builder.CreateMul (perLaneOffset, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
947
950
Value* totalOffset = builder.CreateAdd (bufferOffsetForThread, perLaneOffset, VALUE_NAME (pAI->getName () + " .totalOffset" ));
948
951
Value* privateBufferGEP = builder.CreateGEP (privateMemArg, totalOffset, VALUE_NAME (pAI->getName () + " .privateBufferGEP" ));
949
952
Value* privateBuffer = builder.CreatePointerCast (privateBufferGEP, pAI->getType (), VALUE_NAME (pAI->getName () + " .privateBuffer" ));
0 commit comments