@@ -868,28 +868,31 @@ namespace IGC
868
868
return size;
869
869
}
870
870
871
- void PushAnalysis::AllocatePushedConstant (
871
+ unsigned int PushAnalysis::AllocatePushedConstant (
872
872
Instruction* load,
873
873
const SimplePushInfo& newChunk,
874
874
const unsigned int maxSizeAllowed)
875
875
{
876
876
if (!newChunk.isBindless &&
877
877
newChunk.cbIdx > m_context->m_DriverInfo .MaximumSimplePushBufferID ())
878
878
{
879
- return ;
879
+ return 0 ;
880
880
}
881
881
unsigned int size = GetSizeInBits (load->getType ()) / 8 ;
882
882
IGC_ASSERT_MESSAGE (isa<LoadInst>(load) || isa<LdRawIntrinsic>(load),
883
883
" Expected a load instruction" );
884
+ PushInfo& pushInfo = m_context->getModuleMetaData ()->pushInfo ;
884
885
886
+ bool canPromote = false ;
887
+ unsigned int sizeGrown = 0 ;
885
888
// greedy allocation for now
886
889
// first check if we are already pushing from the buffer
887
890
unsigned int piIndex;
888
891
bool regionFound = false ;
889
892
890
- for (piIndex = 0 ; piIndex < numSimplePush ; piIndex++)
893
+ for (piIndex = 0 ; piIndex < pushInfo. simplePushBufferUsed ; piIndex++)
891
894
{
892
- const SimplePushData & info = CollectAllSimplePushInfoArr [piIndex];
895
+ const SimplePushInfo & info = pushInfo. simplePushInfoArr [piIndex];
893
896
// Stateless load - GRF offsets need to match.
894
897
if (info.isStateless &&
895
898
newChunk.isStateless &&
@@ -922,7 +925,7 @@ namespace IGC
922
925
}
923
926
if (regionFound)
924
927
{
925
- SimplePushData & info = CollectAllSimplePushInfoArr [piIndex];
928
+ SimplePushInfo & info = pushInfo. simplePushInfoArr [piIndex];
926
929
unsigned int newStartOffset = iSTD::RoundDown (
927
930
std::min (newChunk.offset , info.offset ),
928
931
getMinPushConstantBufferAlignmentInBytes ());
@@ -931,36 +934,54 @@ namespace IGC
931
934
getMinPushConstantBufferAlignmentInBytes ());
932
935
unsigned int newSize = newEndOffset - newStartOffset;
933
936
934
- if (newSize <= maxSizeAllowed)
937
+ if (newSize - info. size <= maxSizeAllowed)
935
938
{
939
+ sizeGrown = newSize - info.size ;
940
+ canPromote = true ;
936
941
info.offset = newStartOffset;
937
942
info.size = newSize;
938
- info.Load [load] = newChunk.offset ;
939
943
}
940
944
}
941
945
946
+ const unsigned int maxNumberOfPushedBuffers = pushInfo.MaxNumberOfPushedBuffers ;
947
+
942
948
// we couldn't add it to an existing buffer try to add a new one if there is a slot available
943
- else
949
+ if (canPromote == false &&
950
+ maxSizeAllowed > 0 &&
951
+ pushInfo.simplePushBufferUsed < maxNumberOfPushedBuffers)
944
952
{
945
953
unsigned int newStartOffset = iSTD::RoundDown (newChunk.offset , getMinPushConstantBufferAlignmentInBytes ());
946
954
unsigned int newEndOffset = iSTD::Round (newChunk.offset + size, getMinPushConstantBufferAlignmentInBytes ());
947
955
unsigned int newSize = newEndOffset - newStartOffset;
948
956
949
957
if (newSize <= maxSizeAllowed)
950
958
{
951
- SimplePushData& info = CollectAllSimplePushInfoArr[numSimplePush];
959
+ canPromote = true ;
960
+ sizeGrown = newSize;
961
+
962
+ piIndex = pushInfo.simplePushBufferUsed ;
963
+ SimplePushInfo& info = pushInfo.simplePushInfoArr [piIndex];
952
964
info.pushableAddressGrfOffset = newChunk.pushableAddressGrfOffset ;
953
965
info.pushableOffsetGrfOffset = newChunk.pushableOffsetGrfOffset ;
954
966
info.cbIdx = newChunk.cbIdx ;
955
967
info.isStateless = newChunk.isStateless ;
956
968
info.isBindless = newChunk.isBindless ;
957
969
info.offset = newStartOffset;
958
970
info.size = newSize;
959
- info. Load [load] = newChunk. offset ;
960
- numSimplePush ++;
971
+
972
+ pushInfo. simplePushBufferUsed ++;
961
973
}
962
974
}
963
- return ;
975
+
976
+ if (canPromote)
977
+ {
978
+ // promote the load to be pushed
979
+ PromoteLoadToSimplePush (
980
+ load,
981
+ pushInfo.simplePushInfoArr [piIndex],
982
+ newChunk.offset );
983
+ }
984
+ return sizeGrown;
964
985
}
965
986
966
987
void PushAnalysis::PromoteLoadToSimplePush (Instruction* load, SimplePushInfo& info, unsigned int offset)
@@ -1082,45 +1103,13 @@ namespace IGC
1082
1103
bool isPushable = IsPushableShaderConstant (instr, info);
1083
1104
if (isPushable)
1084
1105
{
1085
- AllocatePushedConstant (
1106
+ sizePushed += AllocatePushedConstant (
1086
1107
instr,
1087
1108
info,
1088
- cthreshold); // maxSizeAllowed
1109
+ cthreshold - sizePushed ); // maxSizeAllowed
1089
1110
}
1090
1111
}
1091
1112
}
1092
-
1093
-
1094
- PushInfo& pushInfo = m_context->getModuleMetaData ()->pushInfo ;
1095
- while ((pushInfo.simplePushBufferUsed < pushInfo.MaxNumberOfPushedBuffers ) && CollectAllSimplePushInfoArr.size ())
1096
- {
1097
- unsigned int iter = CollectAllSimplePushInfoArr.begin ()->first ;
1098
- SimplePushData info;
1099
- for (auto I = CollectAllSimplePushInfoArr.begin (), E = CollectAllSimplePushInfoArr.end (); I != E; I++)
1100
- {
1101
- if (I->second .size > info.size )
1102
- {
1103
- info = I->second ;
1104
- iter = I->first ;
1105
- }
1106
- }
1107
-
1108
- SimplePushInfo& newChunk = pushInfo.simplePushInfoArr [pushInfo.simplePushBufferUsed ];
1109
- if (sizePushed + info.size <= cthreshold)
1110
- {
1111
- newChunk.cbIdx = info.cbIdx ;
1112
- newChunk.isBindless = info.isBindless ;
1113
- newChunk.isStateless = info.isStateless ;
1114
- newChunk.offset = info.offset ;
1115
- newChunk.size = info.size ;
1116
- newChunk.pushableAddressGrfOffset = info.pushableAddressGrfOffset ;
1117
- newChunk.pushableOffsetGrfOffset = info.pushableOffsetGrfOffset ;
1118
- for (auto I = info.Load .rbegin (), E = info.Load .rend (); I != E; I++)
1119
- PromoteLoadToSimplePush (I->first , newChunk, I->second );
1120
- pushInfo.simplePushBufferUsed ++;
1121
- }
1122
- CollectAllSimplePushInfoArr.erase (iter);
1123
- }
1124
1113
}
1125
1114
1126
1115
PushConstantMode PushAnalysis::GetPushConstantMode ()
0 commit comments