@@ -868,31 +868,28 @@ namespace IGC
868
868
return size;
869
869
}
870
870
871
- unsigned int PushAnalysis::AllocatePushedConstant (
871
+ void PushAnalysis::AllocatePushedConstant (
872
872
Instruction* load,
873
873
const SimplePushInfo& newChunk,
874
874
const unsigned int maxSizeAllowed)
875
875
{
876
876
if (!newChunk.isBindless &&
877
877
newChunk.cbIdx > m_context->m_DriverInfo .MaximumSimplePushBufferID ())
878
878
{
879
- return 0 ;
879
+ return ;
880
880
}
881
881
unsigned int size = GetSizeInBits (load->getType ()) / 8 ;
882
882
IGC_ASSERT_MESSAGE (isa<LoadInst>(load) || isa<LdRawIntrinsic>(load),
883
883
" Expected a load instruction" );
884
- PushInfo& pushInfo = m_context->getModuleMetaData ()->pushInfo ;
885
884
886
- bool canPromote = false ;
887
- unsigned int sizeGrown = 0 ;
888
885
// greedy allocation for now
889
886
// first check if we are already pushing from the buffer
890
887
unsigned int piIndex;
891
888
bool regionFound = false ;
892
889
893
- for (piIndex = 0 ; piIndex < pushInfo. simplePushBufferUsed ; piIndex++)
890
+ for (piIndex = 0 ; piIndex < numSimplePush ; piIndex++)
894
891
{
895
- const SimplePushInfo & info = pushInfo. simplePushInfoArr [piIndex];
892
+ const SimplePushData & info = CollectAllSimplePushInfoArr [piIndex];
896
893
// Stateless load - GRF offsets need to match.
897
894
if (info.isStateless &&
898
895
newChunk.isStateless &&
@@ -925,7 +922,7 @@ namespace IGC
925
922
}
926
923
if (regionFound)
927
924
{
928
- SimplePushInfo & info = pushInfo. simplePushInfoArr [piIndex];
925
+ SimplePushData & info = CollectAllSimplePushInfoArr [piIndex];
929
926
unsigned int newStartOffset = iSTD::RoundDown (
930
927
std::min (newChunk.offset , info.offset ),
931
928
getMinPushConstantBufferAlignmentInBytes ());
@@ -934,54 +931,36 @@ namespace IGC
934
931
getMinPushConstantBufferAlignmentInBytes ());
935
932
unsigned int newSize = newEndOffset - newStartOffset;
936
933
937
- if (newSize - info. size <= maxSizeAllowed)
934
+ if (newSize <= maxSizeAllowed)
938
935
{
939
- sizeGrown = newSize - info.size ;
940
- canPromote = true ;
941
936
info.offset = newStartOffset;
942
937
info.size = newSize;
938
+ info.Load [load] = newChunk.offset ;
943
939
}
944
940
}
945
941
946
- const unsigned int maxNumberOfPushedBuffers = pushInfo.MaxNumberOfPushedBuffers ;
947
-
948
942
// we couldn't add it to an existing buffer try to add a new one if there is a slot available
949
- if (canPromote == false &&
950
- maxSizeAllowed > 0 &&
951
- pushInfo.simplePushBufferUsed < maxNumberOfPushedBuffers)
943
+ else
952
944
{
953
945
unsigned int newStartOffset = iSTD::RoundDown (newChunk.offset , getMinPushConstantBufferAlignmentInBytes ());
954
946
unsigned int newEndOffset = iSTD::Round (newChunk.offset + size, getMinPushConstantBufferAlignmentInBytes ());
955
947
unsigned int newSize = newEndOffset - newStartOffset;
956
948
957
949
if (newSize <= maxSizeAllowed)
958
950
{
959
- canPromote = true ;
960
- sizeGrown = newSize;
961
-
962
- piIndex = pushInfo.simplePushBufferUsed ;
963
- SimplePushInfo& info = pushInfo.simplePushInfoArr [piIndex];
951
+ SimplePushData& info = CollectAllSimplePushInfoArr[numSimplePush];
964
952
info.pushableAddressGrfOffset = newChunk.pushableAddressGrfOffset ;
965
953
info.pushableOffsetGrfOffset = newChunk.pushableOffsetGrfOffset ;
966
954
info.cbIdx = newChunk.cbIdx ;
967
955
info.isStateless = newChunk.isStateless ;
968
956
info.isBindless = newChunk.isBindless ;
969
957
info.offset = newStartOffset;
970
958
info.size = newSize;
971
-
972
- pushInfo. simplePushBufferUsed ++;
959
+ info. Load [load] = newChunk. offset ;
960
+ numSimplePush ++;
973
961
}
974
962
}
975
-
976
- if (canPromote)
977
- {
978
- // promote the load to be pushed
979
- PromoteLoadToSimplePush (
980
- load,
981
- pushInfo.simplePushInfoArr [piIndex],
982
- newChunk.offset );
983
- }
984
- return sizeGrown;
963
+ return ;
985
964
}
986
965
987
966
void PushAnalysis::PromoteLoadToSimplePush (Instruction* load, SimplePushInfo& info, unsigned int offset)
@@ -1103,13 +1082,45 @@ namespace IGC
1103
1082
bool isPushable = IsPushableShaderConstant (instr, info);
1104
1083
if (isPushable)
1105
1084
{
1106
- sizePushed += AllocatePushedConstant (
1085
+ AllocatePushedConstant (
1107
1086
instr,
1108
1087
info,
1109
- cthreshold - sizePushed ); // maxSizeAllowed
1088
+ cthreshold); // maxSizeAllowed
1110
1089
}
1111
1090
}
1112
1091
}
1092
+
1093
+
1094
+ PushInfo& pushInfo = m_context->getModuleMetaData ()->pushInfo ;
1095
+ while ((pushInfo.simplePushBufferUsed < pushInfo.MaxNumberOfPushedBuffers ) && CollectAllSimplePushInfoArr.size ())
1096
+ {
1097
+ unsigned int iter = CollectAllSimplePushInfoArr.begin ()->first ;
1098
+ SimplePushData info;
1099
+ for (auto I = CollectAllSimplePushInfoArr.begin (), E = CollectAllSimplePushInfoArr.end (); I != E; I++)
1100
+ {
1101
+ if (I->second .size > info.size )
1102
+ {
1103
+ info = I->second ;
1104
+ iter = I->first ;
1105
+ }
1106
+ }
1107
+
1108
+ SimplePushInfo& newChunk = pushInfo.simplePushInfoArr [pushInfo.simplePushBufferUsed ];
1109
+ if (sizePushed + info.size <= cthreshold)
1110
+ {
1111
+ newChunk.cbIdx = info.cbIdx ;
1112
+ newChunk.isBindless = info.isBindless ;
1113
+ newChunk.isStateless = info.isStateless ;
1114
+ newChunk.offset = info.offset ;
1115
+ newChunk.size = info.size ;
1116
+ newChunk.pushableAddressGrfOffset = info.pushableAddressGrfOffset ;
1117
+ newChunk.pushableOffsetGrfOffset = info.pushableOffsetGrfOffset ;
1118
+ for (auto I = info.Load .rbegin (), E = info.Load .rend (); I != E; I++)
1119
+ PromoteLoadToSimplePush (I->first , newChunk, I->second );
1120
+ pushInfo.simplePushBufferUsed ++;
1121
+ }
1122
+ CollectAllSimplePushInfoArr.erase (iter);
1123
+ }
1113
1124
}
1114
1125
1115
1126
PushConstantMode PushAnalysis::GetPushConstantMode ()
0 commit comments