@@ -2804,9 +2804,128 @@ void SpillManagerGRF::insertSpillRangeCode(
2804
2804
return ;
2805
2805
}
2806
2806
2807
+ auto IsUniqueDef = [this , inst, bb, spillDcl]()
2808
+ {
2809
+ // return true if spilled variable has a single def
2810
+ // and it is not live-in to current bb (eg, loop, sub).
2811
+ if (VarDefs[spillDcl].size () != 1 )
2812
+ {
2813
+ // check whether multiple defs exist in program for current
2814
+ // lb, rb
2815
+ auto dst = inst->getDst ();
2816
+ auto lb = dst->getLeftBound ();
2817
+ auto rb = dst->getRightBound ();
2818
+
2819
+ unsigned int count = 0 ;
2820
+ auto & defs = VarDefs[spillDcl];
2821
+ for (auto & def : defs)
2822
+ {
2823
+ if (def.first <= rb &&
2824
+ def.second >= lb)
2825
+ ++count;
2826
+ }
2827
+
2828
+ if (count > 1 )
2829
+ return false ;
2830
+
2831
+ // spill/fill intrinsics work at either hword/grf granularity. so even
2832
+ // if a def is unique, if it only writes part of a GRF (say upper 8 words)
2833
+ // then a fill is still needed to preserve rest of GRF written by another
2834
+ // def.
2835
+ bool partialGRFDef = ((dst->getRightBound () + 1 ) % getGRFSize ()) != 0 ||
2836
+ (dst->getLeftBound () % getGRFSize ()) != 0 ;
2837
+ if (partialGRFDef)
2838
+ return false ;
2839
+ }
2840
+
2841
+ // check whether variable is live-in to BB
2842
+ if (lvInfo_->isLiveAtEntry (bb, spillDcl->getRegVar ()->getId ()))
2843
+ return false ;
2844
+
2845
+ return true ;
2846
+ };
2847
+
2848
+ auto PseudoKillFound = [spilledInstIter, bb, spillDcl]()
2849
+ {
2850
+ // Search upwards from spilledInstIter to find a pseudo kill.
2851
+ // Return true if one is found, false otherwise.
2852
+ // When a pseudo kill is found, it means read-modify-write is
2853
+ // not needed.
2854
+ auto bbBegin = bb->begin ();
2855
+ if (spilledInstIter == bbBegin)
2856
+ return false ;
2857
+ auto it = spilledInstIter;
2858
+ --it;
2859
+ unsigned int pseudoKillDist = 0 ;
2860
+ auto dst = (*spilledInstIter)->getDst ();
2861
+ bool partialGRFDef = ((dst->getRightBound () + 1 ) % getGRFSize ()) != 0 ||
2862
+ (dst->getLeftBound () % getGRFSize ()) != 0 ;
2863
+ while (it != bbBegin)
2864
+ {
2865
+ auto inst = *it;
2866
+ // check if adjacent instruction is a pseudo kill
2867
+ if (inst->isPseudoKill ())
2868
+ {
2869
+ if (inst->getDst ()->getTopDcl () == spillDcl)
2870
+ {
2871
+ // if pseudo_kill is previous instruction, then no need for fill
2872
+ // however, if there are other instructions in between current and
2873
+ // pseudo_kill then a fill can be skipped only if the def defines
2874
+ // full GRF as spill/fill is currently in GRF granularity.
2875
+ if (pseudoKillDist == 0 || !partialGRFDef)
2876
+ return true ;
2877
+ return false ;
2878
+ }
2879
+ }
2880
+ else
2881
+ {
2882
+ auto dst = inst->getDst ();
2883
+ if (!dst)
2884
+ return false ;
2885
+
2886
+ // recognize pattern where V10 is spilled:
2887
+ // pseudo_kill V10
2888
+ // op (8) V10(0,0) ...
2889
+ // op (8) V10(2,0) ... <-- Current inst
2890
+ //
2891
+ // becomes:
2892
+ // pseudo_kill V10
2893
+ // op (8) SP_V10(0,0) ... [2]
2894
+ // intrinsic_spill (8) ... SP_V10 ... [1]
2895
+ // op (8) V10(2,0) <-- Current inst
2896
+ if (inst->isSpillIntrinsic () &&
2897
+ inst->asSpillIntrinsic ()->getPayload () &&
2898
+ inst->asSpillIntrinsic ()->getPayload ()->getTopDcl ()->getRegVar ()->isRegVarTransient ())
2899
+ {
2900
+ auto spillBase = ((G4_RegVarTransient*)inst->asSpillIntrinsic ()->getPayload ()->getTopDcl ()->getRegVar ())->getAbsBaseRegVar ();
2901
+ if (spillBase->getDeclare ()->getRootDeclare () != spillDcl)
2902
+ return false ;
2903
+ // Fall through for instruction [1] above
2904
+ }
2905
+ else if (dst &&
2906
+ dst->getTopDcl () &&
2907
+ dst->getTopDcl ()->getRegVar ()->isRegVarTransient ())
2908
+ {
2909
+ auto spillBase = ((G4_RegVarTransient*)dst->getTopDcl ()->getRegVar ())->getAbsBaseRegVar ();
2910
+ if (spillBase->getDeclare ()->getRootDeclare () != spillDcl)
2911
+ return false ;
2912
+ // Fall through for instruction [2] above
2913
+ }
2914
+ else if (inst->getDst ()->getTopDcl () != spillDcl)
2915
+ return false ;
2916
+
2917
+ ++pseudoKillDist;
2918
+ }
2919
+ --it;
2920
+ }
2921
+ return false ;
2922
+ };
2923
+
2807
2924
// subreg offset for new dst that replaces the spilled dst
2808
2925
auto newSubregOff = 0 ;
2809
2926
2927
+ auto pseudoKillFound = PseudoKillFound ();
2928
+ auto isUniqueDef = IsUniqueDef ();
2810
2929
if (inst->mayExceedTwoGRF ())
2811
2930
{
2812
2931
// Handle send instructions (special treatment)
@@ -2821,7 +2940,9 @@ void SpillManagerGRF::insertSpillRangeCode(
2821
2940
createAndInitMHeader (
2822
2941
(G4_RegVarTransient *) spillRangeDcl->getRegVar ());
2823
2942
2824
- bool needRMW = inst->isPartialWriteForSpill (!bb->isAllLaneActive ());
2943
+ bool needRMW = !pseudoKillFound &&
2944
+ !isUniqueDef &&
2945
+ inst->isPartialWriteForSpill (!bb->isAllLaneActive ());
2825
2946
if (needRMW)
2826
2947
{
2827
2948
sendInSpilledRegVarPortions (
@@ -2856,7 +2977,9 @@ void SpillManagerGRF::insertSpillRangeCode(
2856
2977
2857
2978
// Unaligned region specific handling.
2858
2979
unsigned int spillSendOption = InstOpt_WriteEnable;
2859
- if (shouldPreloadSpillRange (*spilledInstIter, bb)) {
2980
+ if (!pseudoKillFound &&
2981
+ !isUniqueDef &&
2982
+ shouldPreloadSpillRange (*spilledInstIter, bb)) {
2860
2983
2861
2984
// Preload the segment aligned spill range from memory to use
2862
2985
// as an overlay
@@ -2938,7 +3061,13 @@ void SpillManagerGRF::insertSpillRangeCode(
2938
3061
newSubregOff = spilledRegion->getSubRegOff ();
2939
3062
if (!bb->isAllLaneActive ())
2940
3063
{
2941
- spillSendOption = (*spilledInstIter)->getMaskOption ();
3064
+ if (!pseudoKillFound &&
3065
+ !isUniqueDef)
3066
+ {
3067
+ // Neither a pseudo kill nor a unique def was found, so the spill uses the
3068
+ // instruction's mask option. Otherwise the WriteEnable option is kept.
3069
+ spillSendOption = (*spilledInstIter)->getMaskOption ();
3070
+ }
2942
3071
}
2943
3072
}
2944
3073
@@ -3873,6 +4002,33 @@ void SpillManagerGRF::runSpillAnalysis()
3873
4002
}
3874
4003
}
3875
4004
4005
+ void SpillManagerGRF::populateDefsTable ()
4006
+ {
4007
+ for (auto bb : gra.kernel .fg )
4008
+ {
4009
+ for (auto inst : *bb)
4010
+ {
4011
+ if (inst->isPseudoKill ())
4012
+ continue ;
4013
+
4014
+ auto dst = inst->getDst ();
4015
+
4016
+ if (dst && !dst->isNullReg ())
4017
+ {
4018
+ auto topdcl = dst->getTopDcl ();
4019
+
4020
+ if (topdcl)
4021
+ {
4022
+ auto lb = dst->getLeftBound ();
4023
+ auto rb = dst->getRightBound ();
4024
+ VarDefs[topdcl].push_back (std::make_pair (lb, rb));
4025
+ }
4026
+ }
4027
+ }
4028
+ }
4029
+ }
4030
+
4031
+
3876
4032
// Insert spill/fill code for all registers that have not been assigned
3877
4033
// physical registers in the current iteration of the graph coloring
3878
4034
// allocator.
@@ -3913,6 +4069,9 @@ bool SpillManagerGRF::insertSpillFillCode (
3913
4069
return false ;
3914
4070
}
3915
4071
4072
+ // Populate def table as it helps us decide whether read-modify-write is needed
4073
+ populateDefsTable ();
4074
+
3916
4075
// Insert spill/fill code for all basic blocks.
3917
4076
3918
4077
FlowGraph& fg = kernel->fg ;
@@ -3921,6 +4080,7 @@ bool SpillManagerGRF::insertSpillFillCode (
3921
4080
{
3922
4081
bbId_ = (*it)->getId ();
3923
4082
INST_LIST::iterator jt = (*it)->begin ();
4083
+ std::list<INST_LIST_ITER> pseudoKills;
3924
4084
3925
4085
while (jt != (*it)->end ()) {
3926
4086
INST_LIST::iterator kt = jt;
@@ -3950,7 +4110,11 @@ bool SpillManagerGRF::insertSpillFillCode (
3950
4110
{
3951
4111
if (inst->isPseudoKill ())
3952
4112
{
3953
- (*it)->erase (jt);
4113
+ // This pseudo kill corresponds to a spilled variable, so
4114
+ // it can be removed. But it is preserved till spill code
4115
+ // is inserted for the variable as it provides a hint to
4116
+ // spill insertion that read-modify-write is not needed.
4117
+ pseudoKills.push_back (jt);
3954
4118
jt = kt;
3955
4119
continue ;
3956
4120
}
@@ -4003,6 +4167,11 @@ bool SpillManagerGRF::insertSpillFillCode (
4003
4167
4004
4168
jt = kt;
4005
4169
}
4170
+
4171
+ for (auto killIt : pseudoKills)
4172
+ {
4173
+ (*it)->erase (killIt);
4174
+ }
4006
4175
}
4007
4176
4008
4177
bbId_ = UINT_MAX;
0 commit comments