@@ -815,13 +815,16 @@ SpillManagerGRF::isUnalignedRegion (
815
815
816
816
bool needs32ByteAlign = useScratchMsg_;
817
817
818
+ auto bytePerGRF = numEltPerGRF<Type_UB>();
818
819
if (needs32ByteAlign)
819
820
{
820
- if (regionDisp%numEltPerGRF<Type_UB>() == 0 && regionByteSize%numEltPerGRF<Type_UB>() == 0 )
821
+ if (regionDisp % bytePerGRF == 0 && regionByteSize % bytePerGRF == 0 )
822
+ {
821
823
return
822
- regionByteSize / numEltPerGRF<Type_UB>() != 1 &&
823
- regionByteSize / numEltPerGRF<Type_UB>() != 2 &&
824
- regionByteSize / numEltPerGRF<Type_UB>() != 4 ;
824
+ regionByteSize / bytePerGRF != 1 &&
825
+ regionByteSize / bytePerGRF != 2 &&
826
+ regionByteSize / bytePerGRF != 4 ;
827
+ }
825
828
else
826
829
return true ;
827
830
}
@@ -834,8 +837,8 @@ SpillManagerGRF::isUnalignedRegion (
834
837
// mov (16) V91(6,0)<1>:ub %retval_ub(0,0)<1;1,0>:ub {H1, Align1}
835
838
// mov (16) V91(6,16)<1>:ub %retval_ub(0,16)<1;1,0>:ub {H1, Align1}
836
839
G4_RegVar* var = getRegVar (region);
837
- if ((var->getDeclare ()->getByteSize () > numEltPerGRF<Type_UB>() ) &&
838
- (regionByteSize < numEltPerGRF<Type_UB>() || regionDisp % numEltPerGRF<Type_UB>() ))
840
+ if ((var->getDeclare ()->getByteSize () > bytePerGRF ) &&
841
+ (regionByteSize < bytePerGRF || regionDisp % bytePerGRF ))
839
842
{
840
843
return true ;
841
844
}
@@ -2512,15 +2515,13 @@ SpillManagerGRF::shouldPreloadSpillRange(
2512
2515
isUnalignedRegion (spilledRangeRegion, execSize) ||
2513
2516
instContext->isPartialWriteForSpill (!parentBB->isAllLaneActive ()))
2514
2517
{
2515
- #if 0
2516
- // special check for scalar variables: no need for pre-fill if instruction is not predicated
2517
- // FIXME: need to update this if we ever decide to pack scalar variables in memory
2518
- if (spilledRangeRegion->getTopDcl()->getNumElems() == 1 &&
2519
- instContext->getPredicate() == nullptr)
2518
+ // special check for scalar variables: no need for pre-fill if instruction writes to whole variable and is not predicated
2519
+ auto spilledDcl = spilledRangeRegion->getTopDcl ()->getRootDeclare ();
2520
+ if (execSize == g4::SIMD1 && getTypeSize (spilledRangeRegion->getType ()) == spilledDcl->getByteSize () && !instContext->getPredicate ())
2520
2521
{
2522
+ // ToDo: investigate why we are spilling so many scalar variables
2521
2523
return false ;
2522
2524
}
2523
- #endif
2524
2525
return true ;
2525
2526
}
2526
2527
// No pre-load for whole and aligned region writes
0 commit comments