@@ -10770,174 +10770,7 @@ struct DclMapInfo {
10770
10770
} // namespace
10771
10771
10772
10772
//
10773
- // mov (M1, 8) V45(0,0)<1>:q V42(0,0)<8;8,1>:d /// $7
10774
- // mov (M3, 8) V45(2,0)<1>:q V42(1,0)<8;8,1>:d /// $8
10775
- // shl (M1, 8) V46(0,0)<1>:q V45(0,0)<8;8,1>:q 0x2:q /// $9
10776
- // shl (M3, 8) V46(2,0)<1>:q V45(2,0)<8;8,1>:q 0x2:q /// $10
10777
- //
10778
- // into
10779
- //
10780
- // mov (M1, 8) V45L(0,0)<1>:q V42(0,0)<8;8,1>:d /// $7
10781
- // mov (M3, 8) V45H(0,0)<1>:q V42(1,0)<8;8,1>:d /// $8
10782
- // shl (M1, 8) V46(0,0)<1>:q V45L(0,0)<8;8,1>:q 0x2:q /// $9
10783
- // shl (M3, 8) V46(2,0)<1>:q V45H(0,0)<8;8,1>:q 0x2:q /// $10
10784
- //
10785
- void Optimizer::splitVariables ()
10786
- {
10787
- if (kernel.getIntKernelAttribute (Attributes::ATTR_Target) != VISA_3D)
10788
- {
10789
- return ;
10790
- }
10791
-
10792
- if (builder.getOption (vISA_Debug))
10793
- {
10794
- return ;
10795
- }
10796
-
10797
- // Only for simd16 and simd32.
10798
- if (kernel.getSimdSize () == 8 )
10799
- {
10800
- return ;
10801
- }
10802
-
10803
- // All declarations in this map are candidates for variable splitting.
10804
- std::map<const G4_Declare *, DclMapInfo *> DclMap;
10805
-
10806
- // All instructions to be updated, and the flag indicates if this
10807
- // instruction defines low part or ont.
10808
- std::vector<std::pair<G4_INST *, bool >> InstsToUpdate;
10809
-
10810
- for (G4_BB *bb : fg)
10811
- {
10812
- for (G4_INST *inst : *bb)
10813
- {
10814
- // Only for variables defined by non-send instructions.
10815
- if (inst->opcode () == G4_label || inst->isSend ())
10816
- continue ;
10817
-
10818
- // TODO: Allow global variables.
10819
- auto Dst = inst->getDst ();
10820
- if (!Dst || Dst->getHorzStride () != 1 ||
10821
- fg.globalOpndHT .isOpndGlobal (Dst))
10822
- continue ;
10823
- auto Dcl = Dst->getTopDcl ();
10824
- if (!Dcl || !isCandidateDecl (Dcl, builder))
10825
- continue ;
10826
-
10827
- unsigned LBound = Dst->getLeftBound ();
10828
- unsigned RBound = Dst->getRightBound ();
10829
- if ((LBound == LoLBound && RBound == LoRBound) ||
10830
- (LBound == HiLBound && RBound == HiRBound))
10831
- {
10832
- // OK, only defines low or high part.
10833
- // Bound constraints imply that def and uses are in two grfs.
10834
- }
10835
- else
10836
- {
10837
- continue ;
10838
- }
10839
-
10840
- if (inst->useEmpty ())
10841
- {
10842
- continue ;
10843
- }
10844
-
10845
- // Check all uses.
10846
- bool DoSplitting = true ;
10847
- for (auto UI = inst->use_begin (), UE = inst->use_end (); UI != UE; ++UI)
10848
- {
10849
- G4_Operand *Op = UI->first ->getOperand (UI->second );
10850
- if (Op && Op->compareOperand (Dst) != Rel_eq)
10851
- {
10852
- DoSplitting = false ;
10853
- break ;
10854
- }
10855
-
10856
- // The def-use chain is broken in the following code:
10857
- //
10858
- // mov (8, M1_NM) V33(0,0):df 0 <- not def
10859
- // mov (8, M3_NM) V33(2,0):df 0 <- not def
10860
- // mov (8, M1) V33(0,0):df 1 <- def
10861
- // mov (8, M3_NM) V33(2,0):df 1 <- def
10862
- //
10863
- // use of V33(0,0) and V33(2,0)
10864
- //
10865
- // FIXME: remove this if def-use chain is accurate.
10866
- if (UI->first ->isWriteEnableInst () && !inst->isWriteEnableInst ())
10867
- {
10868
- DoSplitting = false ;
10869
- break ;
10870
- }
10871
-
10872
- // Only allow single definition:
10873
- // mov (8, M1_NM) V33:q 0
10874
- // mov (8, M1) V33:q 1
10875
- // add (8, M1_NM V34:q V33:q V32:q
10876
- //
10877
- if (UI->first ->getSingleDef (UI->second ) != inst)
10878
- {
10879
- DoSplitting = false ;
10880
- break ;
10881
- }
10882
- }
10883
-
10884
- if (!DoSplitting)
10885
- continue ;
10886
-
10887
- G4_Type Ty = Dcl->getElemType ();
10888
- auto Iter = DclMap.find (Dcl);
10889
- if (Iter == DclMap.end ())
10890
- {
10891
- unsigned NElts = Dcl->getTotalElems ();
10892
- auto DclLow = builder.createTempVar (NElts / 2 , Ty, GRFALIGN, " Lo" );
10893
- auto DclHi = builder.createTempVar (NElts / 2 , Ty, GRFALIGN, " Hi" );
10894
- DclMap[Dcl] = new DclMapInfo (DclLow, DclHi);
10895
- }
10896
- bool IsLow = LBound == LoLBound;
10897
- InstsToUpdate.push_back (std::make_pair (inst, IsLow));
10898
- }
10899
- }
10900
-
10901
- // Iterate instructions that define hi or low parts. Update their defs and uses.
10902
- for (auto IPair : InstsToUpdate)
10903
- {
10904
- G4_INST *Inst = IPair.first ;
10905
- bool IsLow = IPair.second ;
10906
-
10907
- // Update Inst's Dst.
10908
- {
10909
- G4_DstRegRegion *Dst = Inst->getDst ();
10910
- G4_Type Ty = Dst->getType ();
10911
- auto NewDcl = DclMap[Dst->getTopDcl ()]->getDcl (builder, Ty, IsLow);
10912
- auto NewDst = builder.createDst (NewDcl->getRegVar (), 0 , 0 , 1 , Ty);
10913
- Inst->setDest (NewDst);
10914
- }
10915
-
10916
- // Update Inst's uses.
10917
- for (auto UI = Inst->use_begin (), UE = Inst->use_end (); UI != UE; ++UI)
10918
- {
10919
- G4_INST *UseInst = UI->first ;
10920
- G4_SrcRegRegion *UseOpnd = UseInst->getOperand (UI->second )->asSrcRegRegion ();
10921
- G4_Type UseTy = UseOpnd->getType ();
10922
- auto NewUseDcl = DclMap[UseOpnd->getTopDcl ()]->getDcl (builder, UseTy, IsLow);
10923
- auto NewUseOpnd = builder.createSrcRegRegion (
10924
- UseOpnd->getModifier (), UseOpnd->getRegAccess (),
10925
- NewUseDcl->getRegVar (), 0 , 0 , UseOpnd->getRegion (), UseTy);
10926
- UseInst->setSrc (NewUseOpnd, G4_INST::getSrcNum (UI->second ));
10927
- }
10928
- }
10929
-
10930
- // Cleanup.
10931
- for (auto DI : DclMap)
10932
- {
10933
- delete DI.second ;
10934
- }
10935
- }
10936
-
10937
- //
10938
- // replacement of the above that can handle global variables
10939
- // basically we split any 4GRF variables (they typically result from
10940
- // simd16 64-bit vars) into two half if
10773
+ // We split any 4GRF variables (they typically result from simd16 64-bit vars) into two halves if
10941
10774
// -- they are not address taken or used in send
10942
10775
// -- none of the operands cross from the 2nd to the 3rd GRF
10943
10776
// This is intended to give RA more freedom as the split variables do
0 commit comments