@@ -725,7 +725,6 @@ void CISA_IR_Builder::LinkTimeOptimization(
725
725
{
726
726
auto & calleeBuilder = callee->fg .builder ;
727
727
auto & callerBuilder = caller->fg .builder ;
728
- const RegionDesc *rDesc = callerBuilder->getRegionStride1 ();
729
728
replacedArgDcl = replacedArgDcl ?
730
729
replacedArgDcl :
731
730
callerBuilder->createDeclareNoLookup (" newArg" , G4_GRF, callerBuilder->numEltPerGRF <Type_UD>(), 32 , Type_UD);
@@ -748,7 +747,7 @@ void CISA_IR_Builder::LinkTimeOptimization(
748
747
replacedArgDcl->getRegVar (),
749
748
src->asSrcRegRegion ()->getRegOff (),
750
749
src->asSrcRegRegion ()->getSubRegOff (),
751
- rDesc ,
750
+ src-> asSrcRegRegion ()-> getRegion () ,
752
751
src->getType ());
753
752
inst->setSrc (replacedArgSrc, i);
754
753
}
@@ -771,24 +770,47 @@ void CISA_IR_Builder::LinkTimeOptimization(
771
770
}
772
771
773
772
}
774
- for (G4_INST* inst : callerInsts)
773
+
774
+ // Trace backward from callsite to replace Arg with newArg
775
+ auto rIt = it;
776
+ rIt --;
777
+ for (; rIt != callerInsts.begin (); --rIt)
775
778
{
779
+ G4_INST *inst = *rIt;
780
+ if (inst->opcode () == G4_pseudo_fcall ||
781
+ inst->opcode () == G4_call)
782
+ {
783
+ break ;
784
+ }
776
785
G4_Operand *dst = inst->getDst ();
777
786
if (!dst) continue ;
778
787
G4_Declare* topDcl = dst->getTopDcl ();
779
788
if (!topDcl) continue ;
780
789
G4_Declare* rootDcl = topDcl->getRootDeclare ();
781
790
if (callerBuilder->isPreDefArg (rootDcl))
782
791
{
792
+ G4_Declare* dcl = dst->getBase ()->asRegVar ()->getDeclare ();
793
+ G4_Declare* newDcl = callerBuilder->createTempVar (dcl->getTotalElems (), dcl->getElemType (), Any, dcl->getName ());
794
+ newDcl->setAliasDeclare (replacedArgDcl, dcl->getAliasOffset ());
783
795
G4_DstRegRegion *replacedArgDst = callerBuilder->createDst (
784
- replacedArgDcl ->getRegVar (),
796
+ newDcl ->getRegVar (),
785
797
dst->asDstRegRegion ()->getRegOff (),
786
798
dst->asDstRegRegion ()->getSubRegOff (),
787
799
dst->asDstRegRegion ()->getHorzStride (),
788
800
dst->getType ());
789
801
inst->setDest (replacedArgDst);
790
802
}
791
-
803
+ }
804
+ auto fIt = it;
805
+ fIt ++;
806
+ for (; fIt != callerInsts.end (); ++fIt )
807
+ {
808
+ G4_INST *inst = *fIt ;
809
+ if (inst->opcode () == G4_pseudo_fcall ||
810
+ inst->opcode () == G4_call)
811
+ {
812
+ break ;
813
+ }
792
814
for (int i = 0 , numSrc = inst->getNumSrc (); i < numSrc; ++i)
793
815
{
794
816
G4_Operand *src = inst->getSrc (i);
@@ -802,7 +824,7 @@ void CISA_IR_Builder::LinkTimeOptimization(
802
824
replacedRetDcl->getRegVar (),
803
825
src->asSrcRegRegion ()->getRegOff (),
804
826
src->asSrcRegRegion ()->getSubRegOff (),
805
- rDesc ,
827
+ src-> asSrcRegRegion ()-> getRegion () ,
806
828
src->getType ());
807
829
inst->setSrc (replacedRetSrc, i);
808
830
}
@@ -1132,9 +1154,8 @@ void CISA_IR_Builder::LinkTimeOptimization(
1132
1154
}
1133
1155
else if (topDcl && (topDcl == replacedArgDcl || topDcl == replacedRetDcl))
1134
1156
{
1135
- G4_Declare* newDcl = caller->fg .builder ->createTempVar (topDcl->getTotalElems (), topDcl->getElemType (), Any, topDcl->getName ());
1136
- newDcl->setAliasDeclare (topDcl, 0 );
1137
- caller->Declares .push_back (newDcl);
1157
+ G4_Declare* newDcl = caller->fg .builder ->createTempVar (dcl->getTotalElems (), dcl->getElemType (), Any, dcl->getName ());
1158
+ newDcl->setAliasDeclare (topDcl, dcl->getAliasOffset ());
1138
1159
}
1139
1160
else
1140
1161
{
@@ -1152,6 +1173,7 @@ void CISA_IR_Builder::LinkTimeOptimization(
1152
1173
}
1153
1174
}
1154
1175
};
1176
+ std::map<G4_Label*, G4_Label*> labelMap;
1155
1177
if (inlining)
1156
1178
{
1157
1179
auto & builder = caller->fg .builder ;
@@ -1160,13 +1182,37 @@ void CISA_IR_Builder::LinkTimeOptimization(
1160
1182
G4_INST* ra = caller->fg .createNewLabelInst (raLabel);
1161
1183
// We don't need calleeLabel (first instruction) anymore after inlining
1162
1184
calleeInsts.pop_front ();
1185
+ // Iterate once to clone labels
1186
+ for (G4_INST* inst : calleeInsts)
1187
+ {
1188
+ if (inst->opcode () == G4_label)
1189
+ {
1190
+ std::string name = inst->getSrc (0 )->asLabel ()->getLabel ();
1191
+ G4_Label *newLabel = builder->createLabel (name + " _" + std::to_string (funcUID), LABEL_BLOCK);
1192
+ labelMap[inst->getSrc (0 )->asLabel ()] = newLabel;
1193
+ }
1194
+ }
1195
+
1196
+ // clone instructions
1163
1197
for (G4_INST* fret : calleeInsts)
1164
1198
{
1165
- G4_INST* inst = fret->cloneInst ();
1199
+ G4_INST* inst = nullptr ;
1200
+ if (fret->opcode () == G4_label)
1201
+ {
1202
+ inst = caller->fg .createNewLabelInst (labelMap[fret->getSrc (0 )->asLabel ()]);
1203
+ }
1204
+ else
1205
+ {
1206
+ inst = fret->cloneInst ();
1207
+ }
1166
1208
for (int i = 0 , numSrc = inst->getNumSrc (); i < numSrc; ++i)
1167
1209
{
1168
1210
cloneDcl (inst->getSrc (i));
1169
1211
}
1212
+ if (inst->opcode () == G4_goto)
1213
+ {
1214
+ inst->asCFInst ()->setUip (labelMap[fret->asCFInst ()->getUip ()]);
1215
+ }
1170
1216
cloneDcl (inst->getDst ());
1171
1217
cloneDcl (inst->getPredicate ());
1172
1218
// add predicate into declaration list
0 commit comments