Skip to content

Commit 8723db6

Browse files
Konstantin Vladimirov and igcbot
authored and committed
refactor stack calls and add one more check to LIT
Factoring out return value processing to smaller functions. Introducing vector instead of map.
1 parent 6bfd67b commit 8723db6

File tree

1 file changed

+72
-41
lines changed

1 file changed

+72
-41
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPrologEpilogInsertion.cpp

Lines changed: 72 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -135,8 +135,11 @@ class GenXPrologEpilogInsertion
135135
// caller side argument layout
136136
void generateStackCall(CallInst *CI);
137137

138-
// generateStackCall subroutine
138+
// generateStackCall subroutines: writing args, extracting results
139139
unsigned writeArgs(CallInst *CI, Value *SpArgs, IRBuilder<> &IRB);
140+
std::vector<std::pair<Instruction *, Instruction *>>
141+
buildWorkList(CallInst *CI, Value *OrigSp, bool UseMemForRet);
142+
void extractResults(CallInst *CI, Value *OrigSp, IRBuilder<> &IRB);
140143

141144
void generateAlloca(CallInst *CI);
142145

@@ -464,15 +467,15 @@ void GenXPrologEpilogInsertion::generateFunctionEpilog(Function &F,
464467
unsigned GenXPrologEpilogInsertion::writeArgs(CallInst *CI, Value *SpArgs,
465468
IRBuilder<> &IRB) {
466469
unsigned Offset = 0;
467-
std::map<Value *, Value *> ReplaceArgs;
470+
std::vector<std::pair<int, Value *>> ReplaceArgs; // ArgNo, Arg
471+
ReplaceArgs.reserve(CI->getNumArgOperands());
468472

469473
for (auto &Arg : CI->arg_operands()) {
470474
// it is tempting to skip here if Arg is already in ReplaceArgs,
471475
// but it will be wrong to do so, because consider:
472476
// foo(x, x, y, y, x, y)
473477
// on callee side we are expecting 6 positions in predef args
474478
// we can not optimize these out on caller side
475-
476479
auto *OrigTy = Arg->getType();
477480
if (OrigTy->getScalarType()->isIntegerTy(1)) {
478481
if (!HandleMaskArgs)
@@ -502,51 +505,28 @@ unsigned GenXPrologEpilogInsertion::writeArgs(CallInst *CI, Value *SpArgs,
502505
if (OrigTy->getScalarType()->isIntegerTy(1))
503506
ArgRegWrite = cast<Instruction>(
504507
IRB.CreateBitOrPointerCast(ArgRegWrite,OrigTy));
505-
ReplaceArgs[Arg] = ArgRegWrite;
508+
ReplaceArgs.emplace_back(Arg.getOperandNo(), ArgRegWrite);
506509
Offset += ArgSize;
507510
}
508511
}
509512

510-
for (auto &&Pair : ReplaceArgs)
511-
CI->replaceUsesOfWith(Pair.first, Pair.second);
513+
// ">=" is used here to account for memory-passing of the argument tail
514+
IGC_ASSERT_MESSAGE(CI->getNumArgOperands() >= ReplaceArgs.size(),
515+
"ReplaceArgs too large");
516+
for (auto &&NewArg : ReplaceArgs)
517+
CI->setArgOperand(NewArg.first, NewArg.second);
512518
return Offset;
513519
}
514520

515-
// generate caller site of stack call
516-
void GenXPrologEpilogInsertion::generateStackCall(CallInst *CI) {
517-
LLVM_DEBUG(dbgs() << "Generating stack call for:\n");
518-
LLVM_DEBUG(CI->dump());
519-
LLVM_DEBUG(dbgs() << "\n");
520-
IRBuilder<> IRB(CI);
521-
Value *OrigSp = buildReadPredefReg(PreDefined_Vars::PREDEFINED_FE_SP, IRB,
522-
IRB.getInt64Ty(), true);
523-
// write args, return total offset in arg register
524-
unsigned Offset = writeArgs(CI, OrigSp, IRB);
525-
526-
CI->setMetadata(
527-
InstMD::FuncArgSize,
528-
MDNode::get(CI->getContext(),
529-
ConstantAsMetadata::get(IRB.getInt32(
530-
(Offset + ST->getGRFWidth() - 1) / ST->getGRFWidth()))));
531-
bool isVoidCall = CI->getType()->isVoidTy();
532-
CI->setMetadata(
533-
InstMD::FuncRetSize,
534-
MDNode::get(CI->getContext(),
535-
ConstantAsMetadata::get(IRB.getInt32(divideCeil(
536-
(isVoidCall ? 0
537-
: (DL->getTypeSizeInBits(CI->getType())) /
538-
genx::ByteBits),
539-
ST->getGRFWidth())))));
540-
if (isVoidCall)
541-
return;
542-
IRB.SetInsertPoint(CI->getNextNode());
543-
bool UseMemForRet =
544-
ForceRetMemPassing ||
545-
DL->getTypeSizeInBits(CI->getType()) / genx::ByteBits > RetRegSize;
546-
if (UseMemForRet)
547-
OrigSp = buildReadPredefReg(PreDefined_Vars::PREDEFINED_FE_SP, IRB,
548-
IRB.getInt64Ty(), CI, true);
549-
// read retvalue
521+
// build worklist for extraction
522+
// worklist entry format:
523+
// first: actual return
524+
// second: return insertion point
525+
// this might be critical for structure return due to the odd convention of
526+
// returning structures
527+
std::vector<std::pair<Instruction *, Instruction *>>
528+
GenXPrologEpilogInsertion::buildWorkList(CallInst *CI, Value *OrigSp,
529+
bool UseMemForRet) {
550530
std::vector<std::pair<Instruction *, Instruction *>> Worklist;
551531
if (isa<StructType>(CI->getType())) {
552532
for (auto *U : CI->users()) {
@@ -556,7 +536,26 @@ void GenXPrologEpilogInsertion::generateStackCall(CallInst *CI) {
556536
Worklist.push_back({cast<Instruction>(U), cast<Instruction>(U)});
557537
}
558538
} else
539+
// OrigSp, as an instruction, is read.predef.reg
559540
Worklist.push_back({CI, UseMemForRet ? cast<Instruction>(OrigSp) : CI});
541+
return Worklist;
542+
}
543+
544+
// extract results from stack call return
545+
void GenXPrologEpilogInsertion::extractResults(CallInst *CI, Value *OrigSp,
546+
IRBuilder<> &IRB) {
547+
IRB.SetInsertPoint(CI->getNextNode());
548+
bool UseMemForRet =
549+
ForceRetMemPassing ||
550+
DL->getTypeSizeInBits(CI->getType()) / genx::ByteBits > RetRegSize;
551+
if (UseMemForRet)
552+
OrigSp = buildReadPredefReg(PreDefined_Vars::PREDEFINED_FE_SP, IRB,
553+
IRB.getInt64Ty(), CI, true);
554+
555+
// collect return slots
556+
auto Worklist = buildWorkList(CI, OrigSp, UseMemForRet);
557+
558+
// process return slots
560559
for (auto &I : Worklist) {
561560
auto *ActualRet = I.first;
562561
IRB.SetInsertPoint(I.second->getNextNode());
@@ -628,6 +627,38 @@ void GenXPrologEpilogInsertion::generateStackCall(CallInst *CI) {
628627
}
629628
}
630629

630+
// Generate the caller side of a stack call for CI.
// Steps, in order:
//   1. obtain OrigSp through buildReadPredefReg for PREDEFINED_FE_SP;
//   2. let writeArgs() process the call arguments and report the total
//      offset used in the argument register;
//   3. attach FuncArgSize and FuncRetSize metadata to the call;
//   4. for non-void calls, delegate reading of the return value to
//      extractResults().
631+
void GenXPrologEpilogInsertion::generateStackCall(CallInst *CI) {
632+
LLVM_DEBUG(dbgs() << "Generating stack call for:\n");
633+
LLVM_DEBUG(CI->dump());
634+
LLVM_DEBUG(dbgs() << "\n");
635+
// The builder is constructed on CI, so instructions created through it are
// inserted before the call (LLVM IRBuilder(Instruction *) semantics).
IRBuilder<> IRB(CI);
636+
Value *OrigSp = buildReadPredefReg(PreDefined_Vars::PREDEFINED_FE_SP, IRB,
637+
IRB.getInt64Ty(), true);
638+
// write args, return total offset in arg register
639+
unsigned Offset = writeArgs(CI, OrigSp, IRB);
640+
641+
// FuncArgSize: Offset rounded up to a whole number of GRF-width units
// (manual ceiling division by ST->getGRFWidth()).
CI->setMetadata(
642+
InstMD::FuncArgSize,
643+
MDNode::get(CI->getContext(),
644+
ConstantAsMetadata::get(IRB.getInt32(
645+
(Offset + ST->getGRFWidth() - 1) / ST->getGRFWidth()))));
646+
bool isVoidCall = CI->getType()->isVoidTy();
647+
// FuncRetSize: size of the return type (bit size divided by genx::ByteBits),
// rounded up to GRF-width units via divideCeil; 0 for a void call.
CI->setMetadata(
648+
InstMD::FuncRetSize,
649+
MDNode::get(CI->getContext(),
650+
ConstantAsMetadata::get(IRB.getInt32(divideCeil(
651+
(isVoidCall ? 0
652+
: (DL->getTypeSizeInBits(CI->getType())) /
653+
genx::ByteBits),
654+
ST->getGRFWidth())))));
655+
// A void call has no result to read back, so the metadata is all it needs.
if (isVoidCall)
656+
return;
657+
658+
// read the return value; extractResults() moves the builder's insertion
// point past the call on its own
659+
extractResults(CI, OrigSp, IRB);
660+
}
661+
631662
// alloca_base = FE_SP
632663
// FE_SP += sizeof(alloca)
633664
void GenXPrologEpilogInsertion::generateAlloca(CallInst *CI) {

0 commit comments

Comments
 (0)