Skip to content

Commit 2f4b9ba

Browse files
dlei6gigcbot
authored and committed
Fix memcpy addrspace mismatch after SubroutineInliner pass
This is a workaround (WA) for the SubroutineInliner pass in LLVM, which creates an alloca to copy the data of ByVal struct arguments when inlining. It incorrectly casts the alloca to the argument's addrspace instead of the local addrspace, which results in accessing the wrong memory.
1 parent 48e6f44 commit 2f4b9ba

File tree

1 file changed

+37
-7
lines changed

1 file changed

+37
-7
lines changed

IGC/Compiler/CISACodeGen/GenCodeGenModule.cpp

Lines changed: 37 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -899,8 +899,9 @@ namespace {
899899

900900
void getAnalysisUsage(AnalysisUsage& AU) const override;
901901
bool runOnSCC(CallGraphSCC& SCC) override;
902-
void verifyIfGEPIandLoadHasTheSameAS(CallGraphSCC& SCC);
902+
void verifyAddrSpaceMismatch(CallGraphSCC& SCC);
903903
void visitGetElementPtrInst(GetElementPtrInst& I);
904+
void visitMemCpyInst(MemCpyInst& I);
904905

905906
using llvm::Pass::doFinalization;
906907
bool doFinalization(CallGraph& CG) override {
@@ -953,14 +954,43 @@ void SubroutineInliner::visitGetElementPtrInst(GetElementPtrInst& GEPI)
953954
}
954955
}
955956

957+
// Repairs a memcpy whose pointer operands were cast into the wrong address space.
//
// Only memcpys that copy from one alloca to another are touched, as determined
// from I.getSource() / I.getDest() (the operands as LLVM reports them with
// pointer casts stripped). For each side, if the address space of the alloca
// differs from the address space of the raw operand actually passed to the
// memcpy, a new bitcast of the alloca is inserted right before I. The bitcast
// keeps the raw operand's pointee type but uses the alloca's own address space,
// and it replaces the raw operand in I.
//
// I - the memcpy instruction being visited; its operands may be rewritten.
void SubroutineInliner::visitMemCpyInst(MemCpyInst& I)
{
    // Snapshot all four values up front so that rewriting the source operand
    // cannot influence how the destination operand is examined.
    Value* const rawSrc = I.getRawSource();
    Value* const rawDst = I.getRawDest();
    Value* const baseSrc = I.getSource();
    Value* const baseDst = I.getDest();

    // Nothing to do unless this is an alloca-to-alloca copy.
    if (!isa<AllocaInst>(baseSrc) || !isa<AllocaInst>(baseDst))
    {
        return;
    }

    // Re-derives one operand from its alloca when the two disagree on address space.
    auto recastIntoAllocaAddrSpace = [&I](Value* rawPtr, Value* basePtr)
    {
        const unsigned allocaAS = basePtr->getType()->getPointerAddressSpace();
        if (allocaAS == rawPtr->getType()->getPointerAddressSpace())
        {
            return;
        }

        // Same pointee type as the operand being replaced, but in the alloca's address space.
        PointerType* const fixedTy =
            PointerType::get(rawPtr->getType()->getPointerElementType(), allocaAS);
        Value* const fixedPtr =
            BitCastInst::Create(Instruction::BitCast, basePtr, fixedTy, "", &I);
        I.replaceUsesOfWith(rawPtr, fixedPtr);
    };

    // Source first, then destination.
    recastIntoAllocaAddrSpace(rawSrc, baseSrc);
    recastIntoAllocaAddrSpace(rawDst, baseDst);
}
982+
956983
// When this pass encounters a byVal argument, it creates an alloca to then copy the data from global memory to local memory.
957984
// When creating a new alloca, it replaces all occurrences of the argument in the function with that alloca.
958-
// The problem arises when the pointer operant (or more precisely its address space) is replaced in GetElementPtrInst.
959-
// Because from now on the resulting pointer of this instruction is in a different address space.
960-
// On the other hand, a load instruction that uses the returned GetElementPtrInst pointer still operates on the old address space.
961-
// By which we are referring to the wrong area of ​​memory. The resolution for this problem is to create new load instruction.
985+
// Problems arise when the pointer operand (or more precisely its address space) is replaced:
986+
// 1. In GetElementPtrInst, the resulting pointer of this instruction is in a different address space.
987+
// On the other hand, a load instruction that uses the returned GetElementPtrInst pointer still operates on the old address space.
988+
// As a result, we end up referring to the wrong area of memory. The resolution for this problem is to create a new load instruction.
989+
// 2. In MemCpyInst, specifically generated for structs used in loops, where two allocas of the same struct type are created and used
990+
// to save and restore struct values. When one is copied to another, this pass incorrectly uses the addrspace of the ByVal argument
991+
// instead of the local addrspace of the alloca. We fix this by casting the src and dst of the memcpy to the correct addrspace.
962992
// This is a workaround (WA) for a bug in LLVM 11.
963-
void SubroutineInliner::verifyIfGEPIandLoadHasTheSameAS(CallGraphSCC& SCC)
993+
void SubroutineInliner::verifyAddrSpaceMismatch(CallGraphSCC& SCC)
964994
{
965995
for (CallGraphNode* Node : SCC)
966996
{
@@ -973,7 +1003,7 @@ bool SubroutineInliner::runOnSCC(CallGraphSCC& SCC)
9731003
{
9741004
FSA = &getAnalysis<EstimateFunctionSize>();
9751005
bool changed = LegacyInlinerBase::runOnSCC(SCC);
976-
if (changed) verifyIfGEPIandLoadHasTheSameAS(SCC);
1006+
if (changed) verifyAddrSpaceMismatch(SCC);
9771007

9781008
return changed;
9791009
}

0 commit comments

Comments
 (0)