Skip to content

Commit 3b69370

Browse files
PawelJurek authored and igcbot committed
Pattern match for loads from a pointer coming from another load.
1 parent 31052fb commit 3b69370

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,10 @@ static void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSi
789789
mpm.add(llvm::createDeadCodeEliminationPass());
790790
mpm.add(createEmu64OpsPass());
791791
ctx.m_hasEmu64BitInsts = true;
792+
if(!isOptDisabled)
793+
{
794+
mpm.add(new GenSpecificPattern());
795+
}
792796
}
793797

794798
mpm.add(createInstSimplifyLegacyPass());

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2705,6 +2705,76 @@ void GenSpecificPattern::visitBitCastInst(BitCastInst& I)
27052705
}
27062706
}
27072707

2708+
/*
2709+
Matches a pattern where pointer to load instruction is fetched by other load instruction.
2710+
On targets that do not support 64 bit operations, Emu64OpsPass will insert pair_to_ptr intrinsic
2711+
between the loads and InstructionCombining will not optimize this case.
2712+
2713+
This function changes following pattern:
2714+
%3 = load <2 x i32>, <2 x i32> addrspace(1)* %2, align 64
2715+
%4 = extractelement <2 x i32> %3, i32 0
2716+
%5 = extractelement <2 x i32> %3, i32 1
2717+
%6 = call %union._XReq addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* @llvm.genx.GenISA.pair.to.ptr.p1p1p1p1p1p1p1p1union._XReq(i32 %4, i32 %5)
2718+
%7 = bitcast %union._XReq addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* %6 to i64 addrspace(1)*
2719+
%8 = bitcast i64 addrspace(1)* %7 to <2 x i32> addrspace(1)*
2720+
%9 = load <2 x i32>, <2 x i32> addrspace(1)* %8, align 64
2721+
2722+
to:
2723+
%3 = bitcast <2 x i32> addrspace(1)* %2 to <2 x i32> addrspace(1)* addrspace(1)*
2724+
%4 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)* addrspace(1)* %3, align 64
2725+
... dead code
2726+
%11 = load <2 x i32>, <2 x i32> addrspace(1)* %4, align 64
2727+
*/
2728+
void GenSpecificPattern::visitLoadInst(LoadInst &LI) {
2729+
Value* PO = LI.getPointerOperand();
2730+
std::vector<Value*> OneUseValues = { PO };
2731+
while (isa<BitCastInst>(PO)) {
2732+
PO = cast<BitCastInst>(PO)->getOperand(0);
2733+
OneUseValues.push_back(PO);
2734+
}
2735+
2736+
bool IsPairToPtrInst = (isa<GenIntrinsicInst>(PO) &&
2737+
cast<GenIntrinsicInst>(PO)->getIntrinsicID() ==
2738+
GenISAIntrinsic::GenISA_pair_to_ptr);
2739+
2740+
if (!IsPairToPtrInst)
2741+
return;
2742+
2743+
// check if this pointer comes from a load.
2744+
auto CallInst = cast<GenIntrinsicInst>(PO);
2745+
auto Op0 = dyn_cast<ExtractElementInst>(CallInst->getArgOperand(0));
2746+
auto Op1 = dyn_cast<ExtractElementInst>(CallInst->getArgOperand(1));
2747+
bool PointerComesFromALoad = (Op0 && Op1 && isa<ConstantInt>(Op0->getIndexOperand()) &&
2748+
isa<ConstantInt>(Op1->getIndexOperand()) &&
2749+
cast<ConstantInt>(Op0->getIndexOperand())->getZExtValue() == 0 &&
2750+
cast<ConstantInt>(Op1->getIndexOperand())->getZExtValue() == 1 &&
2751+
isa<LoadInst>(Op0->getVectorOperand()) &&
2752+
isa<LoadInst>(Op1->getVectorOperand()) &&
2753+
Op0->getVectorOperand() == Op1->getVectorOperand());
2754+
2755+
if (!PointerComesFromALoad)
2756+
return;
2757+
2758+
OneUseValues.insert(OneUseValues.end(), { Op0, Op1 });
2759+
2760+
if (!std::all_of(OneUseValues.begin(), OneUseValues.end(), [](auto v) { return v->hasOneUse(); }))
2761+
return;
2762+
2763+
auto VectorLoadInst = cast<LoadInst>(Op0->getVectorOperand());
2764+
if (VectorLoadInst->getNumUses() != 2)
2765+
return;
2766+
2767+
auto PointerOperand = VectorLoadInst->getPointerOperand();
2768+
PointerType* newLoadPointerType = PointerType::get(
2769+
LI.getPointerOperand()->getType(), PointerOperand->getType()->getPointerAddressSpace());
2770+
IRBuilder<> builder(VectorLoadInst);
2771+
auto CastedPointer =
2772+
builder.CreateBitCast(PointerOperand, newLoadPointerType);
2773+
auto NewLoadInst = IGC::cloneLoad(VectorLoadInst, CastedPointer);
2774+
2775+
LI.setOperand(0, NewLoadInst);
2776+
}
2777+
27082778
void GenSpecificPattern::visitZExtInst(ZExtInst& ZEI)
27092779
{
27102780
CmpInst* Cmp = dyn_cast<CmpInst>(ZEI.getOperand(0));

IGC/Compiler/CustomSafeOptPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ namespace IGC
168168
void visitSDiv(llvm::BinaryOperator& I);
169169
void visitTruncInst(llvm::TruncInst& I);
170170
void visitBitCastInst(llvm::BitCastInst& I);
171+
void visitLoadInst(llvm::LoadInst& I);
171172
#if LLVM_VERSION_MAJOR >= 10
172173
void visitFNeg(llvm::UnaryOperator& I);
173174
#endif

0 commit comments

Comments
 (0)