@@ -2705,6 +2705,76 @@ void GenSpecificPattern::visitBitCastInst(BitCastInst& I)
2705
2705
}
2706
2706
}
2707
2707
2708
+ /*
2709
+ Matches a pattern where pointer to load instruction is fetched by other load instruction.
2710
+ On targets that do not support 64 bit operations, Emu64OpsPass will insert pair_to_ptr intrinsic
2711
+ between the loads and InstructionCombining will not optimize this case.
2712
+
2713
+ This function changes following pattern:
2714
+ %3 = load <2 x i32>, <2 x i32> addrspace(1)* %2, align 64
2715
+ %4 = extractelement <2 x i32> %3, i32 0
2716
+ %5 = extractelement <2 x i32> %3, i32 1
2717
+ %6 = call %union._XReq addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* @llvm.genx.GenISA.pair.to.ptr.p1p1p1p1p1p1p1p1union._XReq(i32 %4, i32 %5)
2718
+ %7 = bitcast %union._XReq addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* %6 to i64 addrspace(1)*
2719
+ %8 = bitcast i64 addrspace(1)* %7 to <2 x i32> addrspace(1)*
2720
+ %9 = load <2 x i32>, <2 x i32> addrspace(1)* %8, align 64
2721
+
2722
+ to:
2723
+ %3 = bitcast <2 x i32> addrspace(1)* %2 to <2 x i32> addrspace(1)* addrspace(1)*
2724
+ %4 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)* addrspace(1)* %3, align 64
2725
+ ... dead code
2726
+ %11 = load <2 x i32>, <2 x i32> addrspace(1)* %4, align 64
2727
+ */
2728
+ void GenSpecificPattern::visitLoadInst (LoadInst &LI) {
2729
+ Value* PO = LI.getPointerOperand ();
2730
+ std::vector<Value*> OneUseValues = { PO };
2731
+ while (isa<BitCastInst>(PO)) {
2732
+ PO = cast<BitCastInst>(PO)->getOperand (0 );
2733
+ OneUseValues.push_back (PO);
2734
+ }
2735
+
2736
+ bool IsPairToPtrInst = (isa<GenIntrinsicInst>(PO) &&
2737
+ cast<GenIntrinsicInst>(PO)->getIntrinsicID () ==
2738
+ GenISAIntrinsic::GenISA_pair_to_ptr);
2739
+
2740
+ if (!IsPairToPtrInst)
2741
+ return ;
2742
+
2743
+ // check if this pointer comes from a load.
2744
+ auto CallInst = cast<GenIntrinsicInst>(PO);
2745
+ auto Op0 = dyn_cast<ExtractElementInst>(CallInst->getArgOperand (0 ));
2746
+ auto Op1 = dyn_cast<ExtractElementInst>(CallInst->getArgOperand (1 ));
2747
+ bool PointerComesFromALoad = (Op0 && Op1 && isa<ConstantInt>(Op0->getIndexOperand ()) &&
2748
+ isa<ConstantInt>(Op1->getIndexOperand ()) &&
2749
+ cast<ConstantInt>(Op0->getIndexOperand ())->getZExtValue () == 0 &&
2750
+ cast<ConstantInt>(Op1->getIndexOperand ())->getZExtValue () == 1 &&
2751
+ isa<LoadInst>(Op0->getVectorOperand ()) &&
2752
+ isa<LoadInst>(Op1->getVectorOperand ()) &&
2753
+ Op0->getVectorOperand () == Op1->getVectorOperand ());
2754
+
2755
+ if (!PointerComesFromALoad)
2756
+ return ;
2757
+
2758
+ OneUseValues.insert (OneUseValues.end (), { Op0, Op1 });
2759
+
2760
+ if (!std::all_of (OneUseValues.begin (), OneUseValues.end (), [](auto v) { return v->hasOneUse (); }))
2761
+ return ;
2762
+
2763
+ auto VectorLoadInst = cast<LoadInst>(Op0->getVectorOperand ());
2764
+ if (VectorLoadInst->getNumUses () != 2 )
2765
+ return ;
2766
+
2767
+ auto PointerOperand = VectorLoadInst->getPointerOperand ();
2768
+ PointerType* newLoadPointerType = PointerType::get (
2769
+ LI.getPointerOperand ()->getType (), PointerOperand->getType ()->getPointerAddressSpace ());
2770
+ IRBuilder<> builder (VectorLoadInst);
2771
+ auto CastedPointer =
2772
+ builder.CreateBitCast (PointerOperand, newLoadPointerType);
2773
+ auto NewLoadInst = IGC::cloneLoad (VectorLoadInst, CastedPointer);
2774
+
2775
+ LI.setOperand (0 , NewLoadInst);
2776
+ }
2777
+
2708
2778
void GenSpecificPattern::visitZExtInst (ZExtInst& ZEI)
2709
2779
{
2710
2780
CmpInst* Cmp = dyn_cast<CmpInst>(ZEI.getOperand (0 ));
0 commit comments