@@ -2377,6 +2377,14 @@ SymbolicPointer::decomposePointer(const Value* Ptr, SymbolicPointer& SymPtr,
2377
2377
return true ;
2378
2378
}
2379
2379
2380
+
2381
+ // Debugging
2382
+ // #define _LDST_DEBUG 1
2383
+ #undef _LDST_DEBUG
2384
+ #if defined(_LDST_DEBUG)
2385
+ static int _bundleid = 0 ;
2386
+ #endif
2387
+
2380
2388
namespace {
2381
2389
enum class AddressModel {
2382
2390
BTS, A32, SLM, A64
@@ -2765,13 +2773,6 @@ const BundleSize_t BundleConfig::m_d64VecSizes_u = { 2,3,4,8,16,32,64 };
2765
2773
const BundleSize_t BundleConfig::m_d32VecSizes_u = { 2 ,3 ,4 ,8 ,16 ,32 ,64 };
2766
2774
const BundleSize_t BundleConfig::m_d8VecSizes_u = { 2 ,4 ,8 ,16 ,32 };
2767
2775
2768
- // Debugging
2769
- // #define _LDST_DEBUG 1
2770
- #undef _LDST_DEBUG
2771
- #if defined(_LDST_DEBUG)
2772
- static int _bundleid = 0 ;
2773
- #endif
2774
-
2775
2776
bool IGC::doLdStCombine (const CodeGenContext* CGC) {
2776
2777
if (CGC->type == ShaderType::OPENCL_SHADER) {
2777
2778
auto oclCtx = (const OpenCLProgramContext*)CGC;
@@ -4753,7 +4754,10 @@ void LdStCombine::createCombinedLoads(Function& F)
4753
4754
4754
4755
// find anchor load.
4755
4756
LoadInst* anchorLoad = leadLoad;
4756
- int n = m_instOrder[anchorLoad];
4757
+ const int leadLoadNum = m_instOrder[leadLoad];
4758
+ const int leadOffset = (int )Loads[0 ].ByteOffset ;
4759
+ int anchorOffset = leadOffset;
4760
+ int n = leadLoadNum;
4757
4761
// Instructions are assigned order numbers starting from 0. The anchor load
4758
4762
// is the one with the smallest instruction order number.
4759
4763
for (int i = 1 , sz = (int )bundle.LoadStores .size (); i < sz; ++i) {
@@ -4763,9 +4767,11 @@ void LdStCombine::createCombinedLoads(Function& F)
4763
4767
{
4764
4768
n = LI_no;
4765
4769
anchorLoad = LI;
4770
+ anchorOffset = (int )Loads[i].ByteOffset ;
4766
4771
}
4767
4772
loadedValues.push_back (LI);
4768
4773
}
4774
+ const int anchorLoadNum = n;
4769
4775
4770
4776
int eltBytes = bundle.bundle_eltBytes ;
4771
4777
int nelts = bundle.bundle_numElts ;
@@ -4796,6 +4802,28 @@ void LdStCombine::createCombinedLoads(Function& F)
4796
4802
4797
4803
IRBuilder<> irBuilder (anchorLoad);
4798
4804
Value* Addr = leadLoad->getPointerOperand ();
4805
+ // If leadLoad is different from anchorLoad and leadLoad's address is
4806
+ // an instruction that comes after anchorLoad, we need to regenerate the
4807
+ // address of leadLoad at anchorLoad's position.
4808
+ if (anchorLoad != leadLoad && isa<Instruction>(Addr)) {
4809
+ Instruction* aI = cast<Instruction>(Addr);
4810
+ auto MI = m_instOrder.find (aI);
4811
+ if (MI != m_instOrder.end () && MI->second > anchorLoadNum)
4812
+ {
4813
+ Value* anchorAddr = anchorLoad->getPointerOperand ();
4814
+ Type* bTy = Type::getInt8Ty (leadLoad->getContext ());
4815
+ Type* nTy = PointerType::get (bTy, leadLoad->getPointerAddressSpace ());
4816
+ Value* nAddr = irBuilder.CreateBitCast (anchorAddr, nTy);
4817
+ Value* aIdx = irBuilder.getInt64 (leadOffset - anchorOffset);
4818
+ GEPOperator* aGEP = dyn_cast<GEPOperator>(anchorAddr);
4819
+ if (aGEP && aGEP->isInBounds ()) {
4820
+ Addr = irBuilder.CreateInBoundsGEP (bTy, nAddr, aIdx, " anchorLoad" );
4821
+ }
4822
+ else {
4823
+ Addr = irBuilder.CreateGEP (bTy, nAddr, aIdx, " anchorLoad" );
4824
+ }
4825
+ };
4826
+ }
4799
4827
PointerType* PTy = cast<PointerType>(Addr->getType ());
4800
4828
PointerType* nPTy = PointerType::get (VTy, PTy->getAddressSpace ());
4801
4829
Value* nAddr = irBuilder.CreateBitCast (Addr, nPTy);
0 commit comments