Skip to content

Commit be5e9bf

Browse files
jgu222 authored and igcbot committed
Fix a bug when lead load isn't anchor load.
When the lead load isn't the anchor load, the new address needs to be recalculated based on the anchor load's address. This issue was exposed by the OpenCL conformance test test_conversions.
1 parent f8fdc34 commit be5e9bf

File tree

2 files changed

+61
-9
lines changed

2 files changed

+61
-9
lines changed

IGC/Compiler/CISACodeGen/MemOpt.cpp

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2377,6 +2377,14 @@ SymbolicPointer::decomposePointer(const Value* Ptr, SymbolicPointer& SymPtr,
23772377
return true;
23782378
}
23792379

2380+
2381+
// Debugging
2382+
//#define _LDST_DEBUG 1
2383+
#undef _LDST_DEBUG
2384+
#if defined(_LDST_DEBUG)
2385+
static int _bundleid = 0;
2386+
#endif
2387+
23802388
namespace {
23812389
enum class AddressModel {
23822390
BTS, A32, SLM, A64
@@ -2765,13 +2773,6 @@ const BundleSize_t BundleConfig::m_d64VecSizes_u = { 2,3,4,8,16,32,64 };
27652773
const BundleSize_t BundleConfig::m_d32VecSizes_u = { 2,3,4,8,16,32,64 };
27662774
const BundleSize_t BundleConfig::m_d8VecSizes_u = { 2,4,8,16,32 };
27672775

2768-
// Debugging
2769-
//#define _LDST_DEBUG 1
2770-
#undef _LDST_DEBUG
2771-
#if defined(_LDST_DEBUG)
2772-
static int _bundleid = 0;
2773-
#endif
2774-
27752776
bool IGC::doLdStCombine(const CodeGenContext* CGC) {
27762777
if (CGC->type == ShaderType::OPENCL_SHADER) {
27772778
auto oclCtx = (const OpenCLProgramContext*)CGC;
@@ -4753,7 +4754,10 @@ void LdStCombine::createCombinedLoads(Function& F)
47534754

47544755
// find anchor load.
47554756
LoadInst* anchorLoad = leadLoad;
4756-
int n = m_instOrder[anchorLoad];
4757+
const int leadLoadNum = m_instOrder[leadLoad];
4758+
const int leadOffset = (int)Loads[0].ByteOffset;
4759+
int anchorOffset = leadOffset;
4760+
int n = leadLoadNum;
47574761
// insts are assigned order number starting from 0. Anchor load is
47584762
// one with the smallest inst order number.
47594763
for (int i = 1, sz = (int)bundle.LoadStores.size(); i < sz; ++i) {
@@ -4763,9 +4767,11 @@ void LdStCombine::createCombinedLoads(Function& F)
47634767
{
47644768
n = LI_no;
47654769
anchorLoad = LI;
4770+
anchorOffset = (int)Loads[i].ByteOffset;
47664771
}
47674772
loadedValues.push_back(LI);
47684773
}
4774+
const int anchorLoadNum = n;
47694775

47704776
int eltBytes = bundle.bundle_eltBytes;
47714777
int nelts = bundle.bundle_numElts;
@@ -4796,6 +4802,28 @@ void LdStCombine::createCombinedLoads(Function& F)
47964802

47974803
IRBuilder<> irBuilder(anchorLoad);
47984804
Value* Addr = leadLoad->getPointerOperand();
4805+
// If leadLoad is different from anchorLoad and leadLoad's addr is
4806+
// an instruction after anchorLoad, need to re-generate the address
4807+
// of leadLoad at anchorLoad's position.
4808+
if (anchorLoad != leadLoad && isa<Instruction>(Addr)) {
4809+
Instruction* aI = cast<Instruction>(Addr);
4810+
auto MI = m_instOrder.find(aI);
4811+
if (MI != m_instOrder.end() && MI->second > anchorLoadNum)
4812+
{
4813+
Value* anchorAddr = anchorLoad->getPointerOperand();
4814+
Type* bTy = Type::getInt8Ty(leadLoad->getContext());
4815+
Type* nTy = PointerType::get(bTy, leadLoad->getPointerAddressSpace());
4816+
Value* nAddr = irBuilder.CreateBitCast(anchorAddr, nTy);
4817+
Value* aIdx = irBuilder.getInt64(leadOffset - anchorOffset);
4818+
GEPOperator* aGEP = dyn_cast<GEPOperator>(anchorAddr);
4819+
if (aGEP && aGEP->isInBounds()) {
4820+
Addr = irBuilder.CreateInBoundsGEP(bTy, nAddr, aIdx, "anchorLoad");
4821+
}
4822+
else {
4823+
Addr = irBuilder.CreateGEP(bTy, nAddr, aIdx, "anchorLoad");
4824+
}
4825+
};
4826+
}
47994827
PointerType* PTy = cast<PointerType>(Addr->getType());
48004828
PointerType* nPTy = PointerType::get(VTy, PTy->getAddressSpace());
48014829
Value* nAddr = irBuilder.CreateBitCast(Addr, nPTy);

IGC/Compiler/tests/LdStCombine/load_basic.ll

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ entry:
132132
;
133133
; CHECK-LABEL: store <2 x i64>
134134
; CHECK: load <4 x i32>
135-
; CHECK-LABEL: ret void
136135
;
137136
%c5.baseidx = add i64 %conv.i.i, 512
138137
%c5.arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %si, i64 %c5.baseidx
@@ -156,5 +155,30 @@ entry:
156155
%c5.addr = bitcast i32 addrspace(1)* %c5.arrayidx1 to <4 x i32> addrspace(1)*
157156
store <4 x i32> %c5.e0.3, <4 x i32> addrspace(1)* %c5.addr, align 4
158157

158+
;
159+
; case 6: load i32 p+1; load i32 p -> load <2 x i32>
160+
; This is to test that lead load is not the first and therefore, an address of
161+
; the anchor load is used instead.
162+
;
163+
; CHECK-LABEL: c6.baseidx
164+
; CHECK: %c6.arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %si, i64 %c6.baseidx
165+
; CHECK: [[T6_0:%.*]] = bitcast i32 addrspace(1)* %c6.arrayidx to i8 addrspace(1)*
166+
; CHECK: [[T6_1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[T6_0]], i64 -4
167+
; CHECK: [[T6_2:%.*]] = bitcast i8 addrspace(1)* [[T6_1]] to <2 x i32> addrspace(1)*
168+
; CHECK: {{.*}} = load <2 x i32>, <2 x i32> addrspace(1)* [[T6_2]], align 4
169+
; CHECK-LABEL: ret void
170+
;
171+
%c6.baseidx = add i64 %conv.i.i, 577
172+
%c6.arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %si, i64 %c6.baseidx
173+
%c6.0 = load i32, i32 addrspace(1)* %c6.arrayidx, align 4
174+
%c6.baseidx.1 = add i64 %conv.i.i, 576
175+
%c6.arrayidx.1 = getelementptr inbounds i32, i32 addrspace(1)* %si, i64 %c6.baseidx.1
176+
%c6.1 = load i32, i32 addrspace(1)* %c6.arrayidx.1, align 4
177+
%c6.v.0 = insertelement <2 x i32> undef, i32 %c6.0, i64 0
178+
%c6.v.1 = insertelement <2 x i32> %c6.v.0, i32 %c6.1, i64 1
179+
%c6.arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %d, i64 %c6.baseidx
180+
%c6.addr = bitcast i32 addrspace(1)* %c6.arrayidx1 to <2 x i32> addrspace(1)*
181+
store <2 x i32> %c6.v.1, <2 x i32> addrspace(1)* %c6.addr, align 4
182+
159183
ret void
160184
}

0 commit comments

Comments
 (0)