Skip to content

Commit a56f313

Browse files
sys-d3djenkinsgfxbot
authored and committed
Backout of 81afe8e due to Functional Regression
Change-Id: I8b1416ef2e3b2592556f2694460defe1afb6b5be
1 parent b243272 commit a56f313

File tree

4 files changed

+57
-120
lines changed

4 files changed

+57
-120
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 50 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -159,12 +159,12 @@ bool LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst* pAlloca)
159159
allowedAllocaSizeInBytes = allowedAllocaSizeInBytes / d;
160160
}
161161

162-
bool allocaCandidate = ValidUses(pAlloca);
162+
bool isUniformAlloca = true;
163+
bool allocaCandidate = ValidUses(pAlloca, isUniformAlloca);
163164
if(!allocaCandidate)
164165
{
165166
return false;
166167
}
167-
bool isUniformAlloca = getAnalysis<WIAnalysis>().whichDepend(pAlloca) == WIAnalysis::UNIFORM;
168168
if(isUniformAlloca)
169169
{
170170
// Heuristic: for uniform alloca we divide the size by 8 to adjust the pressure
@@ -241,6 +241,50 @@ bool LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst* pAlloca)
241241
return true;
242242
}
243243

244+
/// Conservative, purely structural check used to decide whether `val` can be
/// treated as uniform.
///
/// Returns true when `val` is a Constant or an AllocaInst, when it is a
/// BitCastInst whose source operand passes this check, or when it is a
/// GetElementPtrInst whose operands all pass this check. Any other kind of
/// value returns false.
bool LowerGEPForPrivMem::IsUniformAddress(Value* val)
245+
{
246+
if(isa<Constant>(val))
247+
{
248+
return true;
249+
}
250+
else if(isa<AllocaInst>(val))
251+
{
252+
// The alloca is the root of the address chain: accept it and stop recursing.
253+
return true;
254+
}
255+
else if(BitCastInst* bitcast = dyn_cast<BitCastInst>(val))
256+
{
257+
// A bitcast is accepted exactly when the value it casts is accepted.
258+
return IsUniformAddress(bitcast->getOperand(0));
259+
}
260+
else if(GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(val))
261+
{
262+
// Every GEP operand is checked (the loop starts at operand 0); one rejected operand rejects the GEP.
263+
for(unsigned int i = 0; i < gep->getNumOperands(); i++)
264+
{
265+
if(!IsUniformAddress(gep->getOperand(i)))
266+
{
267+
return false;
268+
}
269+
}
270+
return true;
271+
}
272+
// Anything else (no case above matched) is conservatively rejected.
273+
return false;
274+
}
272+
273+
/// Conservatively check whether a store still allows the alloca to be
/// considered uniform.
///
/// Returns true only when `pStore` is located in the entry block of its
/// function and both its pointer operand and its stored value pass
/// IsUniformAddress(); returns false otherwise.
bool LowerGEPForPrivMem::IsUniformStore(StoreInst* pStore)
274+
{
275+
if(pStore->getParent() != &pStore->getParent()->getParent()->getEntryBlock())
276+
{
277+
// Conservative logic: only stores in the entry block are considered for now.
278+
// This could be improved with dominator analysis or uniform analysis.
279+
return false;
280+
}
281+
// Both the destination address and the stored value must pass the structural check.
282+
if(!IsUniformAddress(pStore->getPointerOperand()) || !IsUniformAddress(pStore->getValueOperand()))
283+
{
284+
return false;
285+
}
286+
return true;
287+
}
287+
244288
static Type* GetBaseType(Type* pType)
245289
{
246290
if(pType->isStructTy())
@@ -278,13 +322,13 @@ static Type* GetBaseType(Type* pType)
278322
return pBaseType;
279323
}
280324

281-
bool LowerGEPForPrivMem::ValidUses(Instruction* I)
325+
bool LowerGEPForPrivMem::ValidUses(Instruction* I, bool& IsUniform)
282326
{
283327
for(Value::user_iterator use_it = I->user_begin(), use_e = I->user_end(); use_it != use_e; ++use_it)
284328
{
285329
if(GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(*use_it))
286330
{
287-
if(ValidUses(gep))
331+
if(ValidUses(gep, IsUniform))
288332
continue;
289333
}
290334
if(llvm::LoadInst* pLoad = llvm::dyn_cast<llvm::LoadInst>(*use_it))
@@ -302,6 +346,7 @@ bool LowerGEPForPrivMem::ValidUses(Instruction* I)
302346
// GEP instruction is the stored value of the StoreInst (not supported case)
303347
return false;
304348
}
349+
IsUniform &= IsUniformStore(pStore);
305350
}
306351
else if(llvm::BitCastInst *pBitCast = llvm::dyn_cast<llvm::BitCastInst>(*use_it))
307352
{
@@ -315,7 +360,7 @@ bool LowerGEPForPrivMem::ValidUses(Instruction* I)
315360
baseT->getPrimitiveSizeInBits() != 0 &&
316361
baseT->getPrimitiveSizeInBits() == sourceType->getPrimitiveSizeInBits() )
317362
{
318-
if(ValidUses(pBitCast))
363+
if(ValidUses(pBitCast, IsUniform))
319364
continue;
320365
}
321366
else if(IsBitCastForLifetimeMark(pBitCast))

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.hpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2828
#include "Compiler/CodeGenContextWrapper.hpp"
2929
#include "Compiler/MetaDataUtilsWrapper.h"
3030
#include "Compiler/CISACodeGen/RegisterPressureEstimate.hpp"
31-
#include "Compiler/CISACodeGen/WIAnalysis.hpp"
3231

3332
#include "common/LLVMWarningsPush.hpp"
3433
#include <llvm/Pass.h>
@@ -61,7 +60,6 @@ namespace IGC
6160
AU.addRequired<RegisterPressureEstimate>();
6261
AU.addRequired<MetaDataUtilsWrapper>();
6362
AU.addRequired<CodeGenContextWrapper>();
64-
AU.addRequired<WIAnalysis>();
6563
AU.setPreservesCFG();
6664
}
6765

@@ -77,7 +75,7 @@ namespace IGC
7775
llvm::Type *pBaseType);
7876

7977
// return true if the use of the pointer allow promotion
80-
bool ValidUses(llvm::Instruction* inst);
78+
bool ValidUses(llvm::Instruction* inst, bool& IsUniform);
8179

8280
void handleAllocaInst(llvm::AllocaInst *pAlloca);
8381

@@ -107,6 +105,9 @@ namespace IGC
107105

108106
/// Conservatively check if a store allow an Alloca to be uniform
109107
bool IsUniformStore(llvm::StoreInst* pStore);
108+
/// Check if the pointer arithmetic after the alloca is uniform
109+
bool IsUniformAddress(llvm::Value* val);
110+
110111
public:
111112
static char ID;
112113

IGC/Compiler/CISACodeGen/WIAnalysis.cpp

Lines changed: 3 additions & 101 deletions
Original file line number | Diff line number | Diff line change
@@ -610,7 +610,7 @@ void WIAnalysis::calculate_dep(const Value* val)
610610
else if (isa<InsertValueInst>(inst)) dep = calculate_dep_simple(inst);
611611
else if (const PHINode *Phi = dyn_cast<PHINode>(inst)) dep = calculate_dep(Phi);
612612
else if (isa<ShuffleVectorInst>(inst)) dep = calculate_dep_simple(inst);
613-
else if (isa<StoreInst>(inst)) dep = calculate_dep_simple(inst);
613+
else if (isa<StoreInst>(inst)) dep = RANDOM; // calculate_dep_simple(inst);
614614
else if (const TerminatorInst *TI = dyn_cast<TerminatorInst>(inst)) dep = calculate_dep(TI);
615615
else if (const SelectInst *SI = dyn_cast<SelectInst>(inst)) dep = calculate_dep(SI);
616616
else if (const AllocaInst *AI = dyn_cast<AllocaInst>(inst)) dep = calculate_dep(AI);
@@ -810,14 +810,7 @@ void WIAnalysis::updateDepMap(const Instruction *inst, WIAnalysis::WIDependancy
810810
{
811811
m_pChangedNew->push_back(*it);
812812
}
813-
if(const StoreInst* st = dyn_cast<StoreInst>(inst))
814-
{
815-
auto it = m_storeDepMap.find(st);
816-
if(it != m_storeDepMap.end())
817-
{
818-
m_pChangedNew->push_back(it->second);
819-
}
820-
}
813+
821814
// accumulate work-list for backward adjustment
822815
if (dep == RANDOM)
823816
{
@@ -1211,100 +1204,9 @@ WIAnalysis::WIDependancy WIAnalysis::calculate_dep(const SelectInst* inst)
12111204
return WIAnalysis::RANDOM;
12121205
}
12131206

1214-
bool WIAnalysis::TrackAllocaDep(const Value* I, AllocaDep& dep)
1215-
{
1216-
for(Value::const_user_iterator use_it = I->user_begin(), use_e = I->user_end(); use_it != use_e; ++use_it)
1217-
{
1218-
if(const GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(*use_it))
1219-
{
1220-
if(TrackAllocaDep(gep, dep))
1221-
continue;
1222-
}
1223-
if(const llvm::LoadInst* pLoad = llvm::dyn_cast<llvm::LoadInst>(*use_it))
1224-
{
1225-
if(!pLoad->isSimple())
1226-
return false;
1227-
}
1228-
else if(const llvm::StoreInst* pStore = llvm::dyn_cast<llvm::StoreInst>(*use_it))
1229-
{
1230-
if(!pStore->isSimple())
1231-
return false;
1232-
const llvm::Value* pValueOp = pStore->getValueOperand();
1233-
if(pValueOp == I)
1234-
{
1235-
// GEP instruction is the stored value of the StoreInst (not supported case)
1236-
return false;
1237-
}
1238-
dep.stores.push_back(pStore);
1239-
}
1240-
else if(const llvm::BitCastInst *pBitCast = llvm::dyn_cast<llvm::BitCastInst>(*use_it))
1241-
{
1242-
if(TrackAllocaDep(pBitCast, dep))
1243-
{
1244-
continue;
1245-
}
1246-
// Not a candidate.
1247-
return false;
1248-
}
1249-
else if(const IntrinsicInst* intr = dyn_cast<IntrinsicInst>(*use_it))
1250-
{
1251-
llvm::Intrinsic::ID IID = intr->getIntrinsicID();
1252-
if(IID == llvm::Intrinsic::lifetime_start ||
1253-
IID == llvm::Intrinsic::lifetime_end)
1254-
{
1255-
continue;
1256-
}
1257-
return false;
1258-
}
1259-
else
1260-
{
1261-
// This is some other instruction. Right now we don't want to handle these
1262-
return false;
1263-
}
1264-
}
1265-
return true;
1266-
}
1267-
1268-
12691207
WIAnalysis::WIDependancy WIAnalysis::calculate_dep(const AllocaInst* inst)
12701208
{
1271-
1272-
if(!hasDependency(inst))
1273-
{
1274-
AllocaDep dep;
1275-
if(TrackAllocaDep(inst, dep))
1276-
{
1277-
m_allocaDepMap.insert(std::make_pair(inst, dep));
1278-
for(auto it : dep.stores)
1279-
{
1280-
m_storeDepMap.insert(std::make_pair(&(*it), inst));
1281-
}
1282-
}
1283-
}
1284-
auto depIt = m_allocaDepMap.find(inst);
1285-
if(depIt == m_allocaDepMap.end())
1286-
{
1287-
// If we haven't been able to track the dependency of the alloca make it random
1288-
return WIAnalysis::RANDOM;
1289-
}
1290-
// otherwise assume the alloca is uniform by default
1291-
WIDependancy dep = WIAnalysis::UNIFORM;
1292-
for(auto it : depIt->second.stores)
1293-
{
1294-
if(hasDependency(&(*it)))
1295-
{
1296-
WIAnalysis::WIDependancy dep2 = getDependency(&(*it));
1297-
if(dep2 != WIAnalysis::UNIFORM)
1298-
{
1299-
return WIAnalysis::RANDOM;
1300-
}
1301-
if(insideDivergentCF(&(*it)))
1302-
{
1303-
return WIAnalysis::RANDOM;
1304-
}
1305-
}
1306-
}
1307-
return dep;
1209+
return WIAnalysis::RANDOM;
13081210
}
13091211

13101212
WIAnalysis::WIDependancy WIAnalysis::calculate_dep(const CastInst* inst)

IGC/Compiler/CISACodeGen/WIAnalysis.hpp

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -184,10 +184,6 @@ class WIAnalysis : public llvm::FunctionPass, public WIBaseClass
184184
void dump() const;
185185

186186
private:
187-
struct AllocaDep
188-
{
189-
std::vector<const llvm::StoreInst*> stores;
190-
};
191187
/*! \name Dependency Calculation Functions
192188
* \{ */
193189
/// @brief Calculate the dependency type for the instruction
@@ -249,8 +245,6 @@ class WIAnalysis : public llvm::FunctionPass, public WIBaseClass
249245
/// @brief return true if all the source operands are defined outside the region
250246
bool isRegionInvariant(const llvm::Instruction* inst, BranchInfo *brInfo, unsigned level);
251247

252-
/// @brief update dependency structure for Alloca
253-
bool TrackAllocaDep(const llvm::Value* I, AllocaDep& dep);
254248
/// @brief LLVM Interface
255249
/// @param AU Analysis
256250
/// WIAnalysis requires dominator and post dominator analysis
@@ -295,11 +289,6 @@ class WIAnalysis : public llvm::FunctionPass, public WIBaseClass
295289
llvm::PostDominatorTree *PDT;
296290
IGC::IGCMD::MetaDataUtils *m_pMdUtils;
297291

298-
// Allow access to all the store into an alloca if we were able to track it
299-
llvm::DenseMap<const llvm::AllocaInst*, AllocaDep> m_allocaDepMap;
300-
// reverse map to allow to know what alloca to update when store changes
301-
llvm::DenseMap<const llvm::StoreInst*, const llvm::AllocaInst*> m_storeDepMap;
302-
303292
IGC::TranslationTable *m_pTT;
304293
IGC::FastValueMap<WIAnalysis::WIDependancy, FastValueMapAttributeInfo<WIBaseClass::WIDependancy>> m_depMap;
305294
};

0 commit comments

Comments
 (0)