-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV] Vectorize histogram operations #99851
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ac6f152
07eaea2
d01a0ee
00f1197
afe4005
29716be
bb450c0
e23a36f
5b5123b
39c9368
18de68f
bbf27cf
8afd8fd
20fa46f
fcf048f
f1a3ed2
65d7ab9
c98f9c9
0754c33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,6 +79,10 @@ static cl::opt<LoopVectorizeHints::ScalableForceKind> | |
"Scalable vectorization is available and favored when the " | ||
"cost is inconclusive."))); | ||
|
||
static cl::opt<bool> EnableHistogramVectorization( | ||
"enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, | ||
cl::desc("Enables autovectorization of some loops containing histograms")); | ||
|
||
/// Maximum vectorization interleave count. | ||
static const unsigned MaxInterleaveFactor = 16; | ||
|
||
|
@@ -1051,6 +1055,133 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { | |
return true; | ||
} | ||
|
||
/// Find histogram operations that match high-level code in loops: | ||
/// \code | ||
/// buckets[indices[i]]+=step; | ||
/// \endcode | ||
/// | ||
/// It matches a pattern starting from \p HSt, which Stores to the 'buckets' | ||
/// array the computed histogram. It uses a BinOp to sum all counts, storing | ||
/// them using a loop-variant index Load from the 'indices' input array. | ||
/// | ||
/// On successful matches it updates the STATISTIC 'HistogramsDetected', | ||
/// regardless of hardware support. When there is support, it additionally | ||
/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers | ||
/// used to update histogram in \p HistogramPtrs. | ||
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, | ||
const PredicatedScalarEvolution &PSE, | ||
SmallVectorImpl<HistogramInfo> &Histograms) { | ||
|
||
// Store value must come from a Binary Operation. | ||
Instruction *HPtrInstr = nullptr; | ||
BinaryOperator *HBinOp = nullptr; | ||
if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr)))) | ||
return false; | ||
|
||
// BinOp must be an Add or a Sub modifying the bucket value by a | ||
// loop invariant amount. | ||
// FIXME: We assume the loop invariant term is on the RHS. | ||
// Fine for an immediate/constant, but maybe not a generic value? | ||
Value *HIncVal = nullptr; | ||
if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) && | ||
!match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal)))) | ||
return false; | ||
|
||
// Make sure the increment value is loop invariant. | ||
if (!TheLoop->isLoopInvariant(HIncVal)) | ||
return false; | ||
|
||
// The address to store is calculated through a GEP Instruction. | ||
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr); | ||
if (!GEP) | ||
return false; | ||
|
||
// Restrict address calculation to constant indices except for the last term. | ||
Value *HIdx = nullptr; | ||
for (Value *Index : GEP->indices()) { | ||
if (HIdx) | ||
return false; | ||
if (!isa<ConstantInt>(Index)) | ||
HIdx = Index; | ||
} | ||
|
||
if (!HIdx) | ||
return false; | ||
|
||
// Check that the index is calculated by loading from another array. Ignore | ||
// any extensions. | ||
// FIXME: Support indices from other sources than a linear load from memory? | ||
// We're currently trying to match an operation looping over an array | ||
// of indices, but there could be additional levels of indirection | ||
// in place, or possibly some additional calculation to form the index | ||
// from the loaded data. | ||
Value *VPtrVal; | ||
if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal))))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a tests where the index isn't extended? Coudlnt' find any in sve2-histcnt.ll but might have missed them There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! |
||
return false; | ||
|
||
// Make sure the index address varies in this loop, not an outer loop. | ||
const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal)); | ||
if (!AR || AR->getLoop() != TheLoop) | ||
return false; | ||
|
||
// Ensure we'll have the same mask by checking that all parts of the histogram | ||
// (gather load, update, scatter store) are in the same block. | ||
LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0)); | ||
BasicBlock *LdBB = IndexedLoad->getParent(); | ||
if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent()) | ||
return false; | ||
|
||
LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n"); | ||
|
||
// Store the operations that make up the histogram. | ||
Histograms.emplace_back(IndexedLoad, HBinOp, HSt); | ||
return true; | ||
} | ||
|
||
bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() { | ||
// For now, we only support an IndirectUnsafe dependency that calculates | ||
// a histogram | ||
if (!EnableHistogramVectorization) | ||
return false; | ||
|
||
// Find a single IndirectUnsafe dependency. | ||
const MemoryDepChecker::Dependence *IUDep = nullptr; | ||
const MemoryDepChecker &DepChecker = LAI->getDepChecker(); | ||
const auto *Deps = DepChecker.getDependences(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, |
||
// If there were too many dependences, LAA abandons recording them. We can't | ||
// proceed safely if we don't know what the dependences are. | ||
if (!Deps) | ||
return false; | ||
|
||
for (const MemoryDepChecker::Dependence &Dep : *Deps) { | ||
// Ignore dependencies that are either known to be safe or can be | ||
// checked at runtime. | ||
if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) != | ||
MemoryDepChecker::VectorizationSafetyStatus::Unsafe) | ||
continue; | ||
|
||
// We're only interested in IndirectUnsafe dependencies here, where the | ||
// address might come from a load from memory. We also only want to handle | ||
// one such dependency, at least for now. | ||
if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep) | ||
return false; | ||
|
||
IUDep = &Dep; | ||
} | ||
if (!IUDep) | ||
return false; | ||
|
||
// For now only normal loads and stores are supported. | ||
LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker)); | ||
StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker)); | ||
|
||
if (!LI || !SI) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just checking if we have a test where the source is a store and dest is a load, possibly for loops stepping backwards? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't. I tried creating one but was never able to get LAA to recognize the store as a source -- it always gave up before that point, possibly due to the indirection. Maybe I'm missing something. |
||
return false; | ||
|
||
LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n"); | ||
return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms); | ||
} | ||
|
||
bool LoopVectorizationLegality::canVectorizeMemory() { | ||
LAI = &LAIs.getInfo(*TheLoop); | ||
const OptimizationRemarkAnalysis *LAR = LAI->getReport(); | ||
|
@@ -1062,7 +1193,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { | |
} | ||
|
||
if (!LAI->canVectorizeMemory()) | ||
return false; | ||
return canVectorizeIndirectUnsafeDependences(); | ||
|
||
if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) { | ||
reportVectorizationFailure("We don't allow storing to uniform addresses", | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -6508,8 +6508,33 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, | |||||||||||||||||||||||||||||||||||||||||||||||||||||
// We've proven all lanes safe to speculate, fall through. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
[[fallthrough]]; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
case Instruction::Add: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
case Instruction::Sub: { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
auto Info = Legal->getHistogramInfo(I); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (Info && VF.isVector()) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
const HistogramInfo *HGram = Info.value(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Assume that a non-constant update value (or a constant != 1) requires | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// a multiply, and add that into the cost. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
InstructionCost MulCost = TTI::TCC_Free; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1)); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (!RHS || RHS->getZExtValue() != 1) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Find the cost of the histogram operation itself. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
Type *PtrTy = VectorType::get(HGram->Load->getPointerOperandType(), VF); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
Type *ScalarTy = I->getType(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
Type *MaskTy = VectorType::get(Type::getInt1Ty(I->getContext()), VF); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
Type::getVoidTy(I->getContext()), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
{PtrTy, ScalarTy, MaskTy}); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Add the costs together with the add/sub operation. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
return TTI.getIntrinsicInstrCost( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
ICA, TargetTransformInfo::TCK_RecipThroughput) + | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
MulCost + TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
[[fallthrough]]; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
Comment on lines
+6511
to
+6536
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this still needed in the latest version with `::computeCost?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that's still needed for now -- the two cost models disagree and assert if I remove that code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for checking, I though the assert wouldn't trigger due to |
||||||||||||||||||||||||||||||||||||||||||||||||||||||
case Instruction::FAdd: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
case Instruction::Sub: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
case Instruction::FSub: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
case Instruction::Mul: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
case Instruction::FMul: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -8426,6 +8451,30 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, | |||||||||||||||||||||||||||||||||||||||||||||||||||||
}; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
VPHistogramRecipe * | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
ArrayRef<VPValue *> Operands) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// FIXME: Support other operations. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
unsigned Opcode = HI->Update->getOpcode(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
assert((Opcode == Instruction::Add || Opcode == Instruction::Sub) && | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
"Histogram update operation must be an Add or Sub"); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
SmallVector<VPValue *, 3> HGramOps; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Bucket address. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
HGramOps.push_back(Operands[1]); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Increment value. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
HGramOps.push_back(getVPValueOrAddLiveIn(HI->Update->getOperand(1))); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
// In case of predicated execution (due to tail-folding, or conditional | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// execution, or both), pass the relevant mask. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (Legal->isMaskRequired(HI->Store)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
HGramOps.push_back(getBlockInMask(HI->Store->getParent())); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
return new VPHistogramRecipe(Opcode, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
make_range(HGramOps.begin(), HGramOps.end()), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
HI->Store->getDebugLoc()); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
void VPRecipeBuilder::fixHeaderPhis() { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
for (VPHeaderPHIRecipe *R : PhisToFix) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -8549,6 +8598,10 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, | |||||||||||||||||||||||||||||||||||||||||||||||||||||
if (auto *CI = dyn_cast<CallInst>(Instr)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
return tryToWidenCall(CI, Operands, Range); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (StoreInst *SI = dyn_cast<StoreInst>(Instr)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (auto HistInfo = Legal->getHistogramInfo(SI)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
return tryToWidenHistogram(*HistInfo, Operands); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
return tryToWidenMemory(Instr, Operands, Range); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -9998,6 +10051,19 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |||||||||||||||||||||||||||||||||||||||||||||||||||||
InterleaveLoop = false; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
// If there is a histogram in the loop, do not just interleave without | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// vectorizing. The order of operations will be incorrect without the | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
// histogram intrinsics, which are only used for recipes with VF > 1. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (!VectorizeLoop && InterleaveLoop && LVL.hasHistograms()) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
LLVM_DEBUG(dbgs() << "LV: Not interleaving without vectorization due " | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
<< "to histogram operations.\n"); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
IntDiagMsg = std::make_pair( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
"HistogramPreventsScalarInterleaving", | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
"Unable to interleave without vectorization due to constraints on " | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
"the order of histogram operations"); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
InterleaveLoop = false; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Override IC if user provided an interleave count. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
IC = UserIC > 0 ? UserIC : IC; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -907,6 +907,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |
case VPRecipeBase::VPWidenLoadSC: | ||
case VPRecipeBase::VPWidenStoreEVLSC: | ||
case VPRecipeBase::VPWidenStoreSC: | ||
case VPRecipeBase::VPHistogramSC: | ||
// TODO: Widened stores don't define a value, but widened loads do. Split | ||
// the recipes to be able to make widened loads VPSingleDefRecipes. | ||
return false; | ||
|
@@ -1664,6 +1665,51 @@ class VPWidenCallRecipe : public VPSingleDefRecipe { | |
#endif | ||
}; | ||
|
||
/// A recipe representing a sequence of load -> update -> store as part of | ||
/// a histogram operation. This means there may be aliasing between vector | ||
/// lanes, which is handled by the llvm.experimental.vector.histogram family | ||
/// of intrinsics. The only update operations currently supported are | ||
/// 'add' and 'sub' where the other term is loop-invariant. | ||
class VPHistogramRecipe : public VPRecipeBase { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please document the new recipe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
/// Opcode of the update operation, currently either add or sub. | ||
unsigned Opcode; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: document that this is the opcode of the increment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
public: | ||
template <typename IterT> | ||
VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands, | ||
DebugLoc DL = {}) | ||
: VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {} | ||
|
||
~VPHistogramRecipe() override = default; | ||
|
||
VPHistogramRecipe *clone() override { | ||
return new VPHistogramRecipe(Opcode, operands(), getDebugLoc()); | ||
} | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPHistogramSC); | ||
|
||
/// Produce a vectorized histogram operation. | ||
void execute(VPTransformState &State) override; | ||
|
||
/// Return the cost of this VPHistogramRecipe. | ||
InstructionCost computeCost(ElementCount VF, | ||
VPCostContext &Ctx) const override; | ||
|
||
unsigned getOpcode() const { return Opcode; } | ||
|
||
/// Return the mask operand if one was provided, or a null pointer if all | ||
/// lanes should be executed unconditionally. | ||
VPValue *getMask() const { | ||
return getNumOperands() == 3 ? getOperand(2) : nullptr; | ||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe | ||
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
}; | ||
|
||
/// A recipe for widening select instructions. | ||
struct VPWidenSelectRecipe : public VPSingleDefRecipe { | ||
template <typename IterT> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are all cases exercised in tests? Looks like the tests are all using
%arrayidx2 = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
? Should at least have some with additional constant indices?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had a test (
histogram_array_3op_gep
) which exercised this with global arrays, requiring one extra constant index argument.I've added a second (
histogram_array_4op_gep_nonzero_const_idx
) with a struct as well, with one of the constant terms being nonzero.Are there other cases you would like to see?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for adding the extra tests.