Skip to content

Commit fe79348

Browse files
committed
[llvm][SLPVectorizer] Fix a bad cast assertion (llvm#97621)
Fixes: rdar://128092379 (cherry picked from commit d3a76b0)
1 parent 5360090 commit fe79348

File tree

2 files changed

+90
-32
lines changed

2 files changed

+90
-32
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,14 @@ static bool isCommutative(Instruction *I) {
361361
return I->isCommutative();
362362
}
363363

364-
/// \returns inserting index of InsertElement or InsertValue instruction,
365-
/// using Offset as base offset for index.
366-
static std::optional<unsigned> getInsertIndex(const Value *InsertInst,
367-
unsigned Offset = 0) {
364+
template <typename T>
365+
static std::optional<unsigned> getInsertExtractIndex(const Value *Inst,
366+
unsigned Offset) {
367+
static_assert(std::is_same_v<T, InsertElementInst> ||
368+
std::is_same_v<T, ExtractElementInst>,
369+
"unsupported T");
368370
int Index = Offset;
369-
if (const auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
371+
if (const auto *IE = dyn_cast<T>(Inst)) {
370372
const auto *VT = dyn_cast<FixedVectorType>(IE->getType());
371373
if (!VT)
372374
return std::nullopt;
@@ -379,8 +381,25 @@ static std::optional<unsigned> getInsertIndex(const Value *InsertInst,
379381
Index += CI->getZExtValue();
380382
return Index;
381383
}
384+
return std::nullopt;
385+
}
386+
387+
/// \returns the insert or extract index of an InsertElement, ExtractElement or
388+
/// InsertValue instruction, using Offset as base offset for index.
389+
/// \returns std::nullopt if the index is not an immediate or if Inst is none of these instructions.
390+
static std::optional<unsigned> getElementIndex(const Value *Inst,
391+
unsigned Offset = 0) {
392+
if (auto Index = getInsertExtractIndex<InsertElementInst>(Inst, Offset))
393+
return Index;
394+
if (auto Index = getInsertExtractIndex<ExtractElementInst>(Inst, Offset))
395+
return Index;
396+
397+
int Index = Offset;
398+
399+
const auto *IV = dyn_cast<InsertValueInst>(Inst);
400+
if (!IV)
401+
return std::nullopt;
382402

383-
const auto *IV = cast<InsertValueInst>(InsertInst);
384403
Type *CurrentType = IV->getType();
385404
for (unsigned I : IV->indices()) {
386405
if (const auto *ST = dyn_cast<StructType>(CurrentType)) {
@@ -454,7 +473,7 @@ static SmallBitVector isUndefVector(const Value *V,
454473
Base = II->getOperand(0);
455474
if (isa<T>(II->getOperand(1)))
456475
continue;
457-
std::optional<unsigned> Idx = getInsertIndex(II);
476+
std::optional<unsigned> Idx = getElementIndex(II);
458477
if (!Idx) {
459478
Res.reset();
460479
return Res;
@@ -4707,8 +4726,8 @@ static bool areTwoInsertFromSameBuildVector(
47074726
return false;
47084727
auto *IE1 = VU;
47094728
auto *IE2 = V;
4710-
std::optional<unsigned> Idx1 = getInsertIndex(IE1);
4711-
std::optional<unsigned> Idx2 = getInsertIndex(IE2);
4729+
std::optional<unsigned> Idx1 = getElementIndex(IE1);
4730+
std::optional<unsigned> Idx2 = getElementIndex(IE2);
47124731
if (Idx1 == std::nullopt || Idx2 == std::nullopt)
47134732
return false;
47144733
// Go through the vector operand of insertelement instructions trying to find
@@ -4723,7 +4742,7 @@ static bool areTwoInsertFromSameBuildVector(
47234742
if (IE1 == V && !IE2)
47244743
return V->hasOneUse();
47254744
if (IE1 && IE1 != V) {
4726-
unsigned Idx1 = getInsertIndex(IE1).value_or(*Idx2);
4745+
unsigned Idx1 = getElementIndex(IE1).value_or(*Idx2);
47274746
IsReusedIdx |= ReusedIdx.test(Idx1);
47284747
ReusedIdx.set(Idx1);
47294748
if ((IE1 != VU && !IE1->hasOneUse()) || IsReusedIdx)
@@ -4732,7 +4751,7 @@ static bool areTwoInsertFromSameBuildVector(
47324751
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
47334752
}
47344753
if (IE2 && IE2 != VU) {
4735-
unsigned Idx2 = getInsertIndex(IE2).value_or(*Idx1);
4754+
unsigned Idx2 = getElementIndex(IE2).value_or(*Idx1);
47364755
IsReusedIdx |= ReusedIdx.test(Idx2);
47374756
ReusedIdx.set(Idx2);
47384757
if ((IE2 != V && !IE2->hasOneUse()) || IsReusedIdx)
@@ -4891,13 +4910,13 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
48914910
IE1, IE2,
48924911
[](InsertElementInst *II) { return II->getOperand(0); }))
48934912
return I1 < I2;
4894-
return getInsertIndex(IE1) < getInsertIndex(IE2);
4913+
return getElementIndex(IE1) < getElementIndex(IE2);
48954914
}
48964915
if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
48974916
if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
48984917
if (EE1->getOperand(0) != EE2->getOperand(0))
48994918
return I1 < I2;
4900-
return getInsertIndex(EE1) < getInsertIndex(EE2);
4919+
return getElementIndex(EE1) < getElementIndex(EE2);
49014920
}
49024921
return I1 < I2;
49034922
};
@@ -6159,7 +6178,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
61596178
ValueSet SourceVectors;
61606179
for (Value *V : VL) {
61616180
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
6162-
assert(getInsertIndex(V) != std::nullopt &&
6181+
assert(getElementIndex(V) != std::nullopt &&
61636182
"Non-constant or undef index?");
61646183
}
61656184

@@ -6926,7 +6945,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
69266945
decltype(OrdCompare)>
69276946
Indices(OrdCompare);
69286947
for (int I = 0, E = VL.size(); I < E; ++I) {
6929-
unsigned Idx = *getInsertIndex(VL[I]);
6948+
unsigned Idx = *getElementIndex(VL[I]);
69306949
Indices.emplace(Idx, I);
69316950
}
69326951
OrdersType CurrentOrder(VL.size(), VL.size());
@@ -9271,11 +9290,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
92719290
unsigned NumOfParts = TTI->getNumberOfParts(SrcVecTy);
92729291

92739292
SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
9274-
unsigned OffsetBeg = *getInsertIndex(VL.front());
9293+
unsigned OffsetBeg = *getElementIndex(VL.front());
92759294
unsigned OffsetEnd = OffsetBeg;
92769295
InsertMask[OffsetBeg] = 0;
92779296
for (auto [I, V] : enumerate(VL.drop_front())) {
9278-
unsigned Idx = *getInsertIndex(V);
9297+
unsigned Idx = *getElementIndex(V);
92799298
if (OffsetBeg > Idx)
92809299
OffsetBeg = Idx;
92819300
else if (OffsetEnd < Idx)
@@ -9316,7 +9335,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
93169335
SmallVector<int> PrevMask(InsertVecSz, PoisonMaskElem);
93179336
Mask.swap(PrevMask);
93189337
for (unsigned I = 0; I < NumScalars; ++I) {
9319-
unsigned InsertIdx = *getInsertIndex(VL[PrevMask[I]]);
9338+
unsigned InsertIdx = *getElementIndex(VL[PrevMask[I]]);
93209339
DemandedElts.setBit(InsertIdx);
93219340
IsIdentity &= InsertIdx - OffsetBeg == I;
93229341
Mask[InsertIdx - OffsetBeg] = I;
@@ -10070,8 +10089,8 @@ static bool isFirstInsertElement(const InsertElementInst *IE1,
1007010089
const auto *I2 = IE2;
1007110090
const InsertElementInst *PrevI1;
1007210091
const InsertElementInst *PrevI2;
10073-
unsigned Idx1 = *getInsertIndex(IE1);
10074-
unsigned Idx2 = *getInsertIndex(IE2);
10092+
unsigned Idx1 = *getElementIndex(IE1);
10093+
unsigned Idx2 = *getElementIndex(IE2);
1007510094
do {
1007610095
if (I2 == IE1)
1007710096
return true;
@@ -10080,10 +10099,10 @@ static bool isFirstInsertElement(const InsertElementInst *IE1,
1008010099
PrevI1 = I1;
1008110100
PrevI2 = I2;
1008210101
if (I1 && (I1 == IE1 || I1->hasOneUse()) &&
10083-
getInsertIndex(I1).value_or(Idx2) != Idx2)
10102+
getElementIndex(I1).value_or(Idx2) != Idx2)
1008410103
I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
1008510104
if (I2 && ((I2 == IE2 || I2->hasOneUse())) &&
10086-
getInsertIndex(I2).value_or(Idx1) != Idx1)
10105+
getElementIndex(I2).value_or(Idx1) != Idx1)
1008710106
I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
1008810107
} while ((I1 && PrevI1 != I1) || (I2 && PrevI2 != I2));
1008910108
llvm_unreachable("Two different buildvectors not expected.");
@@ -10274,7 +10293,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1027410293
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
1027510294
if (!UsedInserts.insert(VU).second)
1027610295
continue;
10277-
std::optional<unsigned> InsertIdx = getInsertIndex(VU);
10296+
std::optional<unsigned> InsertIdx = getElementIndex(VU);
1027810297
if (InsertIdx) {
1027910298
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
1028010299
auto *It = find_if(
@@ -10300,14 +10319,14 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1030010319
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
1030110320
if (IEBase != EU.User &&
1030210321
(!IEBase->hasOneUse() ||
10303-
getInsertIndex(IEBase).value_or(*InsertIdx) == *InsertIdx))
10322+
getElementIndex(IEBase).value_or(*InsertIdx) == *InsertIdx))
1030410323
break;
1030510324
// Build the mask for the vectorized insertelement instructions.
1030610325
if (const TreeEntry *E = getTreeEntry(IEBase)) {
1030710326
VU = IEBase;
1030810327
do {
1030910328
IEBase = cast<InsertElementInst>(Base);
10310-
int Idx = *getInsertIndex(IEBase);
10329+
int Idx = *getElementIndex(IEBase);
1031110330
assert(Mask[Idx] == PoisonMaskElem &&
1031210331
"InsertElementInstruction used already.");
1031310332
Mask[Idx] = Idx;
@@ -12721,7 +12740,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1272112740
cast<FixedVectorType>(FirstInsert->getType())->getNumElements();
1272212741
const unsigned NumScalars = E->Scalars.size();
1272312742

12724-
unsigned Offset = *getInsertIndex(VL0);
12743+
unsigned Offset = *getElementIndex(VL0);
1272512744
assert(Offset < NumElts && "Failed to find vector index offset");
1272612745

1272712746
// Create shuffle to resize vector
@@ -12739,7 +12758,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1273912758
Mask.swap(PrevMask);
1274012759
for (unsigned I = 0; I < NumScalars; ++I) {
1274112760
Value *Scalar = E->Scalars[PrevMask[I]];
12742-
unsigned InsertIdx = *getInsertIndex(Scalar);
12761+
unsigned InsertIdx = *getElementIndex(Scalar);
1274312762
IsIdentity &= InsertIdx - Offset == I;
1274412763
Mask[InsertIdx - Offset] = I;
1274512764
}
@@ -12752,7 +12771,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1275212771
// sequence.
1275312772
InsertElementInst *Ins = cast<InsertElementInst>(VL0);
1275412773
do {
12755-
std::optional<unsigned> InsertIdx = getInsertIndex(Ins);
12774+
std::optional<unsigned> InsertIdx = getElementIndex(Ins);
1275612775
if (!InsertIdx)
1275712776
break;
1275812777
if (InsertMask[*InsertIdx] == PoisonMaskElem)
@@ -13800,7 +13819,7 @@ Value *BoUpSLP::vectorizeTree(
1380013819
}
1380113820
}
1380213821

13803-
std::optional<unsigned> InsertIdx = getInsertIndex(VU);
13822+
std::optional<unsigned> InsertIdx = getElementIndex(VU);
1380413823
if (InsertIdx) {
1380513824
auto *It =
1380613825
find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
@@ -13823,13 +13842,13 @@ Value *BoUpSLP::vectorizeTree(
1382313842
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
1382413843
if (IEBase != User &&
1382513844
(!IEBase->hasOneUse() ||
13826-
getInsertIndex(IEBase).value_or(Idx) == Idx))
13845+
getElementIndex(IEBase).value_or(Idx) == Idx))
1382713846
break;
1382813847
// Build the mask for the vectorized insertelement instructions.
1382913848
if (const TreeEntry *E = getTreeEntry(IEBase)) {
1383013849
do {
1383113850
IEBase = cast<InsertElementInst>(Base);
13832-
int IEIdx = *getInsertIndex(IEBase);
13851+
int IEIdx = *getElementIndex(IEBase);
1383313852
assert(Mask[IEIdx] == PoisonMaskElem &&
1383413853
"InsertElementInstruction used already.");
1383513854
Mask[IEIdx] = IEIdx;
@@ -17781,7 +17800,7 @@ static void findBuildAggregate_rec(Instruction *LastInsertInst,
1778117800
do {
1778217801
Value *InsertedOperand = LastInsertInst->getOperand(1);
1778317802
std::optional<unsigned> OperandIndex =
17784-
getInsertIndex(LastInsertInst, OperandOffset);
17803+
getElementIndex(LastInsertInst, OperandOffset);
1778517804
if (!OperandIndex)
1778617805
return;
1778717806
if (isa<InsertElementInst, InsertValueInst>(InsertedOperand)) {
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=slp-vectorizer < %s -o - -S | FileCheck %s
3+
4+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-apple-macosx15.4.0"
6+
7+
define fastcc i32 @rdar128092379(i8 %index) {
8+
; CHECK-LABEL: define fastcc i32 @rdar128092379(
9+
; CHECK-SAME: i8 [[INDEX:%.*]]) {
10+
; CHECK-NEXT: [[BLOCK:.*]]:
11+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[INDEX]] to i64
12+
; CHECK-NEXT: [[ZEXT1:%.*]] = zext i8 [[INDEX]] to i64
13+
; CHECK-NEXT: br label %[[BLOCK3:.*]]
14+
; CHECK: [[BLOCK2:.*]]:
15+
; CHECK-NEXT: br label %[[BLOCK3]]
16+
; CHECK: [[BLOCK3]]:
17+
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, %[[BLOCK2]] ], [ [[ZEXT1]], %[[BLOCK]] ]
18+
; CHECK-NEXT: [[PHI4:%.*]] = phi i64 [ 0, %[[BLOCK2]] ], [ [[ZEXT]], %[[BLOCK]] ]
19+
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <16 x i32> zeroinitializer, i64 [[PHI4]]
20+
; CHECK-NEXT: [[EXTRACTELEMENT5:%.*]] = extractelement <16 x i32> zeroinitializer, i64 [[PHI]]
21+
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT5]]
22+
; CHECK-NEXT: ret i32 [[SUM]]
23+
;
24+
block:
25+
%zext = zext i8 %index to i64
26+
%zext1 = zext i8 %index to i64
27+
br label %block3
28+
29+
block2:
30+
br label %block3
31+
32+
block3:
33+
%phi = phi i64 [ 0, %block2 ], [ %zext1, %block ]
34+
%phi4 = phi i64 [ 0, %block2 ], [ %zext, %block ]
35+
%extractelement = extractelement <16 x i32> zeroinitializer, i64 %phi4
36+
%extractelement5 = extractelement <16 x i32> zeroinitializer, i64 %phi
37+
%sum = add i32 %extractelement, %extractelement5
38+
ret i32 %sum
39+
}

0 commit comments

Comments
 (0)