Skip to content

Commit 6e071cf

Browse files
alexey-bataev authored and tstellar committed
[SLP]Fix a crash if the argument of call was affected by minbitwidth analysis.
Need to support proper type conversion for function arguments to avoid compiler crash.
1 parent d89da2a commit 6e071cf

File tree

2 files changed

+102
-1
lines changed

2 files changed

+102
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1165311653
if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
1165411654
TysForDecl.push_back(
1165511655
FixedVectorType::get(CI->getType(), E->Scalars.size()));
11656+
auto *CEI = cast<CallInst>(VL0);
1165611657
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
1165711658
ValueList OpVL;
1165811659
// Some intrinsics have scalar arguments. This argument should not be
1165911660
// vectorized.
1166011661
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
11661-
CallInst *CEI = cast<CallInst>(VL0);
1166211662
ScalarArg = CEI->getArgOperand(I);
1166311663
OpVecs.push_back(CEI->getArgOperand(I));
1166411664
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
@@ -11671,6 +11671,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1167111671
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
1167211672
return E->VectorizedValue;
1167311673
}
11674+
auto GetOperandSignedness = [&](unsigned Idx) {
11675+
const TreeEntry *OpE = getOperandEntry(E, Idx);
11676+
bool IsSigned = false;
11677+
auto It = MinBWs.find(OpE);
11678+
if (It != MinBWs.end())
11679+
IsSigned = It->second.second;
11680+
else
11681+
IsSigned = any_of(OpE->Scalars, [&](Value *R) {
11682+
return !isKnownNonNegative(R, SimplifyQuery(*DL));
11683+
});
11684+
return IsSigned;
11685+
};
11686+
ScalarArg = CEI->getArgOperand(I);
11687+
if (cast<VectorType>(OpVec->getType())->getElementType() !=
11688+
ScalarArg->getType()) {
11689+
auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
11690+
VecTy->getNumElements());
11691+
OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
11692+
}
1167411693
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
1167511694
OpVecs.push_back(OpVec);
1167611695
if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s

; Regression test for a crash when a call argument's type was narrowed by
; SLP's minbitwidth analysis: the vectorizer must insert an integer cast so
; the vectorized operand matches the intrinsic's expected element type.
; Here the umax operand tree is computed in i8 (zext'd from i8, trunc'd back),
; so the vectorized @llvm.umax operand needs a zext to <8 x i32>.
define void @test(ptr %0, i8 %1, i1 %cmp12.i) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    br label [[PRE:%.*]]
; CHECK:       pre:
; CHECK-NEXT:    [[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[TMP8:%.*]] = add <8 x i32> [[TMP7]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP3]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = trunc <8 x i32> [[TMP9]] to <8 x i8>
; CHECK-NEXT:    store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1
; CHECK-NEXT:    br label [[PRE]]
;
entry:
  %idx11 = getelementptr i8, ptr %0, i64 1
  %idx22 = getelementptr i8, ptr %0, i64 2
  %idx33 = getelementptr i8, ptr %0, i64 3
  %idx44 = getelementptr i8, ptr %0, i64 4
  %idx55 = getelementptr i8, ptr %0, i64 5
  %idx66 = getelementptr i8, ptr %0, i64 6
  %idx77 = getelementptr i8, ptr %0, i64 7
  br label %pre

pre:
  %conv.i = zext i8 %1 to i32
  %2 = tail call i32 @llvm.umax.i32(i32 %conv.i, i32 1)
  %.sroa.speculated.i = add i32 %2, 1
  %intensity.0.i = select i1 %cmp12.i, i32 %.sroa.speculated.i, i32 %conv.i
  %conv14.i = trunc i32 %intensity.0.i to i8
  store i8 %conv14.i, ptr %0, align 1
  %conv.i.1 = zext i8 %1 to i32
  %3 = tail call i32 @llvm.umax.i32(i32 %conv.i.1, i32 1)
  %ss1 = add i32 %3, 1
  %ii1 = select i1 %cmp12.i, i32 %ss1, i32 %conv.i.1
  %conv14.i.1 = trunc i32 %ii1 to i8
  store i8 %conv14.i.1, ptr %idx11, align 1
  %conv.i.2 = zext i8 %1 to i32
  %4 = tail call i32 @llvm.umax.i32(i32 %conv.i.2, i32 1)
  %ss2 = add i32 %4, 1
  %ii2 = select i1 %cmp12.i, i32 %ss2, i32 %conv.i.2
  %conv14.i.2 = trunc i32 %ii2 to i8
  store i8 %conv14.i.2, ptr %idx22, align 1
  %conv.i.3 = zext i8 %1 to i32
  %5 = tail call i32 @llvm.umax.i32(i32 %conv.i.3, i32 1)
  %ss3 = add i32 %5, 1
  %ii3 = select i1 %cmp12.i, i32 %ss3, i32 %conv.i.3
  %conv14.i.3 = trunc i32 %ii3 to i8
  store i8 %conv14.i.3, ptr %idx33, align 1
  %conv.i.4 = zext i8 %1 to i32
  %6 = tail call i32 @llvm.umax.i32(i32 %conv.i.4, i32 1)
  %ss4 = add i32 %6, 1
  %ii4 = select i1 %cmp12.i, i32 %ss4, i32 %conv.i.4
  %conv14.i.4 = trunc i32 %ii4 to i8
  store i8 %conv14.i.4, ptr %idx44, align 1
  %conv.i.5 = zext i8 %1 to i32
  %7 = tail call i32 @llvm.umax.i32(i32 %conv.i.5, i32 1)
  %ss5 = add i32 %7, 1
  %ii5 = select i1 %cmp12.i, i32 %ss5, i32 %conv.i.5
  %conv14.i.5 = trunc i32 %ii5 to i8
  store i8 %conv14.i.5, ptr %idx55, align 1
  %conv.i.6 = zext i8 %1 to i32
  %8 = tail call i32 @llvm.umax.i32(i32 %conv.i.6, i32 1)
  %ss6 = add i32 %8, 1
  %ii6 = select i1 %cmp12.i, i32 %ss6, i32 %conv.i.6
  %conv14.i.6 = trunc i32 %ii6 to i8
  store i8 %conv14.i.6, ptr %idx66, align 1
  %conv.i.7 = zext i8 %1 to i32
  %9 = tail call i32 @llvm.umax.i32(i32 %conv.i.7, i32 1)
  %ss7 = add i32 %9, 1
  %ii7 = select i1 %cmp12.i, i32 %ss7, i32 %conv.i.7
  %conv14.i.7 = trunc i32 %ii7 to i8
  store i8 %conv14.i.7, ptr %idx77, align 1
  br label %pre
}

0 commit comments

Comments (0)