Skip to content

Commit 63d8058

Browse files
authored
LoopVectorize: guard appending InstsToScalarize; fix bug (#88720)
While collecting instructions to scalarize, LoopVectorize incorrectly also adds instructions that will already be scalar after vectorization. An instruction that satisfies isScalarAfterVectorization() for the given VF should not be appended to InstsToScalarize. Add this extra guard in collectInstsToScalarize(), which fixes a crash. Fixes #55096.
1 parent 61f4001 commit 63d8058

File tree

2 files changed

+50
-9
lines changed

2 files changed

+50
-9
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5814,7 +5814,8 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
58145814
// invalid scalarization costs.
58155815
// Do not apply discount logic if hacked cost is needed
58165816
// for emulated masked memrefs.
5817-
if (!VF.isScalable() && !useEmulatedMaskMemRefHack(&I, VF) &&
5817+
if (!isScalarAfterVectorization(&I, VF) && !VF.isScalable() &&
5818+
!useEmulatedMaskMemRefHack(&I, VF) &&
58185819
computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
58195820
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
58205821
// Remember that BB will remain after vectorization.

llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,48 @@
1-
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
2-
3-
; REQUIRES: asserts
4-
; XFAIL: *
5-
6-
target triple = "x86_64-apple-macosx"
7-
8-
; CHECK: vector.body
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -mtriple=x86_64-apple-macosx -passes=loop-vectorize,simplifycfg,dce -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
93

104
define void @test_pr55096(i64 %c, ptr %p) {
5+
; CHECK-LABEL: define void @test_pr55096(
6+
; CHECK-SAME: i64 [[C:%.*]], ptr [[P:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[C]], i64 0
9+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
10+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
11+
; CHECK: vector.body:
12+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
13+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 122, i64 123>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
14+
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i16
15+
; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DOTCAST]], 2008
16+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 6229, [[TMP0]]
17+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
18+
; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], <i1 true, i1 true>
19+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
20+
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
21+
; CHECK: pred.store.if:
22+
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
23+
; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP4]], 2008
24+
; CHECK-NEXT: [[TMP6:%.*]] = udiv i16 4943, [[TMP5]]
25+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP6]]
26+
; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2
27+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
28+
; CHECK: pred.store.continue:
29+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
30+
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
31+
; CHECK: pred.store.if2:
32+
; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 2008
33+
; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[TMP10]], 2008
34+
; CHECK-NEXT: [[TMP12:%.*]] = udiv i16 4943, [[TMP11]]
35+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP12]]
36+
; CHECK-NEXT: store i16 0, ptr [[TMP13]], align 2
37+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
38+
; CHECK: pred.store.continue3:
39+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
40+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
41+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 340
42+
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
43+
; CHECK: exit:
44+
; CHECK-NEXT: ret void
45+
;
1146
entry:
1247
br label %loop.header
1348

@@ -32,3 +67,8 @@ loop.latch:
3267
exit:
3368
ret void
3469
}
70+
;.
71+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
72+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
73+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
74+
;.

0 commit comments

Comments
 (0)