Skip to content

Commit 4652ec0

Browse files
[SLP] Delete vectorized users when tree contains an invalid cost (#86344)
1 parent 5d0d9eb commit 4652ec0

File tree

2 files changed

+58
-1
lines changed

2 files changed

+58
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15959,7 +15959,7 @@ class HorizontalReduction {
1595915959
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
1596015960
<< " for reduction\n");
1596115961
if (!Cost.isValid())
15962-
return nullptr;
15962+
break;
1596315963
if (Cost >= -SLPCostThreshold) {
1596415964
V.getORE()->emit([&]() {
1596515965
return OptimizationRemarkMissed(
Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
3+
4+
target triple = "riscv64-unknown-linux-gnu"
5+
6+
define void @partial_vec_invalid_cost() #0 {
7+
; CHECK-LABEL: define void @partial_vec_invalid_cost(
8+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: entry:
10+
; CHECK-NEXT: [[LSHR_1:%.*]] = lshr i96 0, 0
11+
; CHECK-NEXT: [[LSHR_2:%.*]] = lshr i96 0, 0
12+
; CHECK-NEXT: [[TRUNC_I96_1:%.*]] = trunc i96 [[LSHR_1]] to i32
13+
; CHECK-NEXT: [[TRUNC_I96_2:%.*]] = trunc i96 [[LSHR_2]] to i32
14+
; CHECK-NEXT: [[TRUNC_I96_3:%.*]] = trunc i96 0 to i32
15+
; CHECK-NEXT: [[TRUNC_I96_4:%.*]] = trunc i96 0 to i32
16+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
17+
; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP0]], [[TRUNC_I96_2]]
18+
; CHECK-NEXT: [[OP_RDX1:%.*]] = or i32 [[TRUNC_I96_1]], [[TRUNC_I96_3]]
19+
; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX]], [[OP_RDX1]]
20+
; CHECK-NEXT: [[OP_RDX3:%.*]] = or i32 [[OP_RDX2]], [[TRUNC_I96_4]]
21+
; CHECK-NEXT: [[STORE_THIS:%.*]] = zext i32 [[OP_RDX3]] to i96
22+
; CHECK-NEXT: store i96 [[STORE_THIS]], ptr null, align 16
23+
; CHECK-NEXT: ret void
24+
;
25+
entry:
26+
27+
%lshr.1 = lshr i96 0, 0 ; These ops
28+
%lshr.2 = lshr i96 0, 0 ; return an
29+
%add.0 = add i96 0, 0 ; invalid
30+
%add.1 = add i96 0, 0 ; vector cost.
31+
32+
%trunc.i96.1 = trunc i96 %lshr.1 to i32 ; These ops
33+
%trunc.i96.2 = trunc i96 %lshr.2 to i32 ; return an
34+
%trunc.i96.3 = trunc i96 %add.0 to i32 ; invalid
35+
%trunc.i96.4 = trunc i96 %add.1 to i32 ; vector cost.
36+
37+
%or.0 = or i32 %trunc.i96.1, %trunc.i96.2
38+
%or.1 = or i32 %or.0, %trunc.i96.3
39+
%or.2 = or i32 %or.1, %trunc.i96.4
40+
41+
%zext.0 = zext i1 0 to i32 ; These
42+
%zext.1 = zext i1 0 to i32 ; ops
43+
%zext.2 = zext i1 0 to i32 ; are
44+
%zext.3 = zext i1 0 to i32 ; vectorized
45+
46+
%or.3 = or i32 %or.2, %zext.0 ; users
47+
%or.4 = or i32 %or.3, %zext.1 ; of
48+
%or.5 = or i32 %or.4, %zext.2 ; vectorized
49+
%or.6 = or i32 %or.5, %zext.3 ; ops
50+
51+
%store.this = zext i32 %or.6 to i96
52+
53+
store i96 %store.this, ptr null, align 16
54+
ret void
55+
}
56+
57+
attributes #0 = { "target-features"="+v" }

0 commit comments

Comments
 (0)