Skip to content

Commit 6fdc2ce

Browse files
committed
[SLP]Fix PR77916: transform the whole mask, not only the elements for
the second vector. We need to transform all elements in the long mask: if we decide to produce a shorter version, some elements may still have incorrect indices after the transformation for the first vector in the permutation.
1 parent 5dbf178 commit 6fdc2ce

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7379,6 +7379,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
73797379
continue;
73807380
if (Idx >= static_cast<int>(CommonVF))
73817381
Idx = E1Mask[Idx - CommonVF] + VF;
7382+
else
7383+
Idx = E1Mask[Idx];
73827384
}
73837385
CommonVF = VF;
73847386
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-10 < %s | FileCheck %s
3+
4+
define i32 @test() {
5+
; CHECK-LABEL: define i32 @test() {
6+
; CHECK-NEXT: bb:
7+
; CHECK-NEXT: br label [[BB1:%.*]]
8+
; CHECK: bb1:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB3:%.*]] ], [ zeroinitializer, [[BB:%.*]] ]
10+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1>
11+
; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]]
12+
; CHECK: bb3:
13+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
14+
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
15+
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]]
16+
; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
17+
; CHECK-NEXT: br label [[BB1]]
18+
; CHECK: bb4:
19+
; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]
20+
; CHECK-NEXT: ret i32 0
21+
;
22+
bb:
23+
br label %bb1
24+
25+
bb1:
26+
%phi = phi i32 [ %or, %bb3 ], [ 0, %bb ]
27+
%phi2 = phi i32 [ %add, %bb3 ], [ 0, %bb ]
28+
br i1 false, label %bb4, label %bb3
29+
30+
bb3:
31+
%or = or i32 0, %phi
32+
%add = add i32 0, 0
33+
br label %bb1
34+
35+
bb4:
36+
%phi5 = phi i32 [ %phi2, %bb1 ]
37+
%phi6 = phi i32 [ %phi2, %bb1 ]
38+
%phi7 = phi i32 [ %phi2, %bb1 ]
39+
%phi8 = phi i32 [ %phi2, %bb1 ]
40+
%phi9 = phi i32 [ %phi2, %bb1 ]
41+
%phi10 = phi i32 [ %phi2, %bb1 ]
42+
%phi11 = phi i32 [ %phi, %bb1 ]
43+
%phi12 = phi i32 [ %phi, %bb1 ]
44+
ret i32 0
45+
}

0 commit comments

Comments
 (0)