Skip to content

Commit 4aca20c

Browse files
committed
[SLP]Pre-cache the last instruction for all entries before vectorization
The last instruction of every entry must be pre-cached before vectorization starts. Otherwise, the new vector instructions emitted during vectorization add new uses, which may affect the analysis and unexpectedly change which instruction is detected as the last one.
1 parent e77ef7b commit 4aca20c

File tree

2 files changed

+98
-1
lines changed

2 files changed

+98
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18414,8 +18414,14 @@ Value *BoUpSLP::vectorizeTree(
1841418414
// need to rebuild it.
1841518415
EntryToLastInstruction.clear();
1841618416
// All blocks must be scheduled before any instructions are inserted.
18417-
for (auto &BSIter : BlocksSchedules) {
18417+
for (auto &BSIter : BlocksSchedules)
1841818418
scheduleBlock(BSIter.second.get());
18419+
// Cache last instructions for the nodes to avoid side effects, which may
18420+
// appear during vectorization, like extra uses, etc.
18421+
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
18422+
if (TE->isGather())
18423+
continue;
18424+
(void)getLastInstructionInBundle(TE.get());
1841918425
}
1842018426

1842118427
if (ReductionRoot)
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-generic-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(ptr %nExp, float %0, i1 %cmp, float %1) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[NEXP:%.*]], float [[TMP0:%.*]], i1 [[CMP:%.*]], float [[TMP1:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0x7FF8000000000000, float poison, float poison>, float [[TMP1]], i32 2
9+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
10+
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
11+
; CHECK: [[IF_THEN]]:
12+
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
13+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3>
14+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0
15+
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
16+
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
17+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3
18+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
19+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
20+
; CHECK-NEXT: br label %[[IF_END]]
21+
; CHECK: [[IF_END]]:
22+
; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP11]], %[[IF_THEN]] ], [ [[TMP3]], %[[ENTRY]] ]
23+
; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x float> [ [[TMP8]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
24+
; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x float> [ zeroinitializer, %[[IF_THEN]] ], [ <float 0x7FF8000000000000, float 1.000000e+00>, %[[ENTRY]] ]
25+
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x float> [ [[TMP7]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
26+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> <float poison, float 0.000000e+00>, <2 x i32> <i32 1, i32 3>
27+
; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x float> [[TMP15]], [[TMP16]]
28+
; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x float> [[TMP13]], [[TMP14]]
29+
; CHECK-NEXT: [[TMP19:%.*]] = fmul <4 x float> [[TMP12]], zeroinitializer
30+
; CHECK-NEXT: [[CALL25:%.*]] = load volatile ptr, ptr null, align 8
31+
; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x float> [[TMP18]], [[TMP17]]
32+
; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x float> [[TMP20]], zeroinitializer
33+
; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x float> [[TMP21]], zeroinitializer
34+
; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP19]], zeroinitializer
35+
; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer
36+
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
37+
; CHECK-NEXT: [[TMP26:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> <float 0.000000e+00, float 1.000000e+00, float poison, float poison>, <2 x float> [[TMP22]], i64 2)
38+
; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP25]], [[TMP26]]
39+
; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[CALL25]], align 4
40+
; CHECK-NEXT: ret void
41+
;
42+
entry:
43+
br i1 %cmp, label %if.then, label %if.end
44+
45+
if.then:
46+
%div.i41 = fmul float %0, 0.000000e+00
47+
%2 = load float, ptr %nExp, align 4
48+
%div.1.i.i = fmul float %2, 0.000000e+00
49+
%div.2.i.i = fmul float %0, 0.000000e+00
50+
br label %if.end
51+
52+
if.end:
53+
%3 = phi float [ %1, %if.then ], [ %0, %entry ]
54+
%4 = phi float [ 0.000000e+00, %if.then ], [ %1, %entry ]
55+
%5 = phi float [ 0.000000e+00, %if.then ], [ 0x7FF8000000000000, %entry ]
56+
%6 = phi float [ 0.000000e+00, %if.then ], [ 1.000000e+00, %entry ]
57+
%fa.sroa.9.0 = phi float [ %div.2.i.i, %if.then ], [ 0.000000e+00, %entry ]
58+
%fa.sroa.7.0 = phi float [ %div.1.i.i, %if.then ], [ 0.000000e+00, %entry ]
59+
%fa.sroa.0.0 = phi float [ %div.i41, %if.then ], [ 0.000000e+00, %entry ]
60+
%mul.1.i.i58 = fmul float %fa.sroa.7.0, %6
61+
%mul.2.i.i60 = fmul float %fa.sroa.9.0, %6
62+
%mul.1.i.i.i63 = fmul float %fa.sroa.0.0, %5
63+
%mul.2.i.i.i65 = fmul float %fa.sroa.0.0, 0.000000e+00
64+
%mul.i66 = fmul float %fa.sroa.0.0, 0.000000e+00
65+
%add.1.i.i = fadd float %mul.1.i.i58, %mul.1.i.i.i63
66+
%add.2.i.i = fadd float %mul.2.i.i60, %mul.2.i.i.i65
67+
%mul.1.i.i74 = fmul float %add.1.i.i, 0.000000e+00
68+
%mul.2.i.i76 = fmul float %add.2.i.i, 0.000000e+00
69+
%mul.i.i.i78 = fmul float %mul.i66, 0.000000e+00
70+
%add.1.i.i85 = fadd float %mul.1.i.i74, 0.000000e+00
71+
%add.2.i.i86 = fadd float %mul.2.i.i76, 0.000000e+00
72+
%mul.i.i.i97 = fmul float %5, 0.000000e+00
73+
%mul.1.i.i.i99 = fmul float %4, 0.000000e+00
74+
%mul.2.i.i.i101 = fmul float %3, 0.000000e+00
75+
%add.i.i103 = fadd float %mul.i.i.i97, 0.000000e+00
76+
%add.1.i.i104 = fadd float %mul.1.i.i.i99, 0.000000e+00
77+
%add.2.i.i105 = fadd float %mul.2.i.i.i101, 0.000000e+00
78+
%add = fadd float %mul.i.i.i78, 0.000000e+00
79+
%add.i = fadd float %add.i.i103, 1.000000e+00
80+
%add.1.i = fadd float %add.1.i.i104, %add.1.i.i85
81+
%add.2.i = fadd float %add.2.i.i105, %add.2.i.i86
82+
%call25 = load volatile ptr, ptr null, align 8
83+
store float %add, ptr %call25, align 4
84+
%__trans_tmp_29.sroa.5.0.call25.sroa_idx = getelementptr i8, ptr %call25, i64 4
85+
store float %add.i, ptr %__trans_tmp_29.sroa.5.0.call25.sroa_idx, align 4
86+
%__trans_tmp_29.sroa.6.0.call25.sroa_idx = getelementptr i8, ptr %call25, i64 8
87+
store float %add.1.i, ptr %__trans_tmp_29.sroa.6.0.call25.sroa_idx, align 4
88+
%__trans_tmp_29.sroa.7.0.call25.sroa_idx = getelementptr i8, ptr %call25, i64 12
89+
store float %add.2.i, ptr %__trans_tmp_29.sroa.7.0.call25.sroa_idx, align 4
90+
ret void
91+
}

0 commit comments

Comments
 (0)