Skip to content

Commit d65cc85

Browse files
committed
[SLP]Do not schedule instructions with constants/argument/phi operands and external users.
No need to schedule entry nodes where all instructions are not memory read/write instructions and their operands are either constants, or arguments, or phis, or instructions from others blocks, or their users are phis or from the other blocks. The resulting vector instructions can be placed at the beginning of the basic block without scheduling (if operands does not need to be scheduled) or at the end of the block (if users are outside of the block). It may save some compile time and scheduling resources. Differential Revision: https://reviews.llvm.org/D121121
1 parent 58b6521 commit d65cc85

File tree

13 files changed

+320
-225
lines changed

13 files changed

+320
-225
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 174 additions & 36 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture re
3636
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
3737
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
3838
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
39+
; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
3940
; GENERIC-NEXT: [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
4041
; GENERIC-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
4142
; GENERIC-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
@@ -85,7 +86,6 @@ define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture re
8586
; GENERIC-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
8687
; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
8788
; GENERIC-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
88-
; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
8989
; GENERIC-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
9090
; GENERIC-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
9191
; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]
@@ -111,6 +111,7 @@ define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture re
111111
; KRYO-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
112112
; KRYO-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
113113
; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
114+
; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
114115
; KRYO-NEXT: [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
115116
; KRYO-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
116117
; KRYO-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
@@ -160,7 +161,6 @@ define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture re
160161
; KRYO-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
161162
; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
162163
; KRYO-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
163-
; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
164164
; KRYO-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
165165
; KRYO-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
166166
; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]
@@ -297,6 +297,7 @@ define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture re
297297
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
298298
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
299299
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
300+
; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
300301
; GENERIC-NEXT: [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
301302
; GENERIC-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
302303
; GENERIC-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
@@ -346,7 +347,6 @@ define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture re
346347
; GENERIC-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
347348
; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
348349
; GENERIC-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
349-
; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
350350
; GENERIC-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
351351
; GENERIC-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
352352
; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]
@@ -372,6 +372,7 @@ define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture re
372372
; KRYO-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
373373
; KRYO-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
374374
; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
375+
; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
375376
; KRYO-NEXT: [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
376377
; KRYO-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
377378
; KRYO-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
@@ -421,7 +422,6 @@ define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture re
421422
; KRYO-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
422423
; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
423424
; KRYO-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
424-
; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
425425
; KRYO-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
426426
; KRYO-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
427427
; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]

llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll

Lines changed: 12 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -35,41 +35,14 @@ define void @PR28330(i32 %n) {
3535
;
3636
; MAX-COST-LABEL: @PR28330(
3737
; MAX-COST-NEXT: entry:
38-
; MAX-COST-NEXT: [[P0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
39-
; MAX-COST-NEXT: [[P1:%.*]] = icmp eq i8 [[P0]], 0
40-
; MAX-COST-NEXT: [[P2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
41-
; MAX-COST-NEXT: [[P3:%.*]] = icmp eq i8 [[P2]], 0
42-
; MAX-COST-NEXT: [[P4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
43-
; MAX-COST-NEXT: [[P5:%.*]] = icmp eq i8 [[P4]], 0
44-
; MAX-COST-NEXT: [[P6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
45-
; MAX-COST-NEXT: [[P7:%.*]] = icmp eq i8 [[P6]], 0
46-
; MAX-COST-NEXT: [[P8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
47-
; MAX-COST-NEXT: [[P9:%.*]] = icmp eq i8 [[P8]], 0
48-
; MAX-COST-NEXT: [[P10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
49-
; MAX-COST-NEXT: [[P11:%.*]] = icmp eq i8 [[P10]], 0
50-
; MAX-COST-NEXT: [[P12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
51-
; MAX-COST-NEXT: [[P13:%.*]] = icmp eq i8 [[P12]], 0
52-
; MAX-COST-NEXT: [[P14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
53-
; MAX-COST-NEXT: [[P15:%.*]] = icmp eq i8 [[P14]], 0
38+
; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
39+
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
5440
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
5541
; MAX-COST: for.body:
56-
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
57-
; MAX-COST-NEXT: [[P19:%.*]] = select i1 [[P1]], i32 -720, i32 -80
58-
; MAX-COST-NEXT: [[P20:%.*]] = add i32 [[P17]], [[P19]]
59-
; MAX-COST-NEXT: [[P21:%.*]] = select i1 [[P3]], i32 -720, i32 -80
60-
; MAX-COST-NEXT: [[P22:%.*]] = add i32 [[P20]], [[P21]]
61-
; MAX-COST-NEXT: [[P23:%.*]] = select i1 [[P5]], i32 -720, i32 -80
62-
; MAX-COST-NEXT: [[P24:%.*]] = add i32 [[P22]], [[P23]]
63-
; MAX-COST-NEXT: [[P25:%.*]] = select i1 [[P7]], i32 -720, i32 -80
64-
; MAX-COST-NEXT: [[P26:%.*]] = add i32 [[P24]], [[P25]]
65-
; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
66-
; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]]
67-
; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
68-
; MAX-COST-NEXT: [[P30:%.*]] = add i32 [[P28]], [[P29]]
69-
; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
70-
; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[P30]], [[P31]]
71-
; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
72-
; MAX-COST-NEXT: [[P34]] = add i32 [[P32]], [[P33]]
42+
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
43+
; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
44+
; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
45+
; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
7346
; MAX-COST-NEXT: br label [[FOR_BODY]]
7447
;
7548
entry:
@@ -139,30 +112,14 @@ define void @PR32038(i32 %n) {
139112
;
140113
; MAX-COST-LABEL: @PR32038(
141114
; MAX-COST-NEXT: entry:
142-
; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <4 x i8>*), align 1
143-
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer
144-
; MAX-COST-NEXT: [[P8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
145-
; MAX-COST-NEXT: [[P9:%.*]] = icmp eq i8 [[P8]], 0
146-
; MAX-COST-NEXT: [[P10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
147-
; MAX-COST-NEXT: [[P11:%.*]] = icmp eq i8 [[P10]], 0
148-
; MAX-COST-NEXT: [[P12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
149-
; MAX-COST-NEXT: [[P13:%.*]] = icmp eq i8 [[P12]], 0
150-
; MAX-COST-NEXT: [[P14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
151-
; MAX-COST-NEXT: [[P15:%.*]] = icmp eq i8 [[P14]], 0
115+
; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
116+
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
152117
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
153118
; MAX-COST: for.body:
154-
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
155-
; MAX-COST-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
156-
; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
157-
; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
158-
; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
159-
; MAX-COST-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], [[P27]]
160-
; MAX-COST-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[P29]]
161-
; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP5]], -5
162-
; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
163-
; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
164-
; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
165-
; MAX-COST-NEXT: [[P34]] = add i32 [[P32]], [[P33]]
119+
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
120+
; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
121+
; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
122+
; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
166123
; MAX-COST-NEXT: br label [[FOR_BODY]]
167124
;
168125
entry:

llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ define void @patatino(i64 %n, i64 %i, %struct.S* %p) !dbg !7 {
1414
; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]]
1515
; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]]
1616
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]]
17-
; CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg [[DBG28:![0-9]+]]
17+
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]]
18+
; CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg [[DBG29:![0-9]+]]
1819
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg [[DBG26]]
19-
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg [[DBG26]], !tbaa [[TBAA29:![0-9]+]]
20-
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33:![0-9]+]]
20+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg [[DBG26]], !tbaa [[TBAA30:![0-9]+]]
2121
; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg [[DBG34:![0-9]+]]
2222
; CHECK-NEXT: [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg [[DBG35:![0-9]+]]
2323
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg [[DBG36:![0-9]+]]
24-
; CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg [[DBG36]], !tbaa [[TBAA29]]
24+
; CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg [[DBG36]], !tbaa [[TBAA30]]
2525
; CHECK-NEXT: ret void, !dbg [[DBG37:![0-9]+]]
2626
;
2727
entry:

llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ define void @test() #0 {
99
; CHECK: loop:
1010
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA1:%.*]], [[LOOP]] ]
1111
; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ]
12-
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
1312
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
1413
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
1514
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 3, i64 2, i64 1, i64 0>
1615
; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3
16+
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
1717
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
1818
; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32
1919
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP2]]

llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
1010
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
1111
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
1212
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
13-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
14-
; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4
15-
; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 -1>
16-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP4]])
13+
; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 -1>
14+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
15+
; CHECK-NEXT: store atomic i32 [[TMP4]], i32* [[VALS:%.*]] unordered, align 4
16+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP3]])
1717
; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
1818
; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
1919
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[V44]], i32 0

llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ define void @exceed(double %0, double %1) {
2929
; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef
3030
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
3131
; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]]
32-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
33-
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]]
34-
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
35-
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]]
32+
; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
33+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
34+
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]]
35+
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]]
3636
; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef
3737
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1
3838
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0

llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 {
5858

5959
define void @test2(i64* %a, i64* %b) {
6060
; CHECK-LABEL: @test2(
61-
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 2
62-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
61+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A:%.*]], i32 0
6362
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1
6463
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 1, i64 3>
64+
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 2
6565
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
6666
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
6767
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8

0 commit comments

Comments
 (0)