Skip to content

Commit 894935c

Browse files
[SLP]Represent SLP graph as a tree
We can stop using a graph representation of the SLP structure and switch directly to tree by relying on a single user of each tree node. If the node has multiple uses, other uses must be represented as a separate gather/buildvector node, which then will be combined with the existing vectorized node(s) uoon cost estimation/codegen. This allow to simplify inner structure and turn in some extra optimizations, which could not be turned on for the nodes with multi users (reordering, minbitwidth analysis). AVX512, -O3+LTO Metric: size..text results results0 diff test-suite :: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000.test 253453.00 254253.00 0.3% test-suite :: External/SPEC/CFP2006/444.namd/444.namd.test 251411.00 252051.00 0.3% test-suite :: SingleSource/Benchmarks/Misc/oourafft.test 19114.00 19146.00 0.2% test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1399200.00 1399520.00 0.0% test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1399200.00 1399520.00 0.0% test-suite :: MicroBenchmarks/LCALS/SubsetALambdaLoops/lcalsALambda.test 304310.00 304326.00 0.0% test-suite :: MicroBenchmarks/LCALS/SubsetARawLoops/lcalsARaw.test 304662.00 304678.00 0.0% test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12566919.00 12567511.00 0.0% test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test 1146300.00 1146316.00 0.0% test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test 1159864.00 1159880.00 0.0% test-suite :: External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r.test 9407880.00 9407864.00 -0.0% test-suite :: External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s.test 9407880.00 9407864.00 -0.0% test-suite :: MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.test 1011612.00 1011596.00 -0.0% test-suite :: MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc.test 280584.00 280536.00 -0.0% test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.test 93016.00 93000.00 -0.0% ASCI_Purple/SMG2000 - extra code vectorized, small variations CFP2006/444.namd - small variations, less shuffles Benchmarks/Misc/oourafft - small variations CFP2017rate/538.imagick_r CFP2017speed/638.imagick_s - small variations, less shuffles LCALS/SubsetALambdaLoops - less shuffles LCALS/SubsetARawLoops - less shuffles CFP2017rate/526.blender_r - small variations, extra vector code CFP2006/453.povray - small variations CFP2017rate/511.povray_r - small variations CINT2017rate/502.gcc_r CINT2017speed/602.gcc_s - small variations Benchmarks/tramp3d-v4 - small variations Prolangs-C/TimberWolfMC - small variations DOE-ProxyApps-C++/miniFE - extra code vectorized, small variations DOE-ProxyApps-C++/CLAMR - extra code vectorized, small variations ASCI_Purple/SMG2000 - no significant changes RISCV, -O3+LTO Metric: size..text results results0 diff test-suite :: SingleSource/Regression/C/gcc-c-torture/execute/GCC-C-execute-pr28982b.test 1812.00 1866.00 3.0% test-suite :: MultiSource/Benchmarks/Olden/health/health.test 3946.00 4016.00 1.8% test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 513180.00 513550.00 0.1% test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 513180.00 513550.00 0.1% test-suite :: External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r.test 7672198.00 7672202.00 0.0% test-suite :: External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s.test 7672198.00 7672202.00 0.0% test-suite :: External/SPEC/CFP2017rate/508.namd_r/508.namd_r.test 746060.00 746044.00 -0.0% test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 9497716.00 9497364.00 -0.0% test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test 948266.00 948214.00 -0.0% test-suite :: External/SPEC/CFP2006/433.milc/433.milc.test 89874.00 89862.00 -0.0% test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test 835492.00 835346.00 -0.0% test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.test 66230.00 66202.00 -0.0% test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test 946090.00 944206.00 -0.2% test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1136404.00 1131854.00 -0.4% test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1136404.00 1131854.00 -0.4% gcc-c-torture/execute/GCC-C-execute-pr28982b - better vector code Olden/health - extra vector code CINT2017speed/625.x264_s CINT2017rate/525.x264_r - small variation + improvements in reordering, @pixel_hadamard_ac stopped being vectorized because of some non-effective shuffle recognition by the compiler CINT2017rate/502.gcc_r CINT2017speed/602.gcc_s - small variations CFP2017rate/508.namd_r - small variations CFP2017rate/526.blender_r - small variations CFP2006/453.povray - extra vector code Benchmarks/7zip - extra vector code DOE-ProxyApps-C++/miniFE - small variations CFP2017rate/511.povray_r - extra vector code CFP2017speed/638.imagick_s CFP2017rate/538.imagick_r - extra vector code Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: #126771
1 parent 71af48f commit 894935c

26 files changed

+549
-500
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 459 additions & 400 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ target triple = "aarch64--linux"
1717
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
1818
; YAML-NEXT: - Cost: '-19'
1919
; YAML-NEXT: - String: ' and with tree size '
20-
; YAML-NEXT: - TreeSize: '8'
20+
; YAML-NEXT: - TreeSize: '10'
2121

2222
define i32 @test_select(ptr noalias nocapture readonly %blk1, ptr noalias nocapture readonly %blk2, i32 %lx, i32 %h) {
2323
; CHECK-LABEL: @test_select(
@@ -230,7 +230,7 @@ for.end: ; preds = %for.end.loopexit, %
230230
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
231231
; YAML-NEXT: - Cost: '-44'
232232
; YAML-NEXT: - String: ' and with tree size '
233-
; YAML-NEXT: - TreeSize: '10'
233+
; YAML-NEXT: - TreeSize: '12'
234234

235235
define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
236236
; CHECK-LABEL: @test_unrolled_select(

llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define void @f(ptr %r, ptr %w) {
77
%add0 = fadd double %f0, %f0
88
%add1 = fadd double %f1, %f1
99
%w1 = getelementptr inbounds double, ptr %w, i64 1
10-
; CHECK: remark: /tmp/s.c:5:10: Stores SLP vectorized with cost -3 and with tree size 3
10+
; CHECK: remark: /tmp/s.c:5:10: Stores SLP vectorized with cost -3 and with tree size 4
1111
store double %add0, ptr %w, !dbg !9
1212
store double %add1, ptr %w1
1313
ret void

llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ define i32 @foo(i32 %v1, double %v2, i1 %arg, i32 %arg2) {
66
; CHECK-NEXT: entry:
77
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 undef>, i32 [[V1:%.*]], i32 0
88
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <2 x i32> [[TMP0]] to <2 x double>
9-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
9+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
1010
; CHECK-NEXT: br label [[FOR_COND15_PREHEADER:%.*]]
1111
; CHECK: for.cond15.preheader:
1212
; CHECK-NEXT: br label [[IF_END:%.*]]
@@ -15,7 +15,7 @@ define i32 @foo(i32 %v1, double %v2, i1 %arg, i32 %arg2) {
1515
; CHECK: if.end:
1616
; CHECK-NEXT: br label [[FOR_COND15:%.*]]
1717
; CHECK: for.end39:
18-
; CHECK-NEXT: switch i32 %arg2, label [[DO_BODY:%.*]] [
18+
; CHECK-NEXT: switch i32 [[ARG2:%.*]], label [[DO_BODY:%.*]] [
1919
; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
2020
; CHECK-NEXT: i32 1, label [[SW_BB195:%.*]]
2121
; CHECK-NEXT: ]
@@ -26,8 +26,7 @@ define i32 @foo(i32 %v1, double %v2, i1 %arg, i32 %arg2) {
2626
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
2727
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, ptr [[ARRAYIDX43]], align 8
2828
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP5]]
29-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
30-
; CHECK-NEXT: [[TMP9:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> [[TMP2]], <4 x double> [[TMP8]])
29+
; CHECK-NEXT: [[TMP9:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> [[TMP2]], <4 x double> [[TMP7]])
3130
; CHECK-NEXT: br label [[SW_EPILOG:%.*]]
3231
; CHECK: sw.bb195:
3332
; CHECK-NEXT: br label [[SW_EPILOG]]
@@ -39,7 +38,7 @@ define i32 @foo(i32 %v1, double %v2, i1 %arg, i32 %arg2) {
3938
; CHECK: if.end.1:
4039
; CHECK-NEXT: br label [[FOR_COND15_1:%.*]]
4140
; CHECK: for.cond15.1:
42-
; CHECK-NEXT: br i1 %arg, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
41+
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
4342
;
4443
entry:
4544
%conv = sitofp i32 undef to double

llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,15 +311,15 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) {
311311
; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5
312312
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
313313
; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
314-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 0, i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 1>
315-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 1, i32 0, i32 2, i32 1>
316314
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 1>
315+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 0, i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 1>
317316
; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
318317
; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
319318
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
320319
; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP3]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
321320
; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8
322321
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 6, i32 7, i32 8, i32 0, i32 1, i32 2, i32 3, i32 4>
322+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 1, i32 0, i32 2, i32 1>
323323
; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]]
324324
; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]]
325325
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
@@ -401,10 +401,10 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
401401
; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5
402402
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
403403
; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
404-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 2, i32 1, i32 0, i32 2>
404+
; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
405405
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 4, i32 3, i32 6, i32 5, i32 8, i32 7, i32 1, i32 0>
406+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 2, i32 1, i32 0, i32 2>
406407
; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
407-
; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
408408
; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]]
409409
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
410410
; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP3]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
@@ -491,16 +491,16 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
491491
; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4
492492
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
493493
; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
494-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 0, i32 2, i32 1, i32 2, i32 1, i32 0, i32 2, i32 1>
495-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 1, i32 0>
496494
; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
497495
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 4, i32 3, i32 5, i32 6, i32 8, i32 7, i32 1, i32 0>
496+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 0, i32 2, i32 1, i32 2, i32 1, i32 0, i32 2, i32 1>
498497
; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
499498
; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
500499
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
501500
; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP3]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
502501
; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8
503502
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 7, i32 6, i32 8, i32 1, i32 0, i32 3, i32 2, i32 5>
503+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 1, i32 0>
504504
; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]]
505505
; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]]
506506
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>

llvm/test/Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
1111
; YAML-NEXT: - Cost: '-1'
1212
; YAML-NEXT: - String: ' and with tree size '
13-
; YAML-NEXT: - TreeSize: '6'
13+
; YAML-NEXT: - TreeSize: '7'
1414
define i32 @min_double(ptr noalias nocapture %A, ptr noalias nocapture %B) {
1515
; CHECK-LABEL: @min_double(
1616
; CHECK-NEXT: entry:
@@ -44,7 +44,7 @@ entry:
4444
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
4545
; YAML-NEXT: - Cost: '-2'
4646
; YAML-NEXT: - String: ' and with tree size '
47-
; YAML-NEXT: - TreeSize: '6'
47+
; YAML-NEXT: - TreeSize: '7'
4848
define i32 @min_float(ptr noalias nocapture %A, ptr noalias nocapture %B) {
4949
; CHECK-LABEL: @min_float(
5050
; CHECK-NEXT: entry:
@@ -78,7 +78,7 @@ entry:
7878
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
7979
; YAML-NEXT: - Cost: '-1'
8080
; YAML-NEXT: - String: ' and with tree size '
81-
; YAML-NEXT: - TreeSize: '6'
81+
; YAML-NEXT: - TreeSize: '7'
8282
define i32 @max_double(ptr noalias nocapture %A, ptr noalias nocapture %B) {
8383
; CHECK-LABEL: @max_double(
8484
; CHECK-NEXT: entry:
@@ -112,7 +112,7 @@ entry:
112112
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
113113
; YAML-NEXT: - Cost: '-2'
114114
; YAML-NEXT: - String: ' and with tree size '
115-
; YAML-NEXT: - TreeSize: '6'
115+
; YAML-NEXT: - TreeSize: '7'
116116
define i32 @max_float(ptr noalias nocapture %A, ptr noalias nocapture %B) {
117117
; CHECK-LABEL: @max_float(
118118
; CHECK-NEXT: entry:

llvm/test/Transforms/SLPVectorizer/RISCV/small-phi-tree.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,19 @@
44
define float @test(ptr %call78) {
55
; CHECK-LABEL: @test(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> <ptr null, ptr poison>, ptr [[CALL78:%.*]], i32 1
7+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[CALL78:%.*]], i32 0
88
; CHECK-NEXT: br label [[FOR_BODY194:%.*]]
99
; CHECK: for.body194:
1010
; CHECK-NEXT: [[INDVARS_IV132:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[FOR_BODY194]] ]
1111
; CHECK-NEXT: [[CURRENTW_031:%.*]] = phi ptr [ [[CALL78]], [[ENTRY]] ], [ [[PREVIOUSW_030:%.*]], [[FOR_BODY194]] ]
1212
; CHECK-NEXT: [[PREVIOUSW_030]] = phi ptr [ null, [[ENTRY]] ], [ [[CURRENTW_031]], [[FOR_BODY194]] ]
1313
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP0]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY194]] ]
14+
; CHECK-NEXT: [[TMP3]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> <i32 1, i32 0>
1415
; CHECK-NEXT: store float 0.000000e+00, ptr [[CURRENTW_031]], align 4
1516
; CHECK-NEXT: tail call void null(ptr [[PREVIOUSW_030]], ptr null, ptr null, i32 0, i32 0, ptr null, ptr null, i32 0)
16-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[CURRENTW_031]], i32 0
17-
; CHECK-NEXT: [[TMP3]] = insertelement <2 x ptr> [[TMP2]], ptr [[PREVIOUSW_030]], i32 1
1817
; CHECK-NEXT: br i1 false, label [[FOR_END286_LOOPEXIT:%.*]], label [[FOR_BODY194]]
1918
; CHECK: for.end286.loopexit:
20-
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x ptr> [ [[TMP1]], [[FOR_BODY194]] ]
19+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x ptr> [ [[TMP3]], [[FOR_BODY194]] ]
2120
; CHECK-NEXT: ret float 0.000000e+00
2221
;
2322
entry:

llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@ define void @fun(ptr nocapture, i32 zeroext) local_unnamed_addr #0 {
3131
._crit_edge: ; preds = %.lr.ph
3232
ret void
3333

34-
; CHECK: SLP: Adding cost -1 for bundle Idx: 3, n=2 [ %4 = icmp ult i32 %2, %1, ..]
34+
; CHECK: SLP: Adding cost -1 for bundle Idx: 4, n=2 [ %4 = icmp ult i32 %2, %1, ..]
3535
}
3636

llvm/test/Transforms/SLPVectorizer/X86/cmp-after-intrinsic-call-minbitwidth.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@ define void @test() {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i2> @llvm.smin.v2i2(<2 x i2> zeroinitializer, <2 x i2> zeroinitializer)
9-
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i2> zeroinitializer, <2 x i2> [[TMP0]]
10-
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i2> [[TMP1]], zeroinitializer
11-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i2> [[TMP2]], i32 1
12-
; CHECK-NEXT: [[ADD:%.*]] = zext i2 [[TMP3]] to i32
8+
; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> zeroinitializer, <2 x i32> zeroinitializer)
9+
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> [[TMP0]]
10+
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], zeroinitializer
11+
; CHECK-NEXT: [[ADD:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
1312
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[ADD]], 0
14-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i2> [[TMP2]], i32 0
15-
; CHECK-NEXT: [[ADD45:%.*]] = zext i2 [[TMP5]] to i32
13+
; CHECK-NEXT: [[ADD45:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
1614
; CHECK-NEXT: [[ADD152:%.*]] = or i32 [[ADD45]], [[ADD]]
1715
; CHECK-NEXT: [[IDXPROM153:%.*]] = sext i32 [[ADD152]] to i64
1816
; CHECK-NEXT: [[ARRAYIDX154:%.*]] = getelementptr i8, ptr null, i64 [[IDXPROM153]]

llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define void @test() {
3131
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
3232
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
3333
; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
34-
; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
34+
; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
3535
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
3636
;
3737
entry:

llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ define i32 @test(ptr %c, i16 %a, i16 %0) {
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
99
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> zeroinitializer
1010
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
11-
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i16>
11+
; CHECK-NEXT: [[TMP16:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8>
1212
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <2 x i32> <i32 poison, i32 0>
1313
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[TMP0]], i32 0
1414
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
15+
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP16]] to <4 x i16>
1516
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i16> [[TMP7]], [[TMP4]]
1617
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i16 [[A]], -2
1718
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison>

llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ define i32 @test() {
55
; CHECK-LABEL: define i32 @test() {
66
; CHECK-NEXT: [[ENTRY:.*:]]
77
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr null, align 16
8-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> <i32 1, i32 2, i32 2, i32 3, i32 3, i32 3, i32 2, i32 1>
98
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1
109
; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], 0
1110
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
@@ -14,6 +13,7 @@ define i32 @test() {
1413
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP4]], <4 x i64> [[TMP0]], i64 0)
1514
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i32>
1615
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5>
16+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> <i32 1, i32 2, i32 2, i32 3, i32 3, i32 3, i32 2, i32 1>
1717
; CHECK-NEXT: [[TMP14:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
1818
; CHECK-NEXT: [[TMP15:%.*]] = add <8 x i32> [[TMP14]], zeroinitializer
1919
; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i32> [[TMP7]], zeroinitializer

0 commit comments

Comments
 (0)