1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
- ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s
2
+ ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
3
+ ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s --check-prefixes=CHECK,AVX
3
4
;
4
5
; This file tests the look-ahead operand reordering heuristic.
5
6
;
@@ -498,19 +499,37 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl
498
499
499
500
500
501
define i1 @ExtractIdxNotConstantInt1 (float %a , float %b , float %c , <4 x float > %vec , i64 %idx2 ) {
501
- ; CHECK-LABEL: @ExtractIdxNotConstantInt1(
502
- ; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 undef
503
- ; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
504
- ; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
505
- ; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
506
- ; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
507
- ; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
508
- ; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
509
- ; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
510
- ; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
511
- ; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
512
- ; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
513
- ; CHECK-NEXT: ret i1 [[CMP_I185]]
502
+ ; SSE-LABEL: @ExtractIdxNotConstantInt1(
503
+ ; SSE-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 undef
504
+ ; SSE-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
505
+ ; SSE-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
506
+ ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
507
+ ; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[C:%.*]], i32 1
508
+ ; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[SUB14_I167]], i32 0
509
+ ; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_I276_I169]], i32 1
510
+ ; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
511
+ ; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+01>, float [[B:%.*]], i32 0
512
+ ; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]]
513
+ ; SSE-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP7]], <float 1.000000e+01, float 2.000000e+00>
514
+ ; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
515
+ ; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
516
+ ; SSE-NEXT: [[MUL123_I184:%.*]] = fmul float [[TMP9]], [[TMP10]]
517
+ ; SSE-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
518
+ ; SSE-NEXT: ret i1 [[CMP_I185]]
519
+ ;
520
+ ; AVX-LABEL: @ExtractIdxNotConstantInt1(
521
+ ; AVX-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 undef
522
+ ; AVX-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
523
+ ; AVX-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
524
+ ; AVX-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
525
+ ; AVX-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
526
+ ; AVX-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
527
+ ; AVX-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
528
+ ; AVX-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
529
+ ; AVX-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
530
+ ; AVX-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
531
+ ; AVX-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
532
+ ; AVX-NEXT: ret i1 [[CMP_I185]]
514
533
;
515
534
%vecext.i291.i166 = extractelement <4 x float > %vec , i64 undef
516
535
%sub14.i167 = fsub float undef , %vecext.i291.i166
@@ -528,19 +547,37 @@ define i1 @ExtractIdxNotConstantInt1(float %a, float %b, float %c, <4 x float> %
528
547
529
548
530
549
define i1 @ExtractIdxNotConstantInt2 (float %a , float %b , float %c , <4 x float > %vec , i64 %idx2 ) {
531
- ; CHECK-LABEL: @ExtractIdxNotConstantInt2(
532
- ; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 1
533
- ; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
534
- ; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
535
- ; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
536
- ; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
537
- ; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
538
- ; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
539
- ; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
540
- ; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
541
- ; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
542
- ; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
543
- ; CHECK-NEXT: ret i1 [[CMP_I185]]
550
+ ; SSE-LABEL: @ExtractIdxNotConstantInt2(
551
+ ; SSE-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 1
552
+ ; SSE-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
553
+ ; SSE-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
554
+ ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
555
+ ; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[C:%.*]], i32 1
556
+ ; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[SUB14_I167]], i32 0
557
+ ; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_I276_I169]], i32 1
558
+ ; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
559
+ ; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+01>, float [[B:%.*]], i32 0
560
+ ; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]]
561
+ ; SSE-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP7]], <float 1.000000e+01, float 2.000000e+00>
562
+ ; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
563
+ ; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
564
+ ; SSE-NEXT: [[MUL123_I184:%.*]] = fmul float [[TMP9]], [[TMP10]]
565
+ ; SSE-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
566
+ ; SSE-NEXT: ret i1 [[CMP_I185]]
567
+ ;
568
+ ; AVX-LABEL: @ExtractIdxNotConstantInt2(
569
+ ; AVX-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 1
570
+ ; AVX-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
571
+ ; AVX-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
572
+ ; AVX-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
573
+ ; AVX-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
574
+ ; AVX-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
575
+ ; AVX-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
576
+ ; AVX-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
577
+ ; AVX-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
578
+ ; AVX-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
579
+ ; AVX-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
580
+ ; AVX-NEXT: ret i1 [[CMP_I185]]
544
581
;
545
582
%vecext.i291.i166 = extractelement <4 x float > %vec , i64 1
546
583
%sub14.i167 = fsub float undef , %vecext.i291.i166
@@ -558,19 +595,37 @@ define i1 @ExtractIdxNotConstantInt2(float %a, float %b, float %c, <4 x float> %
558
595
559
596
560
597
define i1 @foo (float %a , float %b , float %c , <4 x float > %vec , i64 %idx2 ) {
561
- ; CHECK-LABEL: @foo(
562
- ; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
563
- ; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
564
- ; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
565
- ; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
566
- ; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
567
- ; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
568
- ; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
569
- ; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
570
- ; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
571
- ; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
572
- ; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
573
- ; CHECK-NEXT: ret i1 [[CMP_I185]]
598
+ ; SSE-LABEL: @foo(
599
+ ; SSE-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
600
+ ; SSE-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
601
+ ; SSE-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
602
+ ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
603
+ ; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[C:%.*]], i32 1
604
+ ; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[SUB14_I167]], i32 0
605
+ ; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_I276_I169]], i32 1
606
+ ; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
607
+ ; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+01>, float [[B:%.*]], i32 0
608
+ ; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]]
609
+ ; SSE-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP7]], <float 1.000000e+01, float 2.000000e+00>
610
+ ; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
611
+ ; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
612
+ ; SSE-NEXT: [[MUL123_I184:%.*]] = fmul float [[TMP9]], [[TMP10]]
613
+ ; SSE-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
614
+ ; SSE-NEXT: ret i1 [[CMP_I185]]
615
+ ;
616
+ ; AVX-LABEL: @foo(
617
+ ; AVX-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
618
+ ; AVX-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
619
+ ; AVX-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
620
+ ; AVX-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
621
+ ; AVX-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
622
+ ; AVX-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
623
+ ; AVX-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
624
+ ; AVX-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
625
+ ; AVX-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
626
+ ; AVX-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
627
+ ; AVX-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
628
+ ; AVX-NEXT: ret i1 [[CMP_I185]]
574
629
;
575
630
%vecext.i291.i166 = extractelement <4 x float > %vec , i64 0
576
631
%sub14.i167 = fsub float undef , %vecext.i291.i166
0 commit comments