@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
 
 @arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
@@ -777,6 +777,25 @@ define i32 @maxi8_mutiple_uses(i32) {
 ; AVX-NEXT: store i32 [[TMP14]], i32* @var, align 8
 ; AVX-NEXT: ret i32 [[TMP13]]
 ;
+; AVX2-LABEL: @maxi8_mutiple_uses(
+; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
+; AVX2-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
+; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX2-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
+; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
+; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
+; AVX2-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
+; AVX2-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP10]], i32 [[TMP5]]
+; AVX2-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[OP_EXTRA1]], [[TMP11]]
+; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[OP_EXTRA1]], i32 [[TMP11]]
+; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; AVX2-NEXT: store i32 [[TMP14]], i32* @var, align 8
+; AVX2-NEXT: ret i32 [[TMP13]]
+;
 ; THRESH-LABEL: @maxi8_mutiple_uses(
 ; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
 ; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
@@ -829,6 +848,88 @@ define i32 @maxi8_mutiple_uses(i32) {
 ret i32 %23
 }
 
+define i32 @maxi8_mutiple_uses2(i32) {
+; SSE-LABEL: @maxi8_mutiple_uses2(
+; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
+; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
+; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
+; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
+; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
+; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
+; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
+; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; SSE-NEXT: [[TMP18:%.*]] = select i1 [[TMP10]], i32 3, i32 4
+; SSE-NEXT: store i32 [[TMP18]], i32* @var, align 8
+; SSE-NEXT: ret i32 [[TMP17]]
+;
+; AVX-LABEL: @maxi8_mutiple_uses2(
+; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16
+; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; AVX-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
+; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
+; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 [[TMP7]]
+; AVX-NEXT: [[TMP10:%.*]] = select i1 undef, i32 3, i32 4
+; AVX-NEXT: store i32 [[TMP10]], i32* @var, align 8
+; AVX-NEXT: ret i32 [[TMP9]]
+;
+; AVX2-LABEL: @maxi8_mutiple_uses2(
+; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16
+; AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; AVX2-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
+; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
+; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 [[TMP7]]
+; AVX2-NEXT: [[TMP10:%.*]] = select i1 undef, i32 3, i32 4
+; AVX2-NEXT: store i32 [[TMP10]], i32* @var, align 8
+; AVX2-NEXT: ret i32 [[TMP9]]
+;
+; THRESH-LABEL: @maxi8_mutiple_uses2(
+; THRESH-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16
+; THRESH-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; THRESH-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; THRESH-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
+; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
+; THRESH-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; THRESH-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 [[TMP7]]
+; THRESH-NEXT: [[TMP10:%.*]] = select i1 undef, i32 3, i32 4
+; THRESH-NEXT: store i32 [[TMP10]], i32* @var, align 8
+; THRESH-NEXT: ret i32 [[TMP9]]
+;
+ %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
+ %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+ %4 = icmp sgt i32 %2, %3
+ %5 = select i1 %4, i32 %2, i32 %3
+ %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+ %7 = icmp sgt i32 %5, %6
+ %8 = select i1 %7, i32 %5, i32 %6
+ %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+ %10 = icmp sgt i32 %8, %9
+ %11 = select i1 %10, i32 %8, i32 %9
+ %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+ %13 = icmp sgt i32 %11, %12
+ %14 = select i1 %13, i32 %11, i32 %12
+ %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+ %16 = icmp sgt i32 %14, %15
+ %17 = select i1 %16, i32 %14, i32 %15
+ %18 = select i1 %10, i32 3, i32 4
+ store i32 %18, i32* @var, align 8
+ ret i32 %17
+}
+
 define i32 @maxi8_wrong_parent(i32) {
 ; SSE-LABEL: @maxi8_wrong_parent(
 ; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
@@ -876,6 +977,25 @@ define i32 @maxi8_wrong_parent(i32) {
 ; AVX-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP13]], i32 [[TMP5]]
 ; AVX-NEXT: ret i32 [[OP_EXTRA1]]
 ;
+; AVX2-LABEL: @maxi8_wrong_parent(
+; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
+; AVX2-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; AVX2-NEXT: br label [[PP:%.*]]
+; AVX2: pp:
+; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
+; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
+; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
+; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
+; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
+; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
+; AVX2-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
+; AVX2-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP13]], i32 [[TMP5]]
+; AVX2-NEXT: ret i32 [[OP_EXTRA1]]
+;
 ; THRESH-LABEL: @maxi8_wrong_parent(
 ; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
 ; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0