Skip to content

Commit 197fb27

Browse files
authored
[AArch64][NFC] NFC for const vector as Instruction operand (#116790)
The current cost model does not take into account the cost of materializing a constant vector. As a result, some cases (as the test shows) are vectorized even though doing so may not always be profitable. A future patch will try to address this issue.
1 parent c4be13c commit 197fb27

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64 -S %s | FileCheck %s %}

; Two scalar fmuls by *different* constants (22.0 and 23.0) whose results are
; packed into a <2 x float>. The autogenerated assertions below record the
; current SLP output: one <2 x float> fmul whose second operand is the
; non-splat constant vector <22.0, 23.0>.
; NOTE(review): per the originating commit message, the cost model does not
; yet account for materializing that constant vector, so this output may
; change once it does.
define <2 x float> @v2f32_diff_consts(float %a, float %b)
; CHECK-LABEL: define <2 x float> @v2f32_diff_consts(
; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], <float 2.200000e+01, float 2.300000e+01>
; CHECK-NEXT: ret <2 x float> [[TMP3]]
;
{
  %1 = fmul float %a, 22.0
  %2 = fmul float %b, 23.0
  %3 = insertelement <2 x float> poison, float %1, i32 0
  %4 = insertelement <2 x float> %3, float %2, i32 1
  ret <2 x float> %4
}

; Two scalar fmuls by the *same* constant (22.0) whose results are packed into
; a <2 x float>. The autogenerated assertions below record the current SLP
; output: one <2 x float> fmul whose second operand is a splat of 22.0.
define <2 x float> @v2f32_const_splat(float %a, float %b)
; CHECK-LABEL: define <2 x float> @v2f32_const_splat(
; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], splat (float 2.200000e+01)
; CHECK-NEXT: ret <2 x float> [[TMP3]]
;
{
  %1 = fmul float %a, 22.0
  %2 = fmul float %b, 22.0
  %3 = insertelement <2 x float> poison, float %1, i32 0
  %4 = insertelement <2 x float> %3, float %2, i32 1
  ret <2 x float> %4
}

; Four scalar fmuls by four different constants (21.0 .. 24.0) whose results
; are packed into a <4 x double>. The autogenerated assertions below record the
; current SLP output: one <4 x double> fmul against a four-element constant
; vector.
; NOTE(review): "illegal type" presumably refers to <4 x double> not being a
; legal vector type for the target (wider than one 128-bit register) --
; confirm against the AArch64 backend.
define <4 x double> @v4f64_illegal_type(double %a, double %b, double %c, double %d)
; CHECK-LABEL: define <4 x double> @v4f64_illegal_type(
; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[B]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[C]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[D]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], <double 2.100000e+01, double 2.200000e+01, double 2.300000e+01, double 2.400000e+01>
; CHECK-NEXT: ret <4 x double> [[TMP5]]
;
{
  %1 = fmul double %a, 21.0
  %2 = fmul double %b, 22.0
  %3 = fmul double %c, 23.0
  %4 = fmul double %d, 24.0
  %5 = insertelement <4 x double> poison, double %1, i32 0
  %6 = insertelement <4 x double> %5, double %2, i32 1
  %7 = insertelement <4 x double> %6, double %3, i32 2
  %8 = insertelement <4 x double> %7, double %4, i32 3
  ret <4 x double> %8
}

; Two independent pairs of scalar fmuls, (%a, %b) and (%c, %d), each multiplied
; by the same constant pair (21.0, 22.0), packed into two <2 x double> values
; that feed an already-vector fadd. The autogenerated assertions below record
; the current SLP output: two <2 x double> fmuls that both use the identical
; constant vector <21.0, 22.0>, followed by the vector fadd.
define <2 x double> @v2f64_dup_const_vector_case1(double %a, double %b, double %c, double %d)
; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case1(
; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 2.100000e+01, double 2.200000e+01>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[D]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], <double 2.100000e+01, double 2.200000e+01>
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]]
; CHECK-NEXT: ret <2 x double> [[TMP7]]
;
{
  %1 = fmul double %a, 21.0
  %2 = fmul double %b, 22.0
  %3 = fmul double %c, 21.0
  %4 = fmul double %d, 22.0
  %5 = insertelement <2 x double> poison, double %1, i32 0
  %6 = insertelement <2 x double> %5, double %2, i32 1
  %7 = insertelement <2 x double> poison, double %3, i32 0
  %8 = insertelement <2 x double> %7, double %4, i32 1
  %9 = fadd <2 x double> %6, %8
  ret <2 x double> %9
}

; A scalar fmul followed by a scalar fadd on each lane, where both operations
; use the same per-lane constants (21.0 for lane 0, 22.0 for lane 1); the
; results are packed into a <2 x double>. The autogenerated assertions below
; record the current SLP output: a <2 x double> fmul and a <2 x double> fadd
; that both take the identical constant vector <21.0, 22.0> as an operand.
define <2 x double> @v2f64_dup_const_vector_case2(double %a, double %b, double %c, double %d)
; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case2(
; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 2.100000e+01, double 2.200000e+01>
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], <double 2.100000e+01, double 2.200000e+01>
; CHECK-NEXT: ret <2 x double> [[TMP4]]
;
{
  %1 = fmul double %a, 21.0
  %2 = fmul double %b, 22.0
  %3 = fadd double %1, 21.0
  %4 = fadd double %2, 22.0
  %5 = insertelement <2 x double> poison, double %3, i32 0
  %6 = insertelement <2 x double> %5, double %4, i32 1
  ret <2 x double> %6
}

0 commit comments

Comments
 (0)