@@ -95,10 +95,94 @@ define <16 x i8> @vpperm_shuffle_general(<16 x i8> %a0, <16 x i8> %a1) {
95
95
; VPERMIL2
96
96
;
97
97
98
- declare <2 x double > @llvm.x86.xop.vpermil2pd (<2 x double >, <2 x double >, <2 x double >, i8 ) nounwind readnone
99
- declare <4 x double > @llvm.x86.xop.vpermil2pd.256 (<4 x double >, <4 x double >, <4 x double >, i8 ) nounwind readnone
98
; Shuffle-comment test for the 128-bit vpermil2pd intrinsic: calls it on
; %a0/%a1 with the constant selector <i64 4, i64 2> and immediate 0.
; Both the X32 and X64 checks expect the printed shuffle comment
; "xmm0 = xmm1[0],xmm0[1]".
; NOTE(review): the "_21" suffix presumably spells the expected element
; indices (2 = xmm1[0], 1 = xmm0[1]) -- confirm against the XOP spec.
+ define <2 x double > @vpermil2pd_21 (<2 x double > %a0 , <2 x double > %a1 ) {
99
+ ; X32-LABEL: vpermil2pd_21:
100
+ ; X32: # BB#0:
101
+ ; X32-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
102
+ ; X32-NEXT: retl
103
+ ;
104
+ ; X64-LABEL: vpermil2pd_21:
105
+ ; X64: # BB#0:
106
+ ; X64-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
107
+ ; X64-NEXT: retq
108
+ %1 = call <2 x double > @llvm.x86.xop.vpermil2pd (<2 x double > %a0 , <2 x double > %a1 , <2 x i64 > <i64 4 , i64 2 >, i8 0 )
109
+ ret <2 x double > %1
110
+ }
111
+
112
; Shuffle-comment test for the 256-bit vpermil2pd intrinsic: calls it on
; %a0/%a1 with the constant selector <i64 0, i64 0, i64 4, i64 0> and
; immediate 0. Both the X32 and X64 checks expect the printed shuffle comment
; "ymm0 = ymm0[0,0],ymm1[2],ymm0[2]".
; NOTE(review): the expected output suggests selector values are applied per
; 128-bit lane (selector 0 in the upper half prints as ymm0[2]) -- confirm
; against the XOP spec.
+ define <4 x double > @vpermil2pd256_0062 (<4 x double > %a0 , <4 x double > %a1 ) {
113
+ ; X32-LABEL: vpermil2pd256_0062:
114
+ ; X32: # BB#0:
115
+ ; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
116
+ ; X32-NEXT: retl
117
+ ;
118
+ ; X64-LABEL: vpermil2pd256_0062:
119
+ ; X64: # BB#0:
120
+ ; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
121
+ ; X64-NEXT: retq
122
+ %1 = call <4 x double > @llvm.x86.xop.vpermil2pd.256 (<4 x double > %a0 , <4 x double > %a1 , <4 x i64 > <i64 0 , i64 0 , i64 4 , i64 0 >, i8 0 )
123
+ ret <4 x double > %1
124
+ }
125
+
126
; Zeroing variant of the 256-bit vpermil2pd shuffle-comment test: the call
; uses the constant selector <i64 0, i64 0, i64 14, i64 10> with immediate 3.
; Both the X32 and X64 checks expect "ymm0 = zero,zero,ymm1[3],ymm0[3]",
; i.e. the two elements whose selector is 0 print as zero.
; NOTE(review): presumably immediate 3 zeroes elements whose selector has
; bit 3 clear -- confirm against the XOP VPERMIL2PD zero-match rules.
+ define <4 x double > @vpermil2pd256_zz73 (<4 x double > %a0 , <4 x double > %a1 ) {
127
+ ; X32-LABEL: vpermil2pd256_zz73:
128
+ ; X32: # BB#0:
129
+ ; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
130
+ ; X32-NEXT: retl
131
+ ;
132
+ ; X64-LABEL: vpermil2pd256_zz73:
133
+ ; X64: # BB#0:
134
+ ; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
135
+ ; X64-NEXT: retq
136
+ %1 = call <4 x double > @llvm.x86.xop.vpermil2pd.256 (<4 x double > %a0 , <4 x double > %a1 , <4 x i64 > <i64 0 , i64 0 , i64 14 , i64 10 >, i8 3 )
137
+ ret <4 x double > %1
138
+ }
139
+
140
; Shuffle-comment test for the 128-bit vpermil2ps intrinsic: calls it on
; %a0/%a1 with the constant selector <i32 0, i32 5, i32 6, i32 1> and
; immediate 0. Both the X32 and X64 checks expect the printed shuffle comment
; "xmm0 = xmm0[0],xmm1[1,2],xmm0[1]".
; NOTE(review): the expected output suggests selector values 4-7 address the
; second source (5 -> xmm1[1], 6 -> xmm1[2]) -- confirm against the XOP spec.
+ define <4 x float > @vpermil2ps_0561 (<4 x float > %a0 , <4 x float > %a1 ) {
141
+ ; X32-LABEL: vpermil2ps_0561:
142
+ ; X32: # BB#0:
143
+ ; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
144
+ ; X32-NEXT: retl
145
+ ;
146
+ ; X64-LABEL: vpermil2ps_0561:
147
+ ; X64: # BB#0:
148
+ ; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
149
+ ; X64-NEXT: retq
150
+ %1 = call <4 x float > @llvm.x86.xop.vpermil2ps (<4 x float > %a0 , <4 x float > %a1 , <4 x i32 > <i32 0 , i32 5 , i32 6 , i32 1 >, i8 0 )
151
+ ret <4 x float > %1
152
+ }
153
+
154
; Shuffle-comment test for the 256-bit vpermil2ps intrinsic: calls it on
; %a0/%a1 with the constant selector <0, 5, 4, 1, 0, 0, 7, 6> (i32 elements)
; and immediate 0. Both the X32 and X64 checks expect the printed shuffle
; comment "ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]".
; NOTE(review): the name suffix appears to spell the expected result as hex
; shuffle indices where 8-F denote ymm1[0-7] (0,9,8,1,4,4,F,E) -- confirm.
+ define <8 x float > @vpermil2ps256_098144FE (<8 x float > %a0 , <8 x float > %a1 ) {
155
+ ; X32-LABEL: vpermil2ps256_098144FE:
156
+ ; X32: # BB#0:
157
+ ; X32-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
158
+ ; X32-NEXT: retl
159
+ ;
160
+ ; X64-LABEL: vpermil2ps256_098144FE:
161
+ ; X64: # BB#0:
162
+ ; X64-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
163
+ ; X64-NEXT: retq
164
+ %1 = call <8 x float > @llvm.x86.xop.vpermil2ps.256 (<8 x float > %a0 , <8 x float > %a1 , <8 x i32 > <i32 0 , i32 5 , i32 4 , i32 1 , i32 0 , i32 0 , i32 7 , i32 6 >, i8 0 )
165
+ ret <8 x float > %1
166
+ }
167
+
168
; Zeroing variant of the 256-bit vpermil2ps shuffle-comment test: the call
; uses the constant selector <0, 8, 8, 4, 7, 8, 8, 6> (i32 elements) with
; immediate 2. Both the X32 and X64 checks expect
; "ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]", i.e. every element
; whose selector is 8 prints as zero.
; NOTE(review): presumably immediate 2 zeroes elements whose selector has
; bit 3 set -- confirm against the XOP VPERMIL2PS zero-match rules.
; NOTE(review): by the index naming used in vpermil2ps256_098144FE, the
; expected ymm1[7] / ymm1[6] elements would be spelled F / E, so the suffix
; would read 0zz8FzzE rather than 0zz8BzzA -- name looks inconsistent; confirm.
+ define <8 x float > @vpermil2ps256_0zz8BzzA (<8 x float > %a0 , <8 x float > %a1 ) {
169
+ ; X32-LABEL: vpermil2ps256_0zz8BzzA:
170
+ ; X32: # BB#0:
171
+ ; X32-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
172
+ ; X32-NEXT: retl
173
+ ;
174
+ ; X64-LABEL: vpermil2ps256_0zz8BzzA:
175
+ ; X64: # BB#0:
176
+ ; X64-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
177
+ ; X64-NEXT: retq
178
+ %1 = call <8 x float > @llvm.x86.xop.vpermil2ps.256 (<8 x float > %a0 , <8 x float > %a1 , <8 x i32 > <i32 0 , i32 8 , i32 8 , i32 4 , i32 7 , i32 8 , i32 8 , i32 6 >, i8 2 )
179
+ ret <8 x float > %1
180
+ }
181
+
182
; Declarations of the 128-bit and 256-bit XOP vpermil2pd intrinsics called by
; the tests above: two double-vector sources, an integer-vector selector
; (<2 x i64> / <4 x i64>), and an i8 immediate.
+ declare <2 x double > @llvm.x86.xop.vpermil2pd (<2 x double >, <2 x double >, <2 x i64 >, i8 ) nounwind readnone
183
+ declare <4 x double > @llvm.x86.xop.vpermil2pd.256 (<4 x double >, <4 x double >, <4 x i64 >, i8 ) nounwind readnone
100
184
101
- declare <4 x float > @llvm.x86.xop.vpermil2ps (<4 x float >, <4 x float >, <4 x float >, i8 ) nounwind readnone
102
- declare <8 x float > @llvm.x86.xop.vpermil2ps.256 (<8 x float >, <8 x float >, <8 x float >, i8 ) nounwind readnone
185
; Declarations of the 128-bit and 256-bit XOP vpermil2ps intrinsics called by
; the tests above: two float-vector sources, an integer-vector selector
; (<4 x i32> / <8 x i32>), and an i8 immediate.
+ declare <4 x float > @llvm.x86.xop.vpermil2ps (<4 x float >, <4 x float >, <4 x i32 >, i8 ) nounwind readnone
186
+ declare <8 x float > @llvm.x86.xop.vpermil2ps.256 (<8 x float >, <8 x float >, <8 x i32 >, i8 ) nounwind readnone
103
187
104
188
declare <16 x i8 > @llvm.x86.xop.vpperm (<16 x i8 >, <16 x i8 >, <16 x i8 >) nounwind readnone
0 commit comments