@@ -30,28 +30,20 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
;
; AVX512VBMI-LABEL: shl_i512_1:
; AVX512VBMI: # %bb.0:
- ; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
- ; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
- ; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $1, %xmm3, %xmm4, %xmm4
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $1, %xmm2, %xmm5, %xmm6
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm6, %ymm4
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $1, %xmm1, %xmm6, %xmm7
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $1, %xmm0, %xmm8, %xmm9
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm7, %ymm9, %ymm7
- ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm4, %zmm7, %zmm4
- ; AVX512VBMI-NEXT: vpshldq $1, %xmm8, %xmm1, %xmm1
- ; AVX512VBMI-NEXT: vpsllq $1, %xmm0, %xmm0
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
- ; AVX512VBMI-NEXT: vpshldq $1, %xmm5, %xmm3, %xmm1
- ; AVX512VBMI-NEXT: vpshldq $1, %xmm6, %xmm2, %xmm2
+ ; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+ ; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+ ; AVX512VBMI-NEXT: vpshldq $1, %xmm3, %xmm2, %xmm3
+ ; AVX512VBMI-NEXT: vpsllq $1, %xmm0, %xmm4
+ ; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
- ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm4[0],zmm0[2],zmm4[2],zmm0[4],zmm4[4],zmm0[6],zmm4[6]
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
+ ; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+ ; AVX512VBMI-NEXT: vpshldq $1, %ymm1, %ymm2, %ymm1
+ ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+ ; AVX512VBMI-NEXT: vpshldq $1, %zmm0, %zmm2, %zmm0
+ ; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6]
; AVX512VBMI-NEXT: retq
;
; ZNVER4-LABEL: shl_i512_1:
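(For context: the IR under test is presumably something like the sketch below, a bitcast of the <8 x i64> argument to i512, a 1-bit left shift, and a bitcast back. Only the function signature is taken from the hunk header; the body is an assumption, not copied from the test file. As the diff shows, the new codegen lowers this with wider ymm/zmm vpshldq funnel shifts instead of xmm-only ones.

define <8 x i64> @shl_i512_1(<8 x i64> %a) {
  %v = bitcast <8 x i64> %a to i512   ; reinterpret the vector as one 512-bit integer
  %s = shl i512 %v, 1                 ; shift the whole 512-bit value left by one bit
  %r = bitcast i512 %s to <8 x i64>   ; back to the vector return type
  ret <8 x i64> %r
}

Per 64-bit lane, vpshldq $1 computes (hi << 1) | (lo >> 63), so it carries the shifted-out bit of the lower qword into the lane above; vpsllq $1 handles the lowest lane, which has nothing to carry in.)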
@@ -131,27 +123,22 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) {
;
; AVX512VBMI-LABEL: lshr_i512_1:
; AVX512VBMI: # %bb.0:
- ; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+ ; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
- ; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+ ; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm3, %xmm4, %xmm5
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm6, %xmm7
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm5
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm1, %xmm7, %xmm8
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm0, %xmm9, %xmm0
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm0
- ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm7, %xmm2, %xmm2
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm9, %xmm1, %xmm1
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm6, %xmm3, %xmm2
- ; AVX512VBMI-NEXT: vpsrlq $1, %xmm4, %xmm3
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
- ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+ ; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+ ; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
+ ; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+ ; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+ ; AVX512VBMI-NEXT: vpsrlq $1, %xmm1, %xmm1
+ ; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+ ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+ ; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512VBMI-NEXT: retq
;
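(The logical-shift-right case presumably follows the same bitcast pattern, again only a sketch with an assumed body:

define <8 x i64> @lshr_i512_1(<8 x i64> %a) {
  %v = bitcast <8 x i64> %a to i512   ; one 512-bit integer
  %s = lshr i512 %v, 1                ; vpshldq $63 gives (hi << 63) | (lo >> 1) per lane
  %r = bitcast i512 %s to <8 x i64>
  ret <8 x i64> %r
}

Here vpsrlq $1 handles the topmost 64-bit lane, which has no higher lane to shift bits in from.)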
@@ -251,27 +238,22 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) {
;
; AVX512VBMI-LABEL: ashr_i512_1:
; AVX512VBMI: # %bb.0:
- ; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+ ; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
- ; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+ ; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm3, %xmm4, %xmm5
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm6, %xmm7
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm5
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm1, %xmm7, %xmm8
- ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm0, %xmm9, %xmm0
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm0
- ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm7, %xmm2, %xmm2
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm9, %xmm1, %xmm1
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
- ; AVX512VBMI-NEXT: vpshldq $63, %xmm6, %xmm3, %xmm2
- ; AVX512VBMI-NEXT: vpsraq $1, %xmm4, %xmm3
- ; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
- ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+ ; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+ ; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
+ ; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+ ; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+ ; AVX512VBMI-NEXT: vpsraq $1, %xmm1, %xmm1
+ ; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+ ; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+ ; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+ ; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512VBMI-NEXT: retq
;
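(Likewise for the arithmetic shift, presumably `%s = ashr i512 %v, 1` on the bitcast i512 value; the generated code differs from the lshr case only in using vpsraq $1 for the topmost 64-bit lane, so the sign bit of the 512-bit value is preserved.)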