@@ -255,35 +255,35 @@ define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_srem_by_pow2b:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrld $29, %xmm2
-; SSE-NEXT: movdqa %xmm1, %xmm3
-; SSE-NEXT: psrld $31, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: psrld $30, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT: paddd %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrad $3, %xmm2
-; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psrld $31, %xmm1
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrad $31, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm3
+; SSE-NEXT: psrld $29, %xmm3
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; SSE-NEXT: psrld $30, %xmm2
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; SSE-NEXT: paddd %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: psrad $3, %xmm1
+; SSE-NEXT: movdqa %xmm2, %xmm3
 ; SSE-NEXT: psrad $1, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: psrad $2, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; SSE-NEXT: psrad $2, %xmm2
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT: psubd %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: combine_vec_srem_by_pow2b:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $29, %xmm1, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrld $30, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT: vpsrld $29, %xmm2, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrld $30, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
 ; AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
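
For readers of the CHECK lines above: a minimal scalar sketch of the lowering this hunk tests, assuming the divisors are the powers of two implied by the $31/$30/$29 shift amounts. The helper name srem_pow2 is ours for illustration, not part of the test or of LLVM:

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper, not part of the test: scalar form of the vector
   sequence checked above. srem by 2^k (1 <= k <= 30) is computed as
   x - ((x + bias) >> k) * 2^k, where bias rounds the signed division
   toward zero for negative x. Assumes arithmetic right shift of signed
   ints, as on x86. */
static int32_t srem_pow2(int32_t x, unsigned k) {
    uint32_t sign = (uint32_t)(x >> 31);                 /* psrad $31: all-ones if x < 0 */
    uint32_t bias = sign >> (32 - k);                    /* psrld $(32-k): 2^k - 1 if x < 0 */
    int32_t quot = (int32_t)((uint32_t)x + bias) >> k;   /* paddd; psrad $k */
    return x - quot * (1 << k);                          /* pmulld; psubd */
}

The expected output is functionally unchanged; the diff reorders the sequence so that the k = 1 lane's bias is taken from a logical psrld $31 applied to %xmm0 directly, which appears to exploit the identity that logically shifting the sign mask (x >>s 31) right by 31 equals x >>u 31, shortening the dependency on the psrad result.
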
@@ -320,10 +320,10 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
 ; SSE-NEXT: movdqa %xmm1, %xmm3
 ; SSE-NEXT: psrld $30, %xmm3
 ; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrld $29, %xmm2
-; SSE-NEXT: psrld $31, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrld $31, %xmm2
+; SSE-NEXT: psrld $29, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
 ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
 ; SSE-NEXT: paddd %xmm0, %xmm1
 ; SSE-NEXT: movdqa %xmm1, %xmm2
@@ -346,9 +346,9 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
 ; AVX1-NEXT: vpsrld $28, %xmm1, %xmm2
 ; AVX1-NEXT: vpsrld $30, %xmm1, %xmm3
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrld $29, %xmm1, %xmm3
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm3
+; AVX1-NEXT: vpsrld $29, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT: vpsrad $4, %xmm1, %xmm2
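
The combine_vec_srem_by_pow2b_neg hunks apply the same reordering to the negative-divisor variant. Since srem's result takes the sign of the dividend, x srem -2^k equals x srem 2^k, which is why the same shift/blend sequence is reused. Continuing the sketch above (hypothetical harness, assuming srem_pow2 from the previous block):

/* Quick sanity check of the sketch against C's % operator. */
int main(void) {
    for (int32_t x = -64; x <= 64; ++x) {
        assert(srem_pow2(x, 2) == x % 4);
        assert(x % -4 == x % 4); /* divisor sign does not affect srem */
    }
    return 0;
}
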