@@ -445,126 +445,94 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
; pr38477: unsigned division of an <8 x i16> vector by a non-uniform constant
; vector. (The name presumably refers to a bug-tracker entry - TODO confirm.)
;
; Divisors per lane, read as unsigned i16 values:
;   lane 0: 1      lane 1: 119    lane 2: 73     lane 3: -111 (65425)
;   lane 4: -3 (65533)            lane 5: 118    lane 6: 32     lane 7: 31
;
; What the expected output below pins down:
;   * Lanes 0-1 of the input are blended into a zeroed vector; lane 1 is then
;     overwritten by its computed quotient, so only lane 0 (divisor 1) passes
;     through unchanged.
;   * Lanes 1-7 are each extracted to a GPR, computed there, and re-inserted.
;   * Lane 6 (divisor 32) is a plain right shift by 5.
;   * Lanes 2, 3 and 5 use one multiply followed by one shift.
;   * Lane 4 uses a shift-left/add/shift sequence in place of a multiply.
;   * Lanes 1 and 7 use the longer multiply, subtract, halve, add, shift form.
;   * No multiply-back or subtract of the quotient follows any lane, i.e. this
;     is the quotient, not the remainder.
;
; NOTE(review): the assertion lines look machine-generated (normalised
; "# %bb.0" labels, "{{.*#+}}" shuffle comments); if so, regenerate them with
; the project's update script rather than editing by hand - confirm against
; the file header, which is not visible from this block.
define <8 x i16> @pr38477(<8 x i16> %a0) {
; SSE-LABEL: pr38477:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE-NEXT:    pextrw $1, %xmm1, %eax
; SSE-NEXT:    imull $4957, %eax, %ecx # imm = 0x135D
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movzwl %ax, %eax
; SSE-NEXT:    shrl %eax
; SSE-NEXT:    addl %ecx, %eax
; SSE-NEXT:    shrl $6, %eax
; SSE-NEXT:    pinsrw $1, %eax, %xmm0
; SSE-NEXT:    pextrw $2, %xmm1, %eax
; SSE-NEXT:    imull $57457, %eax, %eax # imm = 0xE071
; SSE-NEXT:    shrl $22, %eax
; SSE-NEXT:    pinsrw $2, %eax, %xmm0
; SSE-NEXT:    pextrw $3, %xmm1, %eax
; SSE-NEXT:    imull $4103, %eax, %eax # imm = 0x1007
; SSE-NEXT:    shrl $28, %eax
; SSE-NEXT:    pinsrw $3, %eax, %xmm0
; SSE-NEXT:    pextrw $4, %xmm1, %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shll $14, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    shrl $30, %ecx
; SSE-NEXT:    pinsrw $4, %ecx, %xmm0
; SSE-NEXT:    pextrw $5, %xmm1, %eax
; SSE-NEXT:    imull $35545, %eax, %eax # imm = 0x8AD9
; SSE-NEXT:    shrl $22, %eax
; SSE-NEXT:    pinsrw $5, %eax, %xmm0
; SSE-NEXT:    pextrw $6, %xmm1, %eax
; SSE-NEXT:    shrl $5, %eax
; SSE-NEXT:    pinsrw $6, %eax, %xmm0
; SSE-NEXT:    pextrw $7, %xmm1, %eax
; SSE-NEXT:    imull $2115, %eax, %ecx # imm = 0x843
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movzwl %ax, %eax
; SSE-NEXT:    shrl %eax
; SSE-NEXT:    addl %ecx, %eax
; SSE-NEXT:    shrl $4, %eax
; SSE-NEXT:    pinsrw $7, %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: pr38477:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX-NEXT:    vpextrw $1, %xmm0, %eax
; AVX-NEXT:    imull $4957, %eax, %ecx # imm = 0x135D
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    shrl %eax
; AVX-NEXT:    addl %ecx, %eax
; AVX-NEXT:    shrl $6, %eax
; AVX-NEXT:    vpinsrw $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $2, %xmm0, %eax
; AVX-NEXT:    imull $57457, %eax, %eax # imm = 0xE071
; AVX-NEXT:    shrl $22, %eax
; AVX-NEXT:    vpinsrw $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    imull $4103, %eax, %eax # imm = 0x1007
; AVX-NEXT:    shrl $28, %eax
; AVX-NEXT:    vpinsrw $3, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $4, %xmm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shll $14, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    shrl $30, %ecx
; AVX-NEXT:    vpinsrw $4, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $5, %xmm0, %eax
; AVX-NEXT:    imull $35545, %eax, %eax # imm = 0x8AD9
; AVX-NEXT:    shrl $22, %eax
; AVX-NEXT:    vpinsrw $5, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $6, %xmm0, %eax
; AVX-NEXT:    shrl $5, %eax
; AVX-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrw $7, %xmm0, %eax
; AVX-NEXT:    imull $2115, %eax, %ecx # imm = 0x843
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    shrl %eax
; AVX-NEXT:    addl %ecx, %eax
; AVX-NEXT:    shrl $4, %eax
; AVX-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  ; Result is a quotient, so the value is named %div (was %rem when this test
  ; exercised urem); value names are not referenced by the assertions above.
  %div = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
  ret <8 x i16> %div
}
0 commit comments