@@ -452,19 +452,15 @@ define i32 @smin_known_zero(i32 %x, i32 %y) {
 ; X86-NEXT: cmpl $-54, %eax
 ; X86-NEXT: movl $-54, %ecx
 ; X86-NEXT: cmovll %eax, %ecx
-; X86-NEXT: bsfl %ecx, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: smin_known_zero:
 ; X64: # %bb.0:
 ; X64-NEXT: cmpl $-54, %edi
 ; X64-NEXT: movl $-54, %eax
 ; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: bsfl %eax, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: rep bsfl %eax, %eax
 ; X64-NEXT: retq
   %z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -482,9 +478,9 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
 ; X86-NEXT: por %xmm2, %xmm0
 ; X86-NEXT: pcmpeqd %xmm1, %xmm1
 ; X86-NEXT: paddd %xmm0, %xmm1
-; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-NEXT: pxor %xmm1, %xmm0
-; X86-NEXT: pcmpgtd %xmm1, %xmm0
+; X86-NEXT: pand %xmm1, %xmm0
+; X86-NEXT: pxor %xmm1, %xmm1
+; X86-NEXT: pcmpeqd %xmm1, %xmm0
 ; X86-NEXT: psrld $31, %xmm0
 ; X86-NEXT: retl
 ;
@@ -493,10 +489,10 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
 ; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
-; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpminud %xmm1, %xmm0, %xmm1
+; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpsrld $31, %xmm0, %xmm0
 ; X64-NEXT: retq
   %z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
   %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
@@ -577,19 +573,15 @@ define i32 @smax_maybe_zero(i32 %x, i32 %y) {
 ; X86-NEXT: cmpl $55, %eax
 ; X86-NEXT: movl $54, %ecx
 ; X86-NEXT: cmovgel %eax, %ecx
-; X86-NEXT: bsfl %ecx, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: smax_maybe_zero:
 ; X64: # %bb.0:
 ; X64-NEXT: cmpl $55, %edi
 ; X64-NEXT: movl $54, %eax
 ; X64-NEXT: cmovgel %edi, %eax
-; X64-NEXT: bsfl %eax, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: rep bsfl %eax, %eax
 ; X64-NEXT: retq
   %z = call i32 @llvm.smax.i32(i32 %x, i32 54)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)