@@ -462,19 +462,15 @@ define i32 @smin_known_zero(i32 %x, i32 %y) {
462
462
; X86-NEXT: cmpl $-54, %eax
463
463
; X86-NEXT: movl $-54, %ecx
464
464
; X86-NEXT: cmovll %eax, %ecx
465
- ; X86-NEXT: bsfl %ecx, %ecx
466
- ; X86-NEXT: movl $32, %eax
467
- ; X86-NEXT: cmovnel %ecx, %eax
465
+ ; X86-NEXT: rep bsfl %ecx, %eax
468
466
; X86-NEXT: retl
469
467
;
470
468
; X64-LABEL: smin_known_zero:
471
469
; X64: # %bb.0:
472
470
; X64-NEXT: cmpl $-54, %edi
473
471
; X64-NEXT: movl $-54, %eax
474
472
; X64-NEXT: cmovll %edi, %eax
475
- ; X64-NEXT: bsfl %eax, %ecx
476
- ; X64-NEXT: movl $32, %eax
477
- ; X64-NEXT: cmovnel %ecx, %eax
473
+ ; X64-NEXT: rep bsfl %eax, %eax
478
474
; X64-NEXT: retq
479
475
%z = call i32 @llvm.smin.i32 (i32 %x , i32 -54 )
480
476
%r = call i32 @llvm.cttz.i32 (i32 %z , i1 false )
@@ -508,9 +504,9 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
508
504
; X86-NEXT: por %xmm2, %xmm0
509
505
; X86-NEXT: pcmpeqd %xmm1, %xmm1
510
506
; X86-NEXT: paddd %xmm0, %xmm1
511
- ; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}} , %xmm1
512
- ; X86-NEXT: pxor %xmm1, %xmm0
513
- ; X86-NEXT: pcmpgtd %xmm1, %xmm0
507
+ ; X86-NEXT: pand %xmm1 , %xmm0
508
+ ; X86-NEXT: pxor %xmm1, %xmm1
509
+ ; X86-NEXT: pcmpeqd %xmm1, %xmm0
514
510
; X86-NEXT: psrld $31, %xmm0
515
511
; X86-NEXT: retl
516
512
;
@@ -519,10 +515,10 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
519
515
; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
520
516
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
521
517
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
522
- ; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
523
- ; X64-NEXT: vpminud %xmm1, %xmm0 , %xmm1
518
+ ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
519
+ ; X64-NEXT: vpxor %xmm1, %xmm1 , %xmm1
524
520
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
525
- ; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip) , %xmm0, %xmm0
521
+ ; X64-NEXT: vpsrld $31 , %xmm0, %xmm0
526
522
; X64-NEXT: retq
527
523
%z = call <4 x i32 > @llvm.smin.v4i32 (<4 x i32 > %x , <4 x i32 > <i32 -54 , i32 -23 , i32 -12 , i32 -1 >)
528
524
%r = call <4 x i32 > @llvm.ctpop.v4i32 (<4 x i32 > %z )
@@ -603,19 +599,15 @@ define i32 @smax_maybe_zero(i32 %x, i32 %y) {
603
599
; X86-NEXT: cmpl $55, %eax
604
600
; X86-NEXT: movl $54, %ecx
605
601
; X86-NEXT: cmovgel %eax, %ecx
606
- ; X86-NEXT: bsfl %ecx, %ecx
607
- ; X86-NEXT: movl $32, %eax
608
- ; X86-NEXT: cmovnel %ecx, %eax
602
+ ; X86-NEXT: rep bsfl %ecx, %eax
609
603
; X86-NEXT: retl
610
604
;
611
605
; X64-LABEL: smax_maybe_zero:
612
606
; X64: # %bb.0:
613
607
; X64-NEXT: cmpl $55, %edi
614
608
; X64-NEXT: movl $54, %eax
615
609
; X64-NEXT: cmovgel %edi, %eax
616
- ; X64-NEXT: bsfl %eax, %ecx
617
- ; X64-NEXT: movl $32, %eax
618
- ; X64-NEXT: cmovnel %ecx, %eax
610
+ ; X64-NEXT: rep bsfl %eax, %eax
619
611
; X64-NEXT: retq
620
612
%z = call i32 @llvm.smax.i32 (i32 %x , i32 54 )
621
613
%r = call i32 @llvm.cttz.i32 (i32 %z , i1 false )
0 commit comments