[X86] Promote cttz_i32(x) -> cttz_i64((i64)x | (1 << 32)) #102900
Conversation
On 64-bit targets we can promote i32 CTTZ nodes to i64 by setting the 32nd bit. llvm#57811 also queried whether we should use BTS instead of MOVABS+OR to avoid an i64 immediate - I'm willing to add a DAGToDAG isel fix for these cases if people think it worthwhile (I'm not sure if we want to introduce an entire X86ISD::BTS node type given its complexity). Fixes llvm#57811
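As a sanity check of the identity behind the transform, here is a minimal standalone sketch (not part of the patch; the helper names are invented for illustration) showing that OR-ing in bit 32 lets a defined-at-zero i32 cttz be computed as a 64-bit cttz whose operand is known non-zero:

```cpp
// Standalone sketch (not from the patch) checking the identity behind the
// promotion: for any 32-bit x, cttz64((uint64_t)x | (1ULL << 32)) equals the
// "defined at zero" 32-bit cttz, which returns 32 when x == 0.
#include <cassert>
#include <cstdint>

static unsigned cttz32_defined(uint32_t x) {
  // Reference semantics of llvm.cttz.i32(x, /*is_zero_poison=*/false).
  if (x == 0)
    return 32;
  unsigned n = 0;
  while ((x & 1) == 0) {
    x >>= 1;
    ++n;
  }
  return n;
}

static unsigned cttz64_zero_undef(uint64_t x) {
  // The operand is known non-zero after OR-ing in bit 32, so a plain scan is fine.
  unsigned n = 0;
  while ((x & 1) == 0) {
    x >>= 1;
    ++n;
  }
  return n;
}

int main() {
  const uint32_t samples[] = {0u, 1u, 2u, 8u, 0x80000000u, 0xFFFFFFFFu, 12345u};
  for (uint32_t x : samples)
    assert(cttz32_defined(x) == cttz64_zero_undef((uint64_t)x | (1ULL << 32)));
  return 0;
}
```

Because the OR guarantees a set bit at position 32 or lower, the i64 count never exceeds 32, which is exactly the i32 defined-at-zero result.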
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Changes: On 64-bit targets we can promote i32 CTTZ nodes to i64 by setting the 32nd bit. #57811 also queried whether we should use BTS instead of MOVABS+OR to avoid an i64 immediate - I'm willing to add a DAGToDAG isel peephole fix for these cases if reviewers think it worthwhile (I'm not sure if we want to introduce an entire X86ISD::BTS node type given its complexity). Fixes #57811
Patch is 24.88 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/102900.diff
5 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2074fac857891..04dfd0ea0d893 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -412,6 +412,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
+ setOperationPromotedToType(ISD::CTTZ , MVT::i32, MVT::i64);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
@@ -3237,9 +3238,10 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
}
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
- // Speculate cttz only if we can directly use TZCNT or can promote to i32.
+ // Speculate cttz only if we can directly use TZCNT or can promote to i32/i64.
return Subtarget.hasBMI() ||
- (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
+ (!Ty->isVectorTy() &&
+ Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 64u : 32u));
}
bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll
index 6eb748a1afbab..b35a1b72fcb6f 100644
--- a/llvm/test/CodeGen/X86/cttz.ll
+++ b/llvm/test/CodeGen/X86/cttz.ll
@@ -317,13 +317,11 @@ define i32 @cttz_i32_zero_test(i32 %n) {
;
; X64-LABEL: cttz_i32_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB6_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB6_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i32_zero_test:
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index df11a44626e38..d5d604a138a71 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -54,13 +54,12 @@ define i32 @or_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: or_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: orl %esi, %edi
-; X64-NEXT: je .LBB1_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB1_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = or i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -115,13 +114,10 @@ define i32 @select_maybe_zero(i1 %c, i32 %x) {
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb $1, %dil
; X64-NEXT: cmovnel %esi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB3_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB3_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 0
@@ -216,16 +212,14 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: shl_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB7_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB7_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = shl nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -275,13 +269,10 @@ define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: addl %esi, %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovael %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB9_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB9_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -334,15 +325,13 @@ define i32 @umax_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: umax_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: cmoval %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB11_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB11_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -401,13 +390,10 @@ define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovbl %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB13_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB13_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.umin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -522,13 +508,10 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB17_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB17_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.smin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -643,13 +626,10 @@ define i32 @smax_known_zero(i32 %x, i32 %y) {
; X64-NEXT: testl %edi, %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovnsl %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB21_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB21_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -676,16 +656,9 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB22_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB22_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shr = lshr i32 %x, %y
@@ -714,16 +687,13 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB23_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB23_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
@@ -775,16 +745,13 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotr_with_fshr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB25_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB25_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -811,16 +778,9 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB26_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB26_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shl = shl i32 %x, %y
@@ -849,16 +809,13 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB27_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB27_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
@@ -910,16 +867,13 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotl_with_fshl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB29_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB29_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -989,16 +943,14 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: sra_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB32_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB32_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = ashr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1068,16 +1020,14 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: srl_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB35_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB35_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = lshr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1128,13 +1078,11 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB37_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB37_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: # kill: def $eax killed $eax def $rax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1185,13 +1133,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB39_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB39_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: # kill: def $eax killed $eax def $rax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = sdiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1235,14 +1181,13 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
;
; X64-LABEL: add_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
-; X64-NEXT: je .LBB41_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB41_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x = or i32 %xx, 1
%z = add nsw i32 %x, %y
@@ -1321,12 +1266,10 @@ define i32 @sub_maybe_zero(i32 %x) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: subl %edi, %eax
-; X64-NEXT: je .LBB44_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB44_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %x, 64
%z = sub i32 %y, %x
@@ -1349,13 +1292,12 @@ define i32 @sub_maybe_zero2(i32 %x) {
;
; X64-LABEL: sub_maybe_zero2:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: negl %edi
-; X64-NEXT: je .LBB45_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB45_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = sub i32 0, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1379,15 +1321,13 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
;
; X64-LABEL: mul_known_nonzero_nsw:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB46_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB46_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nsw i32 %y, %x
@@ -1412,15 +1352,13 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
;
; X64-LABEL: mul_known_nonzero_nuw:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB47_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB47_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nuw i32 %y, %x
@@ -1444,14 +1382,12 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: mul_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: imull %esi, %edi
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB48_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB48_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = mul nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1482,9 +1418,10 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u]
; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: bsfl %eax, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
%z = bitcast <2 x i16> %x to i32
@@ -1508,13 +1445,10 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X64-LABEL: bitcast_mayb...
[truncated]
You can test this locally with the following command:
git-clang-format --diff 513c3726ebc0a324f7e5a11d25617bb9557324d6 62b98af72e06f44e29425e92b2f4cf348f1bfafb --extensions cpp -- llvm/lib/Target/X86/X86ISelLowering.cpp
View the diff from clang-format here:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 04dfd0ea0d..c628b3ee02 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -412,7 +412,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
- setOperationPromotedToType(ISD::CTTZ , MVT::i32, MVT::i64);
+ setOperationPromotedToType(ISD::CTTZ, MVT::i32, MVT::i64);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
LGTM.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/4495
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/3277
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/3257
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/3276
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/4138
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/180/builds/3197
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/2925
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/160/builds/3199
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/5787
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/4718
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/1061
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/4826
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/3363
…REAPPLIED) On 64-bit targets we can promote i32 CTTZ nodes to i64 CTTZ_ZERO_UNDEF by setting the 32nd bit. #57811 also queried whether we should use BTS instead of MOVABS+OR to avoid an i64 immediate - I'm willing to tweak the DAGToDAG isel peephole for these cases if reviewers think it worthwhile, but most recent CPUs can actually handle MOVABS faster than BTS/BTC/BTR. Reapplied with the missing cost-model changes - the cost tables can probably be improved in a follow-up patch. Fixes #57811
On 64-bit targets we can promote i32 CTTZ nodes to i64 by setting the 32nd bit.
#57811 also queried whether we should use BTS instead of MOVABS+OR to avoid an i64 immediate - I'm willing to tweak the DAGToDAG isel peephole for these cases if reviewers think it worthwhile, but most recent CPUs can actually handle MOVABS faster than BTS/BTC/BTR.
Fixes #57811
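For reference, a hedged example (illustrative only, not taken from the patch or its tests) of the kind of C++ source pattern that lowers to the non-poison i32 cttz node this change targets; with the promotion, 64-bit builds can select the branchless MOVABS+OR+REP BSF sequence shown in the test diffs above instead of a compare and branch on the zero case:

```cpp
// Illustrative user-level pattern (function name invented): a trailing-zero
// count that must be defined at zero. Clang/GCC fold the ternary plus
// __builtin_ctz into llvm.cttz.i32(x, /*is_zero_poison=*/false).
#include <cstdint>

unsigned count_trailing_zeros_or_32(uint32_t x) {
  return x ? static_cast<unsigned>(__builtin_ctz(x)) : 32u;
}
```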