Skip to content

Commit f59742c

Browse files
authored
[X86] getIntImmCostInst - recognise i64 ICMP EQ/NE special cases (#142812)
If the lower 32 bits of an i64 value are known to be zero, then icmp lowering will shift+truncate down to an i32, allowing the immediate to be embedded. There's a lot more that could be done here to match icmp lowering, but this PR just focuses on known regressions. Fixes #142513 Fixes #62145
1 parent e7cd6b4 commit f59742c

File tree

3 files changed

+17
-16
lines changed

3 files changed

+17
-16
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5993,12 +5993,19 @@ InstructionCost X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
59935993
// This is an imperfect hack to prevent constant hoisting of
59945994
// compares that might be trying to check if a 64-bit value fits in
59955995
// 32-bits. The backend can optimize these cases using a right shift by 32.
5996-
// Ideally we would check the compare predicate here. There also other
5997-
// similar immediates the backend can use shifts for.
5996+
// There are other predicates and immediates the backend can use shifts for.
59985997
if (Idx == 1 && ImmBitWidth == 64) {
59995998
uint64_t ImmVal = Imm.getZExtValue();
60005999
if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
60016000
return TTI::TCC_Free;
6001+
6002+
if (auto *Cmp = dyn_cast_or_null<CmpInst>(Inst)) {
6003+
if (Cmp->isEquality()) {
6004+
KnownBits Known = computeKnownBits(Cmp->getOperand(0), DL);
6005+
if (Known.countMinTrailingZeros() >= 32)
6006+
return TTI::TCC_Free;
6007+
}
6008+
}
60026009
}
60036010
ImmIdx = 1;
60046011
break;

llvm/test/CodeGen/X86/pr142513.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ define i64 @foo(i64 %x) {
2121
; X64-NEXT: cmpl $65509, %edi # imm = 0xFFE5
2222
; X64-NEXT: je .LBB0_1
2323
; X64-NEXT: # %bb.2: # %if.end
24-
; X64-NEXT: movabsq $9219572124669181952, %rax # imm = 0x7FF2800000000000
25-
; X64-NEXT: addq $3, %rax
24+
; X64-NEXT: movabsq $9219572124669181955, %rax # imm = 0x7FF2800000000003
2625
; X64-NEXT: retq
2726
; X64-NEXT: .LBB0_1: # %if.then
2827
entry:

llvm/test/CodeGen/X86/pr62145.ll

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,41 +5,36 @@
55
define void @f(i64 %a, i64 %b) nounwind {
66
; X86-LABEL: f:
77
; X86: # %bb.0: # %entry
8-
; X86-NEXT: pushl %ebx
98
; X86-NEXT: pushl %edi
109
; X86-NEXT: pushl %esi
1110
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1211
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
13-
; X86-NEXT: movl $-65536, %ebx # imm = 0xFFFF0000
14-
; X86-NEXT: movl $-589824, %edi # imm = 0xFFF70000
12+
; X86-NEXT: movl $-65536, %edi # imm = 0xFFFF0000
1513
; X86-NEXT: cmpl $65527, %eax # imm = 0xFFF7
1614
; X86-NEXT: jne .LBB0_2
1715
; X86-NEXT: # %bb.1: # %if.then
1816
; X86-NEXT: calll ext1@PLT
1917
; X86-NEXT: .LBB0_2: # %if.end
2018
; X86-NEXT: calll ext2@PLT
21-
; X86-NEXT: andl %ebx, %esi
22-
; X86-NEXT: xorl %edi, %esi
19+
; X86-NEXT: andl %edi, %esi
20+
; X86-NEXT: cmpl $-589824, %esi # imm = 0xFFF70000
2321
; X86-NEXT: jne .LBB0_3
2422
; X86-NEXT: # %bb.4: # %if.then2
2523
; X86-NEXT: popl %esi
2624
; X86-NEXT: popl %edi
27-
; X86-NEXT: popl %ebx
2825
; X86-NEXT: jmp ext1@PLT # TAILCALL
2926
; X86-NEXT: .LBB0_3: # %if.end3
3027
; X86-NEXT: popl %esi
3128
; X86-NEXT: popl %edi
32-
; X86-NEXT: popl %ebx
3329
; X86-NEXT: retl
3430
;
3531
; X64-LABEL: f:
3632
; X64: # %bb.0: # %entry
37-
; X64-NEXT: pushq %r15
3833
; X64-NEXT: pushq %r14
3934
; X64-NEXT: pushq %rbx
35+
; X64-NEXT: pushq %rax
4036
; X64-NEXT: movq %rsi, %rbx
4137
; X64-NEXT: movabsq $-281474976710656, %r14 # imm = 0xFFFF000000000000
42-
; X64-NEXT: movabsq $-2533274790395904, %r15 # imm = 0xFFF7000000000000
4338
; X64-NEXT: shrq $48, %rdi
4439
; X64-NEXT: cmpl $65527, %edi # imm = 0xFFF7
4540
; X64-NEXT: jne .LBB0_2
@@ -48,17 +43,17 @@ define void @f(i64 %a, i64 %b) nounwind {
4843
; X64-NEXT: .LBB0_2: # %if.end
4944
; X64-NEXT: callq ext2@PLT
5045
; X64-NEXT: andq %r14, %rbx
51-
; X64-NEXT: cmpq %r15, %rbx
46+
; X64-NEXT: movabsq $-2533274790395904, %rax # imm = 0xFFF7000000000000
47+
; X64-NEXT: addq $8, %rsp
48+
; X64-NEXT: cmpq %rax, %rbx
5249
; X64-NEXT: jne .LBB0_3
5350
; X64-NEXT: # %bb.4: # %if.then2
5451
; X64-NEXT: popq %rbx
5552
; X64-NEXT: popq %r14
56-
; X64-NEXT: popq %r15
5753
; X64-NEXT: jmp ext1@PLT # TAILCALL
5854
; X64-NEXT: .LBB0_3: # %if.end3
5955
; X64-NEXT: popq %rbx
6056
; X64-NEXT: popq %r14
61-
; X64-NEXT: popq %r15
6257
; X64-NEXT: retq
6358
entry:
6459
%shr.mask.i = and i64 %a, -281474976710656

0 commit comments

Comments
 (0)