Skip to content

Commit ab17ed0

Browse files
committed
[X86] Don't fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y)) on BMI2 targets
With BMI2 we have SHRX, which is a lot quicker than regular x86 shifts. Fixes #55138.
1 parent 181dcbd commit ab17ed0

File tree

2 files changed

+38
-19
lines changed

2 files changed

+38
-19
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47380,11 +47380,13 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
4738047380
Src.getOpcode() == ISD::TRUNCATE) &&
4738147381
Src.getOperand(0)->hasOneUse())
4738247382
Src = Src.getOperand(0);
47383+
bool ContainsNOT = false;
4738347384
X86::CondCode X86CC = X86::COND_B;
4738447385
// Peek through AND(NOT(SRL(X,Y)),1).
4738547386
if (isBitwiseNot(Src)) {
4738647387
Src = Src.getOperand(0);
4738747388
X86CC = X86::COND_AE;
47389+
ContainsNOT = true;
4738847390
}
4738947391
if (Src.getOpcode() == ISD::SRL &&
4739047392
!isa<ConstantSDNode>(Src.getOperand(1))) {
@@ -47394,9 +47396,12 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
4739447396
if (isBitwiseNot(Src)) {
4739547397
Src = Src.getOperand(0);
4739647398
X86CC = X86CC == X86::COND_AE ? X86::COND_B : X86::COND_AE;
47399+
ContainsNOT = true;
4739747400
}
47398-
if (SDValue BT = getBT(Src, BitNo, dl, DAG))
47399-
return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT);
47401+
// If we have BMI2 then SHRX should be faster for i32/i64 cases.
47402+
if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.getSizeInBits() >= 32))
47403+
if (SDValue BT = getBT(Src, BitNo, dl, DAG))
47404+
return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT);
4740047405
}
4740147406
}
4740247407

llvm/test/CodeGen/X86/setcc.ll

Lines changed: 31 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefixes=X86
3-
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefixes=X64,X64-NOTBM
4-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+tbm | FileCheck %s --check-prefixes=X64,X64-TBM
5-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+bmi2 | FileCheck %s --check-prefixes=X64,X64-NOTBM
3+
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefixes=X64,X64-NOTBM,X64-NOBMI2
4+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+tbm | FileCheck %s --check-prefixes=X64,X64-NOBMI2,X64-TBM
5+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+bmi2 | FileCheck %s --check-prefixes=X64,X64-NOTBM,X64-BMI2
66
; rdar://7329206
77

88
define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp {
@@ -191,12 +191,18 @@ define i64 @t9(i32 %0, i32 %1) {
191191
; X86-NEXT: xorl %edx, %edx
192192
; X86-NEXT: retl
193193
;
194-
; X64-LABEL: t9:
195-
; X64: ## %bb.0:
196-
; X64-NEXT: xorl %eax, %eax
197-
; X64-NEXT: btl %esi, %edi
198-
; X64-NEXT: setb %al
199-
; X64-NEXT: retq
194+
; X64-NOBMI2-LABEL: t9:
195+
; X64-NOBMI2: ## %bb.0:
196+
; X64-NOBMI2-NEXT: xorl %eax, %eax
197+
; X64-NOBMI2-NEXT: btl %esi, %edi
198+
; X64-NOBMI2-NEXT: setb %al
199+
; X64-NOBMI2-NEXT: retq
200+
;
201+
; X64-BMI2-LABEL: t9:
202+
; X64-BMI2: ## %bb.0:
203+
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
204+
; X64-BMI2-NEXT: andl $1, %eax
205+
; X64-BMI2-NEXT: retq
200206
%3 = lshr i32 %0, %1
201207
%4 = and i32 %3, 1
202208
%5 = icmp ne i32 %4, 0
@@ -311,14 +317,22 @@ define i32 @PR55138(i32 %x) {
311317
; X86-NEXT: setb %al
312318
; X86-NEXT: retl
313319
;
314-
; X64-LABEL: PR55138:
315-
; X64: ## %bb.0:
316-
; X64-NEXT: andl $15, %edi
317-
; X64-NEXT: movl $27030, %ecx ## imm = 0x6996
318-
; X64-NEXT: xorl %eax, %eax
319-
; X64-NEXT: btl %edi, %ecx
320-
; X64-NEXT: setb %al
321-
; X64-NEXT: retq
320+
; X64-NOBMI2-LABEL: PR55138:
321+
; X64-NOBMI2: ## %bb.0:
322+
; X64-NOBMI2-NEXT: andl $15, %edi
323+
; X64-NOBMI2-NEXT: movl $27030, %ecx ## imm = 0x6996
324+
; X64-NOBMI2-NEXT: xorl %eax, %eax
325+
; X64-NOBMI2-NEXT: btl %edi, %ecx
326+
; X64-NOBMI2-NEXT: setb %al
327+
; X64-NOBMI2-NEXT: retq
328+
;
329+
; X64-BMI2-LABEL: PR55138:
330+
; X64-BMI2: ## %bb.0:
331+
; X64-BMI2-NEXT: andb $15, %dil
332+
; X64-BMI2-NEXT: movl $27030, %eax ## imm = 0x6996
333+
; X64-BMI2-NEXT: shrxl %edi, %eax, %eax
334+
; X64-BMI2-NEXT: andl $1, %eax
335+
; X64-BMI2-NEXT: retq
322336
%urem = and i32 %x, 15
323337
%shr = lshr i32 27030, %urem
324338
%and = and i32 %shr, 1

0 commit comments

Comments
 (0)