-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well #107612
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…n to handle ADD/SUB as well Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB. As SUB is not commutative, we have to be more careful and only accept LHS matches.
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) ChangesExtend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB. As SUB is not commutative, we have to be more careful and only accept LHS matches. Full diff: https://github.com/llvm/llvm-project/pull/107612.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e20633e041072f..839b87dd5d4dd8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24084,18 +24084,32 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
if (!CmpVT.isScalarInteger() || !VT.isScalarInteger())
return SDValue();
- // Convert OR/XOR 'identity' patterns (iff X is 0 or 1):
- // select (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
- // select (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
+ // Convert 'identity' patterns (iff X is 0 or 1):
+ // SELECT (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
+ // SELECT (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
+ // SELECT (X != 0), Y, (ADD Y, Z) -> (ADD Y, (AND (0 - X), Z))
+ // SELECT (X != 0), Y, (SUB Y, Z) -> (SUB Y, (AND (0 - X), Z))
if (!Subtarget.canUseCMOV() && X86CC == X86::COND_E &&
CmpVal.getOpcode() == ISD::AND && isOneConstant(CmpVal.getOperand(1))) {
SDValue Src1, Src2;
auto isIdentityPattern = [&]() {
- if ((RHS.getOpcode() == ISD::XOR || RHS.getOpcode() == ISD::OR) &&
- (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS)) {
- Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
- Src2 = LHS;
- return true;
+ switch (RHS.getOpcode()) {
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ if (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS) {
+ Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
+ Src2 = LHS;
+ return true;
+ }
+ break;
+ case ISD::SUB:
+ if (RHS.getOperand(0) == LHS) {
+ Src1 = RHS.getOperand(1);
+ Src2 = LHS;
+ return true;
+ }
+ break;
}
return false;
};
@@ -24113,7 +24127,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
DAG.getConstant(1, DL, VT));
SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
- return DAG.getNode(RHS.getOpcode(), DL, VT, And, Src2); // And Op y
+ return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And); // y Op And
}
}
diff --git a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
index def4c08a3592ec..86a8a6a53248b7 100644
--- a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
+++ b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
@@ -191,12 +191,11 @@ define i32 @add_signbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB6_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT: .LBB6_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -220,12 +219,11 @@ define i32 @add_nosignbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB7_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: .LBB7_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -425,12 +423,11 @@ define i32 @add_signbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_lshr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB14_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT: .LBB14_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -454,12 +451,11 @@ define i32 @add_nosignbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_lshr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB15_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: .LBB15_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -659,12 +655,11 @@ define i32 @add_signbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_ashr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB22_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT: .LBB22_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -688,12 +683,11 @@ define i32 @add_nosignbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_ashr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB23_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: .LBB23_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index aa7f509327002f..760c8355aa4572 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -1725,11 +1725,10 @@ define i32 @select_add(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_add:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB36_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB36_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
@@ -1773,11 +1772,10 @@ define i32 @select_add_b(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_add_b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB37_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB37_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
@@ -1819,11 +1817,10 @@ define i32 @select_add_1(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_add_1:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB38_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB38_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i32 %cond, 1
@@ -1867,11 +1864,10 @@ define i32 @select_add_1b(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_add_1b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB39_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB39_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i32 %cond, 1
@@ -1901,11 +1897,10 @@ define i32 @select_sub(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_sub:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB40_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB40_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
@@ -1938,11 +1933,10 @@ define i32 @select_sub_b(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_sub_b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB41_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB41_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
@@ -1973,11 +1967,10 @@ define i32 @select_sub_1(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_sub_1:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB42_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB42_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
@@ -2010,11 +2003,10 @@ define i32 @select_sub_1b(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_sub_1b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB43_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB43_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
|
// SELECT (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z)) | ||
// SELECT (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z)) | ||
// SELECT (X != 0), Y, (ADD Y, Z) -> (ADD Y, (AND (0 - X), Z)) | ||
// SELECT (X != 0), Y, (SUB Y, Z) -> (SUB Y, (AND (0 - X), Z)) | ||
if (!Subtarget.canUseCMOV() && X86CC == X86::COND_E && | ||
CmpVal.getOpcode() == ISD::AND && isOneConstant(CmpVal.getOperand(1))) { | ||
SDValue Src1, Src2; | ||
auto isIdentityPattern = [&]() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the middle end we have getBinOpIdentity
, maybe we should provide a generic helper in SelectionDAG?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, DAG has llvm::isNeutralConstant
which is similar, but no getNeutralConstant equivalent - we could add something that LowerSELECTWithCmpZero could use.
We should probably rename isNeutralConstant to isIdentityConstant as well to be consistent with other implementations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/5129 Here is the relevant piece of the build log for the reference
|
Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB.
As SUB is not commutative, we have to be more careful and only accept LHS matches.