Skip to content

Commit 6df1291

Browse files
authored
[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well (llvm#107612)
Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" fold to also handle ADD/SUB. As SUB is not commutative, we have to be more careful and only accept matches where Y is the left-hand (first) operand of the SUB.
1 parent 78cf9b8 commit 6df1291

File tree

3 files changed

+85
-85
lines changed

3 files changed

+85
-85
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24084,18 +24084,32 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
2408424084
if (!CmpVT.isScalarInteger() || !VT.isScalarInteger())
2408524085
return SDValue();
2408624086

24087-
// Convert OR/XOR 'identity' patterns (iff X is 0 or 1):
24088-
// select (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
24089-
// select (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
24087+
// Convert 'identity' patterns (iff X is 0 or 1):
24088+
// SELECT (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
24089+
// SELECT (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
24090+
// SELECT (X != 0), Y, (ADD Y, Z) -> (ADD Y, (AND (0 - X), Z))
24091+
// SELECT (X != 0), Y, (SUB Y, Z) -> (SUB Y, (AND (0 - X), Z))
2409024092
if (!Subtarget.canUseCMOV() && X86CC == X86::COND_E &&
2409124093
CmpVal.getOpcode() == ISD::AND && isOneConstant(CmpVal.getOperand(1))) {
2409224094
SDValue Src1, Src2;
2409324095
auto isIdentityPattern = [&]() {
24094-
if ((RHS.getOpcode() == ISD::XOR || RHS.getOpcode() == ISD::OR) &&
24095-
(RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS)) {
24096-
Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
24097-
Src2 = LHS;
24098-
return true;
24096+
switch (RHS.getOpcode()) {
24097+
case ISD::OR:
24098+
case ISD::XOR:
24099+
case ISD::ADD:
24100+
if (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS) {
24101+
Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
24102+
Src2 = LHS;
24103+
return true;
24104+
}
24105+
break;
24106+
case ISD::SUB:
24107+
if (RHS.getOperand(0) == LHS) {
24108+
Src1 = RHS.getOperand(1);
24109+
Src2 = LHS;
24110+
return true;
24111+
}
24112+
break;
2409924113
}
2410024114
return false;
2410124115
};
@@ -24113,7 +24127,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
2411324127
DAG.getConstant(1, DL, VT));
2411424128
SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
2411524129
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
24116-
return DAG.getNode(RHS.getOpcode(), DL, VT, And, Src2); // And Op y
24130+
return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And); // y Op And
2411724131
}
2411824132
}
2411924133

llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -191,12 +191,11 @@ define i32 @add_signbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
191191
; X86-LABEL: add_signbit_select_shl:
192192
; X86: # %bb.0:
193193
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
194-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
195-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
196-
; X86-NEXT: je .LBB6_2
197-
; X86-NEXT: # %bb.1:
198-
; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
199-
; X86-NEXT: .LBB6_2:
194+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
195+
; X86-NEXT: andl $1, %eax
196+
; X86-NEXT: negl %eax
197+
; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
198+
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
200199
; X86-NEXT: shll $8, %eax
201200
; X86-NEXT: movl %eax, (%ecx)
202201
; X86-NEXT: retl
@@ -220,12 +219,11 @@ define i32 @add_nosignbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
220219
; X86-LABEL: add_nosignbit_select_shl:
221220
; X86: # %bb.0:
222221
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
223-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
224-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
225-
; X86-NEXT: je .LBB7_2
226-
; X86-NEXT: # %bb.1:
227-
; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
228-
; X86-NEXT: .LBB7_2:
222+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
223+
; X86-NEXT: andl $1, %eax
224+
; X86-NEXT: negl %eax
225+
; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
226+
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
229227
; X86-NEXT: shll $8, %eax
230228
; X86-NEXT: movl %eax, (%ecx)
231229
; X86-NEXT: retl
@@ -425,12 +423,11 @@ define i32 @add_signbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
425423
; X86-LABEL: add_signbit_select_lshr:
426424
; X86: # %bb.0:
427425
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
428-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
429-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
430-
; X86-NEXT: je .LBB14_2
431-
; X86-NEXT: # %bb.1:
432-
; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
433-
; X86-NEXT: .LBB14_2:
426+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
427+
; X86-NEXT: andl $1, %eax
428+
; X86-NEXT: negl %eax
429+
; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
430+
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
434431
; X86-NEXT: shrl $8, %eax
435432
; X86-NEXT: movl %eax, (%ecx)
436433
; X86-NEXT: retl
@@ -454,12 +451,11 @@ define i32 @add_nosignbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
454451
; X86-LABEL: add_nosignbit_select_lshr:
455452
; X86: # %bb.0:
456453
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
457-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
458-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
459-
; X86-NEXT: je .LBB15_2
460-
; X86-NEXT: # %bb.1:
461-
; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
462-
; X86-NEXT: .LBB15_2:
454+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
455+
; X86-NEXT: andl $1, %eax
456+
; X86-NEXT: negl %eax
457+
; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
458+
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
463459
; X86-NEXT: shrl $8, %eax
464460
; X86-NEXT: movl %eax, (%ecx)
465461
; X86-NEXT: retl
@@ -659,12 +655,11 @@ define i32 @add_signbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
659655
; X86-LABEL: add_signbit_select_ashr:
660656
; X86: # %bb.0:
661657
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
662-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
663-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
664-
; X86-NEXT: je .LBB22_2
665-
; X86-NEXT: # %bb.1:
666-
; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
667-
; X86-NEXT: .LBB22_2:
658+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
659+
; X86-NEXT: andl $1, %eax
660+
; X86-NEXT: negl %eax
661+
; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
662+
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
668663
; X86-NEXT: sarl $8, %eax
669664
; X86-NEXT: movl %eax, (%ecx)
670665
; X86-NEXT: retl
@@ -688,12 +683,11 @@ define i32 @add_nosignbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
688683
; X86-LABEL: add_nosignbit_select_ashr:
689684
; X86: # %bb.0:
690685
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
691-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
692-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
693-
; X86-NEXT: je .LBB23_2
694-
; X86-NEXT: # %bb.1:
695-
; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
696-
; X86-NEXT: .LBB23_2:
686+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
687+
; X86-NEXT: andl $1, %eax
688+
; X86-NEXT: negl %eax
689+
; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
690+
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
697691
; X86-NEXT: sarl $8, %eax
698692
; X86-NEXT: movl %eax, (%ecx)
699693
; X86-NEXT: retl

llvm/test/CodeGen/X86/select.ll

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1725,11 +1725,10 @@ define i32 @select_add(i32 %A, i32 %B, i8 %cond) {
17251725
;
17261726
; MCU-LABEL: select_add:
17271727
; MCU: # %bb.0: # %entry
1728-
; MCU-NEXT: testb $1, %cl
1729-
; MCU-NEXT: je .LBB36_2
1730-
; MCU-NEXT: # %bb.1: # %entry
1731-
; MCU-NEXT: addl %edx, %eax
1732-
; MCU-NEXT: .LBB36_2: # %entry
1728+
; MCU-NEXT: andl $1, %ecx
1729+
; MCU-NEXT: negl %ecx
1730+
; MCU-NEXT: andl %edx, %ecx
1731+
; MCU-NEXT: addl %ecx, %eax
17331732
; MCU-NEXT: retl
17341733
entry:
17351734
%and = and i8 %cond, 1
@@ -1773,11 +1772,10 @@ define i32 @select_add_b(i32 %A, i32 %B, i8 %cond) {
17731772
;
17741773
; MCU-LABEL: select_add_b:
17751774
; MCU: # %bb.0: # %entry
1776-
; MCU-NEXT: testb $1, %cl
1777-
; MCU-NEXT: je .LBB37_2
1778-
; MCU-NEXT: # %bb.1:
1779-
; MCU-NEXT: addl %edx, %eax
1780-
; MCU-NEXT: .LBB37_2: # %entry
1775+
; MCU-NEXT: andl $1, %ecx
1776+
; MCU-NEXT: negl %ecx
1777+
; MCU-NEXT: andl %edx, %ecx
1778+
; MCU-NEXT: addl %ecx, %eax
17811779
; MCU-NEXT: retl
17821780
entry:
17831781
%and = and i8 %cond, 1
@@ -1819,11 +1817,10 @@ define i32 @select_add_1(i32 %A, i32 %B, i32 %cond) {
18191817
;
18201818
; MCU-LABEL: select_add_1:
18211819
; MCU: # %bb.0: # %entry
1822-
; MCU-NEXT: testb $1, %cl
1823-
; MCU-NEXT: je .LBB38_2
1824-
; MCU-NEXT: # %bb.1: # %entry
1825-
; MCU-NEXT: addl %edx, %eax
1826-
; MCU-NEXT: .LBB38_2: # %entry
1820+
; MCU-NEXT: andl $1, %ecx
1821+
; MCU-NEXT: negl %ecx
1822+
; MCU-NEXT: andl %edx, %ecx
1823+
; MCU-NEXT: addl %ecx, %eax
18271824
; MCU-NEXT: retl
18281825
entry:
18291826
%and = and i32 %cond, 1
@@ -1867,11 +1864,10 @@ define i32 @select_add_1b(i32 %A, i32 %B, i32 %cond) {
18671864
;
18681865
; MCU-LABEL: select_add_1b:
18691866
; MCU: # %bb.0: # %entry
1870-
; MCU-NEXT: testb $1, %cl
1871-
; MCU-NEXT: je .LBB39_2
1872-
; MCU-NEXT: # %bb.1:
1873-
; MCU-NEXT: addl %edx, %eax
1874-
; MCU-NEXT: .LBB39_2: # %entry
1867+
; MCU-NEXT: andl $1, %ecx
1868+
; MCU-NEXT: negl %ecx
1869+
; MCU-NEXT: andl %edx, %ecx
1870+
; MCU-NEXT: addl %ecx, %eax
18751871
; MCU-NEXT: retl
18761872
entry:
18771873
%and = and i32 %cond, 1
@@ -1901,11 +1897,10 @@ define i32 @select_sub(i32 %A, i32 %B, i8 %cond) {
19011897
;
19021898
; MCU-LABEL: select_sub:
19031899
; MCU: # %bb.0: # %entry
1904-
; MCU-NEXT: testb $1, %cl
1905-
; MCU-NEXT: je .LBB40_2
1906-
; MCU-NEXT: # %bb.1: # %entry
1907-
; MCU-NEXT: subl %eax, %edx
1908-
; MCU-NEXT: .LBB40_2: # %entry
1900+
; MCU-NEXT: andl $1, %ecx
1901+
; MCU-NEXT: negl %ecx
1902+
; MCU-NEXT: andl %eax, %ecx
1903+
; MCU-NEXT: subl %ecx, %edx
19091904
; MCU-NEXT: movl %edx, %eax
19101905
; MCU-NEXT: retl
19111906
entry:
@@ -1938,11 +1933,10 @@ define i32 @select_sub_b(i32 %A, i32 %B, i8 %cond) {
19381933
;
19391934
; MCU-LABEL: select_sub_b:
19401935
; MCU: # %bb.0: # %entry
1941-
; MCU-NEXT: testb $1, %cl
1942-
; MCU-NEXT: je .LBB41_2
1943-
; MCU-NEXT: # %bb.1:
1944-
; MCU-NEXT: subl %eax, %edx
1945-
; MCU-NEXT: .LBB41_2: # %entry
1936+
; MCU-NEXT: andl $1, %ecx
1937+
; MCU-NEXT: negl %ecx
1938+
; MCU-NEXT: andl %eax, %ecx
1939+
; MCU-NEXT: subl %ecx, %edx
19461940
; MCU-NEXT: movl %edx, %eax
19471941
; MCU-NEXT: retl
19481942
entry:
@@ -1973,11 +1967,10 @@ define i32 @select_sub_1(i32 %A, i32 %B, i32 %cond) {
19731967
;
19741968
; MCU-LABEL: select_sub_1:
19751969
; MCU: # %bb.0: # %entry
1976-
; MCU-NEXT: testb $1, %cl
1977-
; MCU-NEXT: je .LBB42_2
1978-
; MCU-NEXT: # %bb.1: # %entry
1979-
; MCU-NEXT: subl %eax, %edx
1980-
; MCU-NEXT: .LBB42_2: # %entry
1970+
; MCU-NEXT: andl $1, %ecx
1971+
; MCU-NEXT: negl %ecx
1972+
; MCU-NEXT: andl %eax, %ecx
1973+
; MCU-NEXT: subl %ecx, %edx
19811974
; MCU-NEXT: movl %edx, %eax
19821975
; MCU-NEXT: retl
19831976
entry:
@@ -2010,11 +2003,10 @@ define i32 @select_sub_1b(i32 %A, i32 %B, i32 %cond) {
20102003
;
20112004
; MCU-LABEL: select_sub_1b:
20122005
; MCU: # %bb.0: # %entry
2013-
; MCU-NEXT: testb $1, %cl
2014-
; MCU-NEXT: je .LBB43_2
2015-
; MCU-NEXT: # %bb.1:
2016-
; MCU-NEXT: subl %eax, %edx
2017-
; MCU-NEXT: .LBB43_2: # %entry
2006+
; MCU-NEXT: andl $1, %ecx
2007+
; MCU-NEXT: negl %ecx
2008+
; MCU-NEXT: andl %eax, %ecx
2009+
; MCU-NEXT: subl %ecx, %edx
20182010
; MCU-NEXT: movl %edx, %eax
20192011
; MCU-NEXT: retl
20202012
entry:

0 commit comments

Comments
 (0)