Skip to content

Commit 4be35fd

Browse files
authored
[X86] EmitCmp - use existing XOR node to check for equality (llvm#125506)
Normally, we use the flag result of a SUB for scalar comparisons, as it's more compatible with CMP, but if we're testing for equality and already have an XOR of the same operands, we can reuse that instead. Fixes #6146
1 parent 91cb8f5 commit 4be35fd

File tree

3 files changed

+29
-34
lines changed

3 files changed

+29
-34
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23157,10 +23157,17 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
2315723157
return Add.getValue(1);
2315823158
}
2315923159

23160-
// Use SUB instead of CMP to enable CSE between SUB and CMP.
23160+
// If we already have an XOR of the ops, use that to check for equality.
23161+
// Else use SUB instead of CMP to enable CSE between SUB and CMP.
23162+
unsigned X86Opc = X86ISD::SUB;
23163+
if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
23164+
(DAG.doesNodeExist(ISD::XOR, DAG.getVTList({CmpVT}), {Op0, Op1}) ||
23165+
DAG.doesNodeExist(ISD::XOR, DAG.getVTList({CmpVT}), {Op1, Op0})))
23166+
X86Opc = X86ISD::XOR;
23167+
2316123168
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
23162-
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
23163-
return Sub.getValue(1);
23169+
SDValue CmpOp = DAG.getNode(X86Opc, dl, VTs, Op0, Op1);
23170+
return CmpOp.getValue(1);
2316423171
}
2316523172

2316623173
bool X86TargetLowering::isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,

llvm/test/CodeGen/X86/cmp-xor.ll

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,18 @@
99
define i32 @cmp_xor_i32(i32 %a, i32 %b, i32 %c)
1010
; X86-LABEL: cmp_xor_i32:
1111
; X86: # %bb.0:
12-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1312
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
14-
; X86-NEXT: cmpl %ecx, %eax
15-
; X86-NEXT: je .LBB0_1
16-
; X86-NEXT: # %bb.2:
17-
; X86-NEXT: xorl %ecx, %eax
18-
; X86-NEXT: retl
19-
; X86-NEXT: .LBB0_1:
13+
; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
14+
; X86-NEXT: jne .LBB0_2
15+
; X86-NEXT: # %bb.1:
2016
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
17+
; X86-NEXT: .LBB0_2:
2118
; X86-NEXT: retl
2219
;
2320
; X64-LABEL: cmp_xor_i32:
2421
; X64: # %bb.0:
2522
; X64-NEXT: movl %edi, %eax
2623
; X64-NEXT: xorl %esi, %eax
27-
; X64-NEXT: cmpl %esi, %edi
2824
; X64-NEXT: cmovel %edx, %eax
2925
; X64-NEXT: retq
3026
{
@@ -37,22 +33,18 @@ define i32 @cmp_xor_i32(i32 %a, i32 %b, i32 %c)
3733
define i32 @cmp_xor_i32_commute(i32 %a, i32 %b, i32 %c)
3834
; X86-LABEL: cmp_xor_i32_commute:
3935
; X86: # %bb.0:
40-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
4136
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
42-
; X86-NEXT: cmpl %eax, %ecx
43-
; X86-NEXT: je .LBB1_1
44-
; X86-NEXT: # %bb.2:
45-
; X86-NEXT: xorl %ecx, %eax
46-
; X86-NEXT: retl
47-
; X86-NEXT: .LBB1_1:
37+
; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
38+
; X86-NEXT: jne .LBB1_2
39+
; X86-NEXT: # %bb.1:
4840
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
41+
; X86-NEXT: .LBB1_2:
4942
; X86-NEXT: retl
5043
;
5144
; X64-LABEL: cmp_xor_i32_commute:
5245
; X64: # %bb.0:
53-
; X64-NEXT: movl %esi, %eax
54-
; X64-NEXT: xorl %edi, %eax
55-
; X64-NEXT: cmpl %esi, %edi
46+
; X64-NEXT: movl %edi, %eax
47+
; X64-NEXT: xorl %esi, %eax
5648
; X64-NEXT: cmovel %edx, %eax
5749
; X64-NEXT: retq
5850
{

llvm/test/CodeGen/X86/pr32284.ll

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -321,11 +321,9 @@ define void @f2() {
321321
; X64-NEXT: xorl %ecx, %ecx
322322
; X64-NEXT: testl %eax, %eax
323323
; X64-NEXT: sete %cl
324-
; X64-NEXT: movl %eax, %edx
325-
; X64-NEXT: xorl %ecx, %edx
326-
; X64-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
327324
; X64-NEXT: xorl %edx, %edx
328-
; X64-NEXT: cmpl %eax, %ecx
325+
; X64-NEXT: xorl %eax, %ecx
326+
; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
329327
; X64-NEXT: sete %dl
330328
; X64-NEXT: movw %dx, (%rax)
331329
; X64-NEXT: retq
@@ -366,17 +364,15 @@ define void @f2() {
366364
; X86: # %bb.0: # %entry
367365
; X86-NEXT: subl $2, %esp
368366
; X86-NEXT: .cfi_def_cfa_offset 6
369-
; X86-NEXT: movzbl var_7, %ecx
367+
; X86-NEXT: movzbl var_7, %edx
370368
; X86-NEXT: xorl %eax, %eax
371-
; X86-NEXT: testl %ecx, %ecx
369+
; X86-NEXT: testl %edx, %edx
372370
; X86-NEXT: sete %al
373-
; X86-NEXT: movl %ecx, %edx
374-
; X86-NEXT: xorl %eax, %edx
375-
; X86-NEXT: movw %dx, (%esp)
376-
; X86-NEXT: xorl %edx, %edx
377-
; X86-NEXT: cmpl %ecx, %eax
378-
; X86-NEXT: sete %dl
379-
; X86-NEXT: movw %dx, (%eax)
371+
; X86-NEXT: xorl %ecx, %ecx
372+
; X86-NEXT: xorl %edx, %eax
373+
; X86-NEXT: movw %ax, (%esp)
374+
; X86-NEXT: sete %cl
375+
; X86-NEXT: movw %cx, (%eax)
380376
; X86-NEXT: addl $2, %esp
381377
; X86-NEXT: .cfi_def_cfa_offset 4
382378
; X86-NEXT: retl

0 commit comments

Comments
 (0)