Skip to content

Commit c4d3eed

Browse files
committed
[X86] Fold nested select_cc to (select (cmp*ge/le Cond0, Cond1), LHS, Y)
select (cmpeq Cond0, Cond1), LHS, (select (cmpugt Cond0, Cond1), LHS, Y) --> (select (cmpuge Cond0, Cond1), LHS, Y) etc. We already perform this fold in DAGCombiner for MVT::i1 comparison results, but these can still appear after legalization (in the x86 case with MVT::i8 results), where we need to be more careful about generating new comparison codes. Pulled out of D101074 to help address the remaining regressions. Differential Revision: https://reviews.llvm.org/D104707
1 parent d5e14ba commit c4d3eed

File tree

3 files changed

+154
-148
lines changed

3 files changed

+154
-148
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41836,6 +41836,36 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4183641836
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
4183741837
}
4183841838
}
41839+
41840+
// Similar to DAGCombine's select(or(CC0,CC1),X,Y) fold but for legal types.
41841+
// fold eq + gt/lt nested selects into ge/le selects
41842+
// select (cmpeq Cond0, Cond1), LHS, (select (cmpugt Cond0, Cond1), LHS, Y)
41843+
// --> (select (cmpuge Cond0, Cond1), LHS, Y)
41844+
// select (cmpslt Cond0, Cond1), LHS, (select (cmpeq Cond0, Cond1), LHS, Y)
41845+
// --> (select (cmpsle Cond0, Cond1), LHS, Y)
41846+
// .. etc ..
41847+
if (RHS.getOpcode() == ISD::SELECT && RHS.getOperand(1) == LHS &&
41848+
RHS.getOperand(0).getOpcode() == ISD::SETCC) {
41849+
SDValue InnerSetCC = RHS.getOperand(0);
41850+
ISD::CondCode InnerCC =
41851+
cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get();
41852+
if ((CC == ISD::SETEQ || InnerCC == ISD::SETEQ) &&
41853+
Cond0 == InnerSetCC.getOperand(0) &&
41854+
Cond1 == InnerSetCC.getOperand(1)) {
41855+
ISD::CondCode NewCC;
41856+
switch (CC == ISD::SETEQ ? InnerCC : CC) {
41857+
case ISD::SETGT: NewCC = ISD::SETGE; break;
41858+
case ISD::SETLT: NewCC = ISD::SETLE; break;
41859+
case ISD::SETUGT: NewCC = ISD::SETUGE; break;
41860+
case ISD::SETULT: NewCC = ISD::SETULE; break;
41861+
default: NewCC = ISD::SETCC_INVALID; break;
41862+
}
41863+
if (NewCC != ISD::SETCC_INVALID) {
41864+
Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC);
41865+
return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2));
41866+
}
41867+
}
41868+
}
4183941869
}
4184041870

4184141871
// Check if the first operand is all zeros and Cond type is vXi1.

llvm/test/CodeGen/X86/sdiv_fix_sat.ll

Lines changed: 58 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -313,50 +313,48 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
313313
; X64-NEXT: movq %rsi, (%rsp) # 8-byte Spill
314314
; X64-NEXT: movq %rdi, %r15
315315
; X64-NEXT: leaq (%rdi,%rdi), %rax
316-
; X64-NEXT: movq %rdi, %rbx
317-
; X64-NEXT: sarq $63, %rbx
318-
; X64-NEXT: shldq $31, %rax, %rbx
319-
; X64-NEXT: shlq $32, %r15
320-
; X64-NEXT: movq %rsi, %r12
316+
; X64-NEXT: movq %rdi, %r12
321317
; X64-NEXT: sarq $63, %r12
318+
; X64-NEXT: shldq $31, %rax, %r12
319+
; X64-NEXT: shlq $32, %r15
320+
; X64-NEXT: movq %rsi, %r13
321+
; X64-NEXT: sarq $63, %r13
322322
; X64-NEXT: movq %r15, %rdi
323-
; X64-NEXT: movq %rbx, %rsi
324-
; X64-NEXT: movq %r12, %rcx
323+
; X64-NEXT: movq %r12, %rsi
324+
; X64-NEXT: movq %r13, %rcx
325325
; X64-NEXT: callq __divti3@PLT
326-
; X64-NEXT: movq %rax, %r13
326+
; X64-NEXT: movq %rax, %rbx
327327
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
328328
; X64-NEXT: movq %rdx, %rbp
329329
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
330-
; X64-NEXT: subq $1, %r13
330+
; X64-NEXT: subq $1, %rbx
331331
; X64-NEXT: sbbq $0, %rbp
332-
; X64-NEXT: testq %rbx, %rbx
333-
; X64-NEXT: sets %al
334332
; X64-NEXT: testq %r12, %r12
333+
; X64-NEXT: sets %al
334+
; X64-NEXT: testq %r13, %r13
335335
; X64-NEXT: sets %r14b
336336
; X64-NEXT: xorb %al, %r14b
337337
; X64-NEXT: movq %r15, %rdi
338-
; X64-NEXT: movq %rbx, %rsi
338+
; X64-NEXT: movq %r12, %rsi
339339
; X64-NEXT: movq (%rsp), %rdx # 8-byte Reload
340-
; X64-NEXT: movq %r12, %rcx
340+
; X64-NEXT: movq %r13, %rcx
341341
; X64-NEXT: callq __modti3@PLT
342342
; X64-NEXT: orq %rax, %rdx
343343
; X64-NEXT: setne %al
344344
; X64-NEXT: testb %r14b, %al
345345
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
346-
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
346+
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
347347
; X64-NEXT: xorl %eax, %eax
348348
; X64-NEXT: testq %rbp, %rbp
349-
; X64-NEXT: movq $-1, %rcx
350-
; X64-NEXT: movq $-1, %rdx
351-
; X64-NEXT: cmovsq %r13, %rdx
352-
; X64-NEXT: cmoveq %r13, %rdx
353349
; X64-NEXT: cmovnsq %rax, %rbp
350+
; X64-NEXT: movq $-1, %rcx
351+
; X64-NEXT: cmovgq %rcx, %rbx
354352
; X64-NEXT: testq %rbp, %rbp
355353
; X64-NEXT: cmovnsq %rbp, %rcx
356-
; X64-NEXT: cmovnsq %rdx, %rax
357-
; X64-NEXT: cmpq $-1, %rbp
358-
; X64-NEXT: cmoveq %rdx, %rax
359-
; X64-NEXT: shrdq $1, %rcx, %rax
354+
; X64-NEXT: cmpq $-2, %rbp
355+
; X64-NEXT: cmovleq %rax, %rbx
356+
; X64-NEXT: shrdq $1, %rcx, %rbx
357+
; X64-NEXT: movq %rbx, %rax
360358
; X64-NEXT: addq $24, %rsp
361359
; X64-NEXT: popq %rbx
362360
; X64-NEXT: popq %r12
@@ -402,18 +400,19 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
402400
; X86-NEXT: pushl %eax
403401
; X86-NEXT: calll __divti3
404402
; X86-NEXT: addl $32, %esp
405-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
406-
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
407-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
408-
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
403+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
404+
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
409405
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
410406
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
411-
; X86-NEXT: subl $1, %eax
412-
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
413-
; X86-NEXT: sbbl $0, %esi
407+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
408+
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
409+
; X86-NEXT: subl $1, %esi
414410
; X86-NEXT: movl %ecx, %eax
415411
; X86-NEXT: sbbl $0, %eax
416412
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
413+
; X86-NEXT: movl %edx, %eax
414+
; X86-NEXT: sbbl $0, %eax
415+
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
417416
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
418417
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
419418
; X86-NEXT: sbbl $0, %ebx
@@ -446,51 +445,50 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
446445
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
447446
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
448447
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
449-
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
450-
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
451-
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
452448
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
449+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
450+
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
453451
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
454452
; X86-NEXT: testl %ebx, %ebx
455-
; X86-NEXT: movl $0, %edx
456-
; X86-NEXT: cmovsl %ebx, %edx
457-
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
458-
; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
459-
; X86-NEXT: cmovsl %esi, %edx
460-
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
461-
; X86-NEXT: movl $-1, %edx
462-
; X86-NEXT: cmovsl %eax, %edx
453+
; X86-NEXT: movl $0, %eax
454+
; X86-NEXT: cmovsl %ebx, %eax
455+
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
456+
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
457+
; X86-NEXT: cmovsl %edx, %eax
458+
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
459+
; X86-NEXT: movl $-1, %eax
460+
; X86-NEXT: cmovsl %esi, %eax
463461
; X86-NEXT: movl %ebx, %edi
464462
; X86-NEXT: sarl $31, %edi
465463
; X86-NEXT: andl %ecx, %edi
466464
; X86-NEXT: testl %ebx, %ebx
467465
; X86-NEXT: cmovel %ebx, %edi
468-
; X86-NEXT: cmpl $2147483647, %esi # imm = 0x7FFFFFFF
469-
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
470-
; X86-NEXT: cmovael %eax, %esi
471-
; X86-NEXT: movl $-1, %eax
472-
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
473-
; X86-NEXT: cmovbl %ecx, %eax
474-
; X86-NEXT: cmovel %ecx, %eax
466+
; X86-NEXT: movl %edx, %ecx
467+
; X86-NEXT: cmpl $2147483647, %edx # imm = 0x7FFFFFFF
468+
; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
469+
; X86-NEXT: cmovbl %ecx, %edx
470+
; X86-NEXT: testl %ecx, %ecx
471+
; X86-NEXT: movl $-1, %ecx
472+
; X86-NEXT: cmovsl %ecx, %esi
475473
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
476-
; X86-NEXT: cmovnel %edx, %eax
477-
; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
478-
; X86-NEXT: cmpl $-2147483648, %esi # imm = 0x80000000
479-
; X86-NEXT: movl $0, %ecx
480-
; X86-NEXT: cmoval %eax, %ecx
481-
; X86-NEXT: cmovel %eax, %ecx
482-
; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000
483-
; X86-NEXT: cmoval %esi, %edx
474+
; X86-NEXT: cmovnel %eax, %esi
475+
; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
476+
; X86-NEXT: cmpl $-2147483648, %edx # imm = 0x80000000
477+
; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
478+
; X86-NEXT: cmoval %edx, %eax
479+
; X86-NEXT: movl %edx, %ecx
480+
; X86-NEXT: sarl $31, %ecx
481+
; X86-NEXT: andl %esi, %ecx
484482
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
485-
; X86-NEXT: movl $0, %ebx
486-
; X86-NEXT: cmovsl %ebx, %eax
487483
; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
484+
; X86-NEXT: cmovsl %ebx, %edx
485+
; X86-NEXT: movl $0, %ebx
488486
; X86-NEXT: cmovsl %ebx, %esi
489487
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
490488
; X86-NEXT: cmpl $-1, %edi
491-
; X86-NEXT: cmovel %edx, %esi
492-
; X86-NEXT: cmovel %ecx, %eax
493-
; X86-NEXT: movl %esi, %edx
489+
; X86-NEXT: cmovel %ecx, %esi
490+
; X86-NEXT: cmovel %eax, %edx
491+
; X86-NEXT: movl %esi, %eax
494492
; X86-NEXT: leal -12(%ebp), %esp
495493
; X86-NEXT: popl %esi
496494
; X86-NEXT: popl %edi

0 commit comments

Comments
 (0)