-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][APX] Support peephole optimization with CCMP instruction #129994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This extends `opitimizeCompareInstr` to re-use previous CCMP results if the previous comparison was with an immediates that was 1 bigger or smaller. Example: ``` CCMP x, 13, 2, 5 ... CCMP x, 12, 2, 5 ; can be removed if we change the SETg SETg ... ; x > 12 changed to SETge (x >= 13) & remove the 2nd CCMP ```
@llvm/pr-subscribers-backend-x86 Author: Feng Zou (fzou1) ChangesThis extends
Full diff: https://github.com/llvm/llvm-project/pull/129994.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
index 35af8405f1abe..ba8cf6cc3bc67 100644
--- a/llvm/lib/Target/X86/X86InstrConditionalCompare.td
+++ b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
@@ -36,7 +36,7 @@ class Ctest<bits<8> o, Format f, X86TypeInfo t, DAGOperand op1, DAGOperand op2>:
//===----------------------------------------------------------------------===//
// CCMP Instructions
//
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteALU], isCompare = 1 in {
def CCMP8rr : Ccmp<0x38, MRMDestReg, Xi8, GR8, GR8>;
def CCMP16rr: Ccmp<0x39, MRMDestReg, Xi16, GR16, GR16>, PD;
def CCMP32rr: Ccmp<0x39, MRMDestReg, Xi32, GR32, GR32>;
@@ -55,7 +55,7 @@ let SchedRW = [WriteALU] in {
def CCMP64ri32: Ccmp<0x81, MRM7r, Xi64, GR64, i64i32imm>;
}
-let mayLoad = 1 in {
+let mayLoad = 1, isCompare = 1 in {
let SchedRW = [WriteALU.Folded] in {
def CCMP16mi8: Ccmp<0x83, MRM7m, Xi16, i16mem, i16i8imm>, PD;
def CCMP32mi8: Ccmp<0x83, MRM7m, Xi32, i32mem, i32i8imm>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5fe7203c052d8..0b741338934b8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4854,6 +4854,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
case X86::CMP32ri:
case X86::CMP16ri:
case X86::CMP8ri:
+ case X86::CCMP64ri32:
+ case X86::CCMP32ri:
+ case X86::CCMP16ri:
+ case X86::CCMP8ri:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
if (MI.getOperand(1).isImm()) {
@@ -4951,6 +4955,16 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
}
return false;
}
+ case X86::CCMP64ri32:
+ case X86::CCMP32ri:
+ case X86::CCMP16ri:
+ case X86::CCMP8ri:
+ // The CCMP instruction should not be optimized if the scc/dfv in it is not
+ // same as the one in previous CCMP instruction.
+ if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
+ (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
+ return false;
+ [[fallthrough]];
case X86::CMP64ri32:
case X86::CMP32ri:
case X86::CMP16ri:
diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
new file mode 100644
index 0000000000000..1b5ecdfc40e8f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
@@ -0,0 +1,312 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt | FileCheck %s
+
+---
+name: opt_redundant_flags_adjusted_imm_0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_0
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP can be removed.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >=s 2 --> %0 >s 1
+ $bl = SETCCr 13, implicit $eflags
+ ; %0 >=u 2 --> %0 >u 1
+ $bl = SETCCr 3, implicit $eflags
+ ; %0 <s 2 --> %0 <=s 1
+ $bl = SETCCr 12, implicit $eflags
+ ; %0 <u 2 --> %0 <=u 1
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_1
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 42, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 5, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 != 42
+ CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 5, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP can be removed.
+ CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >s 41 --> %0 >=s 42
+ $bl = SETCCr 15, implicit $eflags
+ ; %0 >u 41 --> %0 >=u 42
+ $bl = SETCCr 7, implicit $eflags
+ ; %0 <=s 41 --> %0 <s 42
+ $bl = SETCCr 14, implicit $eflags
+ ; %0 <=u 41 --> %0 <u 42
+ $bl = SETCCr 6, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_0
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 42, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 41, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 <s 1
+ CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 == 41
+ $bl = SETCCr 4, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_1
+ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ %0:gr32 = COPY $esi
+ ; CCMP+SETCC %0 == INT32_MAX
+ CCMP32ri %0, 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 <s INT32_MIN
+ $bl = SETCCr 12, implicit $eflags
+
+ CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 12, implicit $eflags
+
+ CCMP32ri %0, 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 13, implicit $eflags
+
+ CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 2, implicit $eflags
+
+ CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 3, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_2
+ ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $cx
+ ; CHECK-NEXT: CCMP16ri [[COPY]], -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr16 = COPY $cx
+ ; CCMP+SETCC %0 == INT16_MIN
+ CCMP16ri %0, -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >s INT16_MAX
+ $bl = SETCCr 15, implicit $eflags
+
+ CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 15, implicit $eflags
+
+ CCMP16ri %0, -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 14, implicit $eflags
+
+ CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 4, implicit $eflags
+
+ CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 6, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_3
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_3
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 7, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 2, 7, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP should not be removed as the scc and dfv is
+ ; different.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 13, implicit $eflags
+ $bl = SETCCr 3, implicit $eflags
+ $bl = SETCCr 12, implicit $eflags
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_4
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_4
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 5, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 5, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP should not be removed as the scc and dfv is
+ ; different.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 13, implicit $eflags
+ $bl = SETCCr 3, implicit $eflags
+ $bl = SETCCr 12, implicit $eflags
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_adjusted_imm_multiple_blocks
+body: |
+ ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JCC_1 %bb.2, 15, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ %0:gr32 = COPY $eax
+ CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.1, 4, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.1:
+ CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.2, 13, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.2:
+ JMP_1 %bb.3
+
+ bb.3:
+ RET 0
+...
+---
+name: opt_adjusted_imm_multiple_blocks_noopt
+body: |
+ ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks_noopt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 21, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.2, 13, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ %0:gr32 = COPY $eax
+ CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.1, 4, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.1:
+ CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.2, 13, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.2:
+ liveins: $eflags
+ $al = SETCCr 4, implicit $eflags
+
+ bb.3:
+ RET 0
+...
|
I think https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/X86/apx/ccmp.ll should reflect your change? |
Check scc and dfv are same if FlagI is CCMP instruction and add more LIT tests.
For now, there is no such pattern in that LIT test. I'll add some sub-tests to validate this optimization. |
llvm/lib/Target/X86/X86InstrInfo.cpp
Outdated
if (Opcode == X86::CCMP64ri32 || Opcode == X86::CCMP32ri || | ||
Opcode == X86::CCMP16ri || Opcode == X86::CCMP8ri) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need FlagI.getOpcode() == OI.getOpcode()
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. Updated. It can prevent incorrect optimization (see CCMP + CMP combination in opt_redundant_flags_adjusted_imm_noopt_5 MIR sub-test). Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't prevent CCMP + CMP, just e.g., CCMP64ri32 + CCMP16ri. I don't know if we have this pattern in real workload.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
; CCMP+SETCC %0 != 42 | ||
CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags | ||
$cl = SETCCr 5, implicit $eflags | ||
; CCMP+SETCC %0 >= 2; CCMP can be removed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment here is incorrect?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch. Updated it as well as the comments in llvm/test/CodeGen/X86/optimize-compare.mir test. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
llvm#129994)" This reverts commit 7ae7585. There is a problem with peephole optimization for CCMP instruction. See the example as below: C source code: ` if (a > 2 || (b && (a == 2))) { … } ` MIR before peephole optimization: ` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 CCMP32ri %30:gr32, 3, 0, 5, implicit-def $eflags, implicit $eflags // a > 3 JCC_1 %bb.6, 2, implicit $eflags JMP_1 %bb.3 ` Inputs: ` a = 1, b = 0. ` With the inputs above, the expected behavior is to jump to %bb.6 BB. After TEST8rr instruction being executed with b(%21) == 0, the ZF bit is set to 1 in eflags, so the eflags doesn't satisfy SCC condition in the following CCMP32ri instruction (for a==2 condition) which skips compare a(%30) with 2 and set flags in its payload to 0x202 (ZF = 0). The eflags satisfies the SCC condition in the 2nd CCMP32ri instruction which compares a(%30) with 3. It sets CF to 1 in eflags and the JCC instruction jumps to %bb.6 BB. But after adding CCMP support, peephole optimization eliminates the 2nd CCMP32ri instruction and updates the condition of JCC instruction to "BE" from "B". With same inputs, JCC instruction will fall through to the next instruction. It's not expected and the peephole optimization for CCMP instruction is not correct. ` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 JCC_1 %bb.6, 6, implicit $eflags JMP_1 %bb.3 `
#129994)" (#136796) This reverts commit 7ae7585. There is a problem with peephole optimization for CCMP instruction. See the example below: C source code: ``` if (a > 2 || (b && (a == 2))) { … } ``` MIR before peephole optimization: ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 CCMP32ri %30:gr32, 3, 0, 5, implicit-def $eflags, implicit $eflags // a > 2 (transformed to a < 3) JCC_1 %bb.6, 2, implicit $eflags JMP_1 %bb.3 ``` Inputs: ``` a = 1, b = 0. ``` With the inputs above, the expected behavior is to jump to %bb.6 BB. After TEST8rr instruction being executed with b(%21) == 0, the ZF bit is set to 1 in eflags, so the eflags doesn't satisfy SCC condition in the following CCMP32ri instruction (for a==2 condition) which skips compare a(%30) with 2 and set flags in its payload to 0x202 (ZF = 0). The eflags satisfies the SCC condition in the 2nd CCMP32ri instruction which compares a(%30) with 3. It sets CF to 1 in eflags and the JCC instruction jumps to %bb.6 BB. But after adding CCMP support, peephole optimization eliminates the 2nd CCMP32ri instruction and updates the condition of JCC instruction to "BE" from "B". With the same inputs, JCC instruction falls through to the next instruction. It's not expected and the 2nd CCMP32ri should not be eliminated. ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 JCC_1 %bb.6, 6, implicit $eflags JMP_1 %bb.3 ```
llvm#129994)" (llvm#136796) This reverts commit 7ae7585. There is a problem with peephole optimization for CCMP instruction. See the example below: C source code: ``` if (a > 2 || (b && (a == 2))) { … } ``` MIR before peephole optimization: ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 CCMP32ri %30:gr32, 3, 0, 5, implicit-def $eflags, implicit $eflags // a > 2 (transformed to a < 3) JCC_1 %bb.6, 2, implicit $eflags JMP_1 %bb.3 ``` Inputs: ``` a = 1, b = 0. ``` With the inputs above, the expected behavior is to jump to %bb.6 BB. After TEST8rr instruction being executed with b(%21) == 0, the ZF bit is set to 1 in eflags, so the eflags doesn't satisfy SCC condition in the following CCMP32ri instruction (for a==2 condition) which skips compare a(%30) with 2 and set flags in its payload to 0x202 (ZF = 0). The eflags satisfies the SCC condition in the 2nd CCMP32ri instruction which compares a(%30) with 3. It sets CF to 1 in eflags and the JCC instruction jumps to %bb.6 BB. But after adding CCMP support, peephole optimization eliminates the 2nd CCMP32ri instruction and updates the condition of JCC instruction to "BE" from "B". With the same inputs, JCC instruction falls through to the next instruction. It's not expected and the 2nd CCMP32ri should not be eliminated. ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 JCC_1 %bb.6, 6, implicit $eflags JMP_1 %bb.3 ```
llvm#129994)" (llvm#136796) This reverts commit 7ae7585. There is a problem with peephole optimization for CCMP instruction. See the example below: C source code: ``` if (a > 2 || (b && (a == 2))) { … } ``` MIR before peephole optimization: ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 CCMP32ri %30:gr32, 3, 0, 5, implicit-def $eflags, implicit $eflags // a > 2 (transformed to a < 3) JCC_1 %bb.6, 2, implicit $eflags JMP_1 %bb.3 ``` Inputs: ``` a = 1, b = 0. ``` With the inputs above, the expected behavior is to jump to %bb.6 BB. After TEST8rr instruction being executed with b(%21) == 0, the ZF bit is set to 1 in eflags, so the eflags doesn't satisfy SCC condition in the following CCMP32ri instruction (for a==2 condition) which skips compare a(%30) with 2 and set flags in its payload to 0x202 (ZF = 0). The eflags satisfies the SCC condition in the 2nd CCMP32ri instruction which compares a(%30) with 3. It sets CF to 1 in eflags and the JCC instruction jumps to %bb.6 BB. But after adding CCMP support, peephole optimization eliminates the 2nd CCMP32ri instruction and updates the condition of JCC instruction to "BE" from "B". With the same inputs, JCC instruction falls through to the next instruction. It's not expected and the 2nd CCMP32ri should not be eliminated. ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 JCC_1 %bb.6, 6, implicit $eflags JMP_1 %bb.3 ```
llvm#129994)" (llvm#136796) This reverts commit 7ae7585. There is a problem with peephole optimization for CCMP instruction. See the example below: C source code: ``` if (a > 2 || (b && (a == 2))) { … } ``` MIR before peephole optimization: ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 CCMP32ri %30:gr32, 3, 0, 5, implicit-def $eflags, implicit $eflags // a > 2 (transformed to a < 3) JCC_1 %bb.6, 2, implicit $eflags JMP_1 %bb.3 ``` Inputs: ``` a = 1, b = 0. ``` With the inputs above, the expected behavior is to jump to %bb.6 BB. After TEST8rr instruction being executed with b(%21) == 0, the ZF bit is set to 1 in eflags, so the eflags doesn't satisfy SCC condition in the following CCMP32ri instruction (for a==2 condition) which skips compare a(%30) with 2 and set flags in its payload to 0x202 (ZF = 0). The eflags satisfies the SCC condition in the 2nd CCMP32ri instruction which compares a(%30) with 3. It sets CF to 1 in eflags and the JCC instruction jumps to %bb.6 BB. But after adding CCMP support, peephole optimization eliminates the 2nd CCMP32ri instruction and updates the condition of JCC instruction to "BE" from "B". With the same inputs, JCC instruction falls through to the next instruction. It's not expected and the 2nd CCMP32ri should not be eliminated. ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 JCC_1 %bb.6, 6, implicit $eflags JMP_1 %bb.3 ```
llvm#129994)" (llvm#136796) This reverts commit 7ae7585. There is a problem with peephole optimization for CCMP instruction. See the example below: C source code: ``` if (a > 2 || (b && (a == 2))) { … } ``` MIR before peephole optimization: ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 CCMP32ri %30:gr32, 3, 0, 5, implicit-def $eflags, implicit $eflags // a > 2 (transformed to a < 3) JCC_1 %bb.6, 2, implicit $eflags JMP_1 %bb.3 ``` Inputs: ``` a = 1, b = 0. ``` With the inputs above, the expected behavior is to jump to %bb.6 BB. After TEST8rr instruction being executed with b(%21) == 0, the ZF bit is set to 1 in eflags, so the eflags doesn't satisfy SCC condition in the following CCMP32ri instruction (for a==2 condition) which skips compare a(%30) with 2 and set flags in its payload to 0x202 (ZF = 0). The eflags satisfies the SCC condition in the 2nd CCMP32ri instruction which compares a(%30) with 3. It sets CF to 1 in eflags and the JCC instruction jumps to %bb.6 BB. But after adding CCMP support, peephole optimization eliminates the 2nd CCMP32ri instruction and updates the condition of JCC instruction to "BE" from "B". With the same inputs, JCC instruction falls through to the next instruction. It's not expected and the 2nd CCMP32ri should not be eliminated. ``` TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2 JCC_1 %bb.6, 6, implicit $eflags JMP_1 %bb.3 ```
This extends
opitimizeCompareInstr
to re-use previous CCMP results if theprevious comparison was with an immediates that was 1 bigger or smaller. Example: