Skip to content

Commit 7a42427

Browse files
authored
Revert "[X86][APX] Support peephole optimization with CCMP instruction (#129994)" (#136796)
This reverts commit 7ae7585.

There is a problem with the peephole optimization for the CCMP instruction. See the example below.

C source code:
```
if (a > 2 || (b && (a == 2))) { … }
```

MIR before peephole optimization:
```
TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b
CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2
CCMP32ri %30:gr32, 3, 0, 5, implicit-def $eflags, implicit $eflags // a > 2 (transformed to a < 3)
JCC_1 %bb.6, 2, implicit $eflags
JMP_1 %bb.3
```

Inputs:
```
a = 1, b = 0.
```

With the inputs above, the expected behavior is to jump to the %bb.6 basic block. After the TEST8rr instruction executes with b (%21) == 0, the ZF bit in eflags is set to 1, so eflags does not satisfy the SCC condition of the following CCMP32ri instruction (for the a == 2 condition). That instruction therefore skips comparing a (%30) with 2 and sets the flags from its payload, so eflags becomes 0x202 (ZF = 0). Eflags now satisfies the SCC condition of the 2nd CCMP32ri instruction, which compares a (%30) with 3. It sets CF to 1 in eflags, and the JCC instruction jumps to the %bb.6 basic block.

However, after CCMP support was added, the peephole optimization eliminates the 2nd CCMP32ri instruction and changes the condition of the JCC instruction from "B" to "BE". With the same inputs, the JCC instruction now falls through to the next instruction. This is not the expected behavior; the 2nd CCMP32ri must not be eliminated.

MIR after peephole optimization:
```
TEST8rr %21:gr8, %21:gr8, implicit-def $eflags // b
CCMP32ri %30:gr32, 2, 0, 5, implicit-def $eflags, implicit $eflags // a == 2
JCC_1 %bb.6, 6, implicit $eflags
JMP_1 %bb.3
```
1 parent fb01f19 commit 7a42427

File tree

5 files changed

+3
-485
lines changed

5 files changed

+3
-485
lines changed

llvm/lib/Target/X86/X86InstrConditionalCompare.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class Ctest<bits<8> o, Format f, X86TypeInfo t, DAGOperand op1, DAGOperand op2>:
3636
//===----------------------------------------------------------------------===//
3737
// CCMP Instructions
3838
//
39-
let SchedRW = [WriteALU], isCompare = 1 in {
39+
let SchedRW = [WriteALU] in {
4040
def CCMP8rr : Ccmp<0x38, MRMDestReg, Xi8, GR8, GR8>;
4141
def CCMP16rr: Ccmp<0x39, MRMDestReg, Xi16, GR16, GR16>, PD;
4242
def CCMP32rr: Ccmp<0x39, MRMDestReg, Xi32, GR32, GR32>;
@@ -55,7 +55,7 @@ let SchedRW = [WriteALU], isCompare = 1 in {
5555
def CCMP64ri32: Ccmp<0x81, MRM7r, Xi64, GR64, i64i32imm>;
5656
}
5757

58-
let mayLoad = 1, isCompare = 1 in {
58+
let mayLoad = 1 in {
5959
let SchedRW = [WriteALU.Folded] in {
6060
def CCMP16mi8: Ccmp<0x83, MRM7m, Xi16, i16mem, i16i8imm>, PD;
6161
def CCMP32mi8: Ccmp<0x83, MRM7m, Xi32, i32mem, i32i8imm>;

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4875,10 +4875,6 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
48754875
case X86::CMP32ri:
48764876
case X86::CMP16ri:
48774877
case X86::CMP8ri:
4878-
case X86::CCMP64ri32:
4879-
case X86::CCMP32ri:
4880-
case X86::CCMP16ri:
4881-
case X86::CCMP8ri:
48824878
SrcReg = MI.getOperand(0).getReg();
48834879
SrcReg2 = 0;
48844880
if (MI.getOperand(1).isImm()) {
@@ -4976,18 +4972,6 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
49764972
}
49774973
return false;
49784974
}
4979-
case X86::CCMP64ri32:
4980-
case X86::CCMP32ri:
4981-
case X86::CCMP16ri:
4982-
case X86::CCMP8ri: {
4983-
// The CCMP instruction should not be optimized if the scc/dfv in it is not
4984-
// same as the one in previous CCMP instruction.
4985-
if ((FlagI.getOpcode() != OI.getOpcode()) ||
4986-
(OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm()) ||
4987-
(OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
4988-
return false;
4989-
[[fallthrough]];
4990-
}
49914975
case X86::CMP64ri32:
49924976
case X86::CMP32ri:
49934977
case X86::CMP16ri:

llvm/test/CodeGen/X86/apx/ccmp.ll

Lines changed: 0 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,75 +1300,5 @@ if.end: ; preds = %entry, %if.then
13001300
ret void
13011301
}
13021302

1303-
define void @ccmp_continous_adjust_imm(i32 noundef %a, i32 noundef %b) {
1304-
; CHECK-LABEL: ccmp_continous_adjust_imm:
1305-
; CHECK: # %bb.0: # %entry
1306-
; CHECK-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02]
1307-
; CHECK-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
1308-
; CHECK-NEXT: jg .LBB31_1 # encoding: [0x7f,A]
1309-
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
1310-
; CHECK-NEXT: # %bb.2: # %if.then
1311-
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
1312-
; CHECK-NEXT: jmp foo # TAILCALL
1313-
; CHECK-NEXT: # encoding: [0xeb,A]
1314-
; CHECK-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
1315-
; CHECK-NEXT: .LBB31_1: # %if.end
1316-
; CHECK-NEXT: retq # encoding: [0xc3]
1317-
;
1318-
; NDD-LABEL: ccmp_continous_adjust_imm:
1319-
; NDD: # %bb.0: # %entry
1320-
; NDD-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02]
1321-
; NDD-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
1322-
; NDD-NEXT: jg .LBB31_1 # encoding: [0x7f,A]
1323-
; NDD-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
1324-
; NDD-NEXT: # %bb.2: # %if.then
1325-
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
1326-
; NDD-NEXT: jmp foo # TAILCALL
1327-
; NDD-NEXT: # encoding: [0xeb,A]
1328-
; NDD-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
1329-
; NDD-NEXT: .LBB31_1: # %if.end
1330-
; NDD-NEXT: retq # encoding: [0xc3]
1331-
entry:
1332-
%cmp = icmp slt i32 %a, 2
1333-
%cmp1 = icmp slt i32 %b, 2
1334-
%or.cond = and i1 %cmp, %cmp1
1335-
%cmp3 = icmp slt i32 %b, 3
1336-
%or.cond4 = and i1 %or.cond, %cmp3
1337-
br i1 %or.cond4, label %if.then, label %if.end
1338-
1339-
if.then: ; preds = %entry
1340-
tail call void (...) @foo()
1341-
br label %if.end
1342-
1343-
if.end: ; preds = %if.then, %entry
1344-
ret void
1345-
}
1346-
1347-
define i32 @ccmp_continous_nobranch_adjust_imm(i32 noundef %a, i32 noundef %b) {
1348-
; CHECK-LABEL: ccmp_continous_nobranch_adjust_imm:
1349-
; CHECK: # %bb.0: # %entry
1350-
; CHECK-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
1351-
; CHECK-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
1352-
; CHECK-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
1353-
; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
1354-
; CHECK-NEXT: retq # encoding: [0xc3]
1355-
;
1356-
; NDD-LABEL: ccmp_continous_nobranch_adjust_imm:
1357-
; NDD: # %bb.0: # %entry
1358-
; NDD-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
1359-
; NDD-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
1360-
; NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
1361-
; NDD-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
1362-
; NDD-NEXT: retq # encoding: [0xc3]
1363-
entry:
1364-
%cmp = icmp sgt i32 %a, 1
1365-
%cmp1 = icmp slt i32 %b, 2
1366-
%cmp2 = icmp slt i32 %b, 3
1367-
%or1 = or i1 %cmp, %cmp1
1368-
%or2 = or i1 %or1, %cmp2
1369-
%. = zext i1 %or2 to i32
1370-
ret i32 %.
1371-
}
1372-
13731303
declare dso_local void @foo(...)
13741304
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone

0 commit comments

Comments
 (0)