Skip to content

Commit 7ae7585

Browse files
authored
[X86][APX] Support peephole optimization with CCMP instruction (#129994)
This extends `optimizeCompareInstr` to re-use previous CCMP results if the previous comparison was with an immediate that was 1 bigger or smaller. Example:
```
CCMP x, 13, 2, 5
...
CCMP x, 12, 2, 5 ; can be removed if we change the SETg
SETg ...         ; x > 12 changed to SETge (x >= 13) & remove the 2nd CCMP
```
1 parent 9415b7d commit 7ae7585

File tree

5 files changed

+485
-3
lines changed

5 files changed

+485
-3
lines changed

llvm/lib/Target/X86/X86InstrConditionalCompare.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class Ctest<bits<8> o, Format f, X86TypeInfo t, DAGOperand op1, DAGOperand op2>:
3636
//===----------------------------------------------------------------------===//
3737
// CCMP Instructions
3838
//
39-
let SchedRW = [WriteALU] in {
39+
let SchedRW = [WriteALU], isCompare = 1 in {
4040
def CCMP8rr : Ccmp<0x38, MRMDestReg, Xi8, GR8, GR8>;
4141
def CCMP16rr: Ccmp<0x39, MRMDestReg, Xi16, GR16, GR16>, PD;
4242
def CCMP32rr: Ccmp<0x39, MRMDestReg, Xi32, GR32, GR32>;
@@ -55,7 +55,7 @@ let SchedRW = [WriteALU] in {
5555
def CCMP64ri32: Ccmp<0x81, MRM7r, Xi64, GR64, i64i32imm>;
5656
}
5757

58-
let mayLoad = 1 in {
58+
let mayLoad = 1, isCompare = 1 in {
5959
let SchedRW = [WriteALU.Folded] in {
6060
def CCMP16mi8: Ccmp<0x83, MRM7m, Xi16, i16mem, i16i8imm>, PD;
6161
def CCMP32mi8: Ccmp<0x83, MRM7m, Xi32, i32mem, i32i8imm>;

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4854,6 +4854,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
48544854
case X86::CMP32ri:
48554855
case X86::CMP16ri:
48564856
case X86::CMP8ri:
4857+
case X86::CCMP64ri32:
4858+
case X86::CCMP32ri:
4859+
case X86::CCMP16ri:
4860+
case X86::CCMP8ri:
48574861
SrcReg = MI.getOperand(0).getReg();
48584862
SrcReg2 = 0;
48594863
if (MI.getOperand(1).isImm()) {
@@ -4951,6 +4955,18 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
49514955
}
49524956
return false;
49534957
}
4958+
case X86::CCMP64ri32:
4959+
case X86::CCMP32ri:
4960+
case X86::CCMP16ri:
4961+
case X86::CCMP8ri: {
4962+
// The CCMP instruction should not be optimized if the scc/dfv in it is not
4963+
// the same as the one in the previous CCMP instruction.
4964+
if ((FlagI.getOpcode() != OI.getOpcode()) ||
4965+
(OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm()) ||
4966+
(OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
4967+
return false;
4968+
[[fallthrough]];
4969+
}
49544970
case X86::CMP64ri32:
49554971
case X86::CMP32ri:
49564972
case X86::CMP16ri:

llvm/test/CodeGen/X86/apx/ccmp.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,5 +1300,75 @@ if.end: ; preds = %entry, %if.then
13001300
ret void
13011301
}
13021302

1303+
define void @ccmp_continous_adjust_imm(i32 noundef %a, i32 noundef %b) {
1304+
; CHECK-LABEL: ccmp_continous_adjust_imm:
1305+
; CHECK: # %bb.0: # %entry
1306+
; CHECK-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02]
1307+
; CHECK-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
1308+
; CHECK-NEXT: jg .LBB31_1 # encoding: [0x7f,A]
1309+
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
1310+
; CHECK-NEXT: # %bb.2: # %if.then
1311+
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
1312+
; CHECK-NEXT: jmp foo # TAILCALL
1313+
; CHECK-NEXT: # encoding: [0xeb,A]
1314+
; CHECK-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
1315+
; CHECK-NEXT: .LBB31_1: # %if.end
1316+
; CHECK-NEXT: retq # encoding: [0xc3]
1317+
;
1318+
; NDD-LABEL: ccmp_continous_adjust_imm:
1319+
; NDD: # %bb.0: # %entry
1320+
; NDD-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02]
1321+
; NDD-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
1322+
; NDD-NEXT: jg .LBB31_1 # encoding: [0x7f,A]
1323+
; NDD-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
1324+
; NDD-NEXT: # %bb.2: # %if.then
1325+
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
1326+
; NDD-NEXT: jmp foo # TAILCALL
1327+
; NDD-NEXT: # encoding: [0xeb,A]
1328+
; NDD-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
1329+
; NDD-NEXT: .LBB31_1: # %if.end
1330+
; NDD-NEXT: retq # encoding: [0xc3]
1331+
entry:
1332+
%cmp = icmp slt i32 %a, 2
1333+
%cmp1 = icmp slt i32 %b, 2
1334+
%or.cond = and i1 %cmp, %cmp1
1335+
%cmp3 = icmp slt i32 %b, 3
1336+
%or.cond4 = and i1 %or.cond, %cmp3
1337+
br i1 %or.cond4, label %if.then, label %if.end
1338+
1339+
if.then: ; preds = %entry
1340+
tail call void (...) @foo()
1341+
br label %if.end
1342+
1343+
if.end: ; preds = %if.then, %entry
1344+
ret void
1345+
}
1346+
1347+
define i32 @ccmp_continous_nobranch_adjust_imm(i32 noundef %a, i32 noundef %b) {
1348+
; CHECK-LABEL: ccmp_continous_nobranch_adjust_imm:
1349+
; CHECK: # %bb.0: # %entry
1350+
; CHECK-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
1351+
; CHECK-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
1352+
; CHECK-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
1353+
; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
1354+
; CHECK-NEXT: retq # encoding: [0xc3]
1355+
;
1356+
; NDD-LABEL: ccmp_continous_nobranch_adjust_imm:
1357+
; NDD: # %bb.0: # %entry
1358+
; NDD-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
1359+
; NDD-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
1360+
; NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
1361+
; NDD-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
1362+
; NDD-NEXT: retq # encoding: [0xc3]
1363+
entry:
1364+
%cmp = icmp sgt i32 %a, 1
1365+
%cmp1 = icmp slt i32 %b, 2
1366+
%cmp2 = icmp slt i32 %b, 3
1367+
%or1 = or i1 %cmp, %cmp1
1368+
%or2 = or i1 %or1, %cmp2
1369+
%. = zext i1 %or2 to i32
1370+
ret i32 %.
1371+
}
1372+
13031373
declare dso_local void @foo(...)
13041374
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone

0 commit comments

Comments
 (0)