Skip to content

[AArch64] Correctness fix: Turn cmn 0 into cmp 0 #143965

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,13 @@ AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
const int OldImm = (int)CmpMI->getOperand(2).getImm();
const int NewImm = std::abs(OldImm + Correction);

// Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
// Handle CMN 1 -> CMP 0, because we prefer CMP 0 over CMN 0.
if (OldImm == 1 && Negative && Correction == -1) {
// If we are adjusting from -1 to 0, we need to change the opcode.
Opc = getComplementOpc(Opc);
}

// Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
// adjusting compare instruction opcode.
if (OldImm == 0 && ((Negative && Correction == 1) ||
(!Negative && Correction == -1))) {
Expand Down Expand Up @@ -380,8 +386,8 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}

const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();

LLVM_DEBUG(dbgs() << "Head branch:\n");
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
Expand All @@ -393,6 +399,14 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
<< '\n');
LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');

unsigned Opc = HeadCmpMI->getOpcode();
if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
HeadImm = -HeadImm;

Opc = TrueCmpMI->getOpcode();
if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
TrueImm = -TrueImm;

if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
std::abs(TrueImm - HeadImm) == 2) {
Expand Down Expand Up @@ -434,6 +448,8 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
adjustHeadCond = !adjustHeadCond;
}

TrueImm = std::abs(TrueImm);
HeadImm = std::abs(HeadImm);
if (adjustHeadCond) {
Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
} else {
Expand Down
124 changes: 124 additions & 0 deletions llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,130 @@ return: ; preds = %if.end, %land.lhs.t
ret i32 %retval.0
}

; (a < 0 && b == c) || (a < -1 && b == d)
;
; Both tests on @a are signed less-than compares, against 0 and against -1.
; The second compare (a < -1) sits in %lor.lhs.false, which is entered only
; when the first compare (a < 0) is false.
; The expected output below contains a single sign-bit test on the loaded
; value (tbz w8, #31) and no load of @d or compare against -1.
define i32 @combine_gt_lt_n0() #0 {
; CHECK-LABEL: combine_gt_lt_n0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: tbz w8, #31, .LBB12_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB12_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB12_3: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
; First test: a < 0.
%cmp = icmp slt i32 %0, 0
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false

land.lhs.true: ; preds = %entry
; a < 0 held: the result is 1 iff b == c.
%1 = load i32, ptr @b, align 4
%2 = load i32, ptr @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end

lor.lhs.false: ; preds = %entry
; Second test on the same loaded value: a < -1.
%cmp2 = icmp slt i32 %0, -1
br i1 %cmp2, label %land.lhs.true3, label %if.end

land.lhs.true3: ; preds = %lor.lhs.false
; a < -1 held: the result is 1 iff b == d.
%3 = load i32, ptr @b, align 4
%4 = load i32, ptr @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end

if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
br label %return

return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from %if.end, 1 from either successful equality check.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}

; The first compare (a > -1) has two users: a select whose result is stored
; through %p, and the entry branch. The second compare (a > 0) tests the same
; loaded value on the path where the first compare was true.
; The expected output keeps "cmn w8, #1" (consumed by "csel ... gt") for the
; first compare and emits a separate "cmp w8, #1" / "b.ge" for the second.
; NOTE(review): presumably this guards against rewriting the first compare's
; immediate while its flags also feed the csel - confirm against the pass.
define i32 @combine_gt_ge_sel_2(i64 %v, ptr %p) #0 {
; CHECK-LABEL: combine_gt_ge_sel_2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #1
; CHECK-NEXT: csel x9, x0, xzr, gt
; CHECK-NEXT: str x9, [x1]
; CHECK-NEXT: tbnz w8, #31, .LBB13_2
; CHECK-NEXT: // %bb.1: // %lor.lhs.false
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: b.ge .LBB13_4
; CHECK-NEXT: b .LBB13_6
; CHECK-NEXT: .LBB13_2: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB13_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB13_4: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB13_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB13_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
; First test: a > -1. Its result is used by both the select and the branch.
%cmp = icmp sgt i32 %0, -1
%m = select i1 %cmp, i64 %v, i64 0
store i64 %m, ptr %p
br i1 %cmp, label %lor.lhs.false, label %land.lhs.true

land.lhs.true: ; preds = %entry
; a > -1 was false: return 1 if b == c, otherwise fall into the b == d check.
%1 = load i32, ptr @b, align 4
%2 = load i32, ptr @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %land.lhs.true3

lor.lhs.false: ; preds = %entry
; Second test on the same loaded value: a > 0.
%cmp2 = icmp sgt i32 %0, 0
br i1 %cmp2, label %land.lhs.true3, label %if.end

land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
; Reached from either path above: the result is 1 iff b == d.
%3 = load i32, ptr @b, align 4
%4 = load i32, ptr @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end

if.end: ; preds = %land.lhs.true3, %lor.lhs.false
br label %return

return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from %if.end, 1 from either successful equality check.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}

declare i32 @zoo(i32)

declare double @yoo(i32)
Expand Down