Skip to content

Commit 889c156

Browse files
author
nemanjai
committed
[PowerPC] Eliminate compares - add i32 sext/zext handling for SETULE/SETUGE
As mentioned in https://reviews.llvm.org/D33718, this simply adds another pattern to the compare elimination sequence and is committed without a differential revision. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314060 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 428c400 commit 889c156

19 files changed

+1502
-27
lines changed

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ STATISTIC(NumZextSetcc,
7575
"Number of (zext(setcc)) nodes expanded into GPR sequence.");
7676
STATISTIC(SignExtensionsAdded,
7777
"Number of sign extensions for compare inputs added.");
78+
STATISTIC(ZeroExtensionsAdded,
79+
"Number of zero extensions for compare inputs added.");
7880
STATISTIC(NumLogicOpsOnComparison,
7981
"Number of logical ops on i1 values calculated in GPR.");
8082
STATISTIC(OmittedForNonExtendUses,
@@ -301,6 +303,7 @@ namespace {
301303
bool tryLogicOpOfCompares(SDNode *N);
302304
SDValue computeLogicOpInGPR(SDValue LogicOp);
303305
SDValue signExtendInputIfNeeded(SDValue Input);
306+
SDValue zeroExtendInputIfNeeded(SDValue Input);
304307
SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
305308
SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
306309
ZeroCompare CmpTy);
@@ -2763,6 +2766,41 @@ SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) {
27632766
MVT::i64, Input), 0);
27642767
}
27652768

2769+
/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2770+
/// Otherwise just reinterpret it as a 64-bit value.
2771+
/// Useful when emitting comparison code for 32-bit values without using
2772+
/// the compare instruction (which only considers the lower 32-bits).
///
/// Input must have type MVT::i32 (asserted below). The result is either
/// Input passed through addExtOrTrunc with ExtOrTruncConversion::Ext (no
/// extend instruction is emitted) or a new RLDICL_32_64 machine node of type
/// MVT::i64. The ZeroExtensionsAdded statistic is only incremented in the
/// latter case.
2773+
SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) {
2774+
assert(Input.getValueType() == MVT::i32 &&
2775+
"Can only zero-extend 32-bit values here.");
2776+
unsigned Opc = Input.getOpcode();
2777+
2778+
// The only conditions under which we can omit the actual extend instruction:
2779+
// - The value has already been zero-extended
2780+
// - The value is a non-negative constant
2781+
// - The value comes from a load that isn't a sign-extending load
2782+
// An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have
2783+
// to conservatively actually clear the high bits.
2784+
if (Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND)
2785+
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2786+
2787+
// A constant qualifies only when its sign-extended value is >= 0.
ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2788+
if (InputConst && InputConst->getSExtValue() >= 0)
2789+
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2790+
2791+
LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2792+
// The input is a load that doesn't sign-extend (it will be zero-extended).
2793+
if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
2794+
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2795+
2796+
// None of the above, need to zero-extend.
// RLDICL_32_64 with operands (0, 32) is the form printed as
// "clrldi rA, rS, 32".
2797+
SDLoc dl(Input);
2798+
ZeroExtensionsAdded++;
2799+
return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
2800+
getI64Imm(0, dl), getI64Imm(32, dl)),
2801+
0);
2802+
}
2803+
27662804
// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
27672805
// course not actual zero/sign extensions that will generate machine code,
27682806
// they're just a way to reinterpret a 32 bit value in a register as a
@@ -2980,6 +3018,24 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
29803018
SUBFNode, getI64Imm(1, dl),
29813019
getI64Imm(63, dl)), 0);
29823020
}
3021+
case ISD::SETUGE:
3022+
// (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1)
3023+
// (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1)
3024+
std::swap(LHS, RHS);
3025+
LLVM_FALLTHROUGH;
3026+
case ISD::SETULE: {
3027+
// The upper 32-bits of the register can't be undefined for this sequence.
3028+
LHS = zeroExtendInputIfNeeded(LHS);
3029+
RHS = zeroExtendInputIfNeeded(RHS);
3030+
SDValue Subtract =
3031+
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3032+
SDValue SrdiNode =
3033+
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3034+
Subtract, getI64Imm(1, dl),
3035+
getI64Imm(63, dl)), 0);
3036+
return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3037+
getI32Imm(1, dl)), 0);
3038+
}
29833039
}
29843040
}
29853041

@@ -3103,6 +3159,23 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
31033159
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
31043160
SUBFNode, getI64Imm(63, dl)), 0);
31053161
}
3162+
case ISD::SETUGE:
3163+
// (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3164+
// (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3165+
std::swap(LHS, RHS);
3166+
LLVM_FALLTHROUGH;
3167+
case ISD::SETULE: {
3168+
// The upper 32-bits of the register can't be undefined for this sequence.
3169+
LHS = zeroExtendInputIfNeeded(LHS);
3170+
RHS = zeroExtendInputIfNeeded(RHS);
3171+
SDValue Subtract =
3172+
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3173+
SDValue Shift =
3174+
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3175+
getI32Imm(1, dl), getI32Imm(63,dl)), 0);
3176+
return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3177+
getI32Imm(-1, dl)), 0);
3178+
}
31063179
}
31073180
}
31083181

lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4260,7 +4260,7 @@ def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
42604260
def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
42614261
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
42624262
def : InstAlias<"clrldi $rA, $rS, $n",
4263-
(RLDICL_32 gprc:$rA, gprc:$rS, 0, u6imm:$n)>;
4263+
(RLDICL_32_64 g8rc:$rA, gprc:$rS, 0, u6imm:$n)>;
42644264
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
42654265
def : InstAlias<"lnia $RT", (ADDPCIS g8rc:$RT, 0)>;
42664266

test/CodeGen/PowerPC/fast-isel-call.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,17 @@ define void @foo(i8 %a, i16 %b) nounwind {
2929
%1 = call i32 @t1(i8 signext %a)
3030
; ELF64: extsb
3131
%2 = call i32 @t2(i8 zeroext %a)
32-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
32+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
3333
%3 = call i32 @t3(i16 signext %b)
3434
; ELF64: extsh
3535
%4 = call i32 @t4(i16 zeroext %b)
36-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
36+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
3737

3838
;; A few tests to check materialization
3939
%5 = call i32 @t2(i8 zeroext 255)
40-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
40+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
4141
%6 = call i32 @t4(i16 zeroext 65535)
42-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
42+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
4343
ret void
4444
}
4545

@@ -66,12 +66,12 @@ entry:
6666
; ELF64: li 6, 28
6767
; ELF64: li 7, 40
6868
; ELF64: li 8, 186
69-
; ELF64: rldicl 3, 3, 0, 56
70-
; ELF64: rldicl 4, 4, 0, 56
71-
; ELF64: rldicl 5, 5, 0, 56
72-
; ELF64: rldicl 6, 6, 0, 56
73-
; ELF64: rldicl 7, 7, 0, 56
74-
; ELF64: rldicl 8, 8, 0, 56
69+
; ELF64: clrldi 3, 3, 56
70+
; ELF64: clrldi 4, 4, 56
71+
; ELF64: clrldi 5, 5, 56
72+
; ELF64: clrldi 6, 6, 56
73+
; ELF64: clrldi 7, 7, 56
74+
; ELF64: clrldi 8, 8, 56
7575
ret i32 0
7676
}
7777

test/CodeGen/PowerPC/fast-isel-conversion.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -245,11 +245,11 @@ entry:
245245
; PPC970: uitofp_single_i16
246246
%b.addr = alloca float, align 4
247247
%conv = uitofp i16 %a to float
248-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
248+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
249249
; ELF64: std
250250
; ELF64: lfd
251251
; ELF64: fcfidus
252-
; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
252+
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
253253
; ELF64LE: std
254254
; ELF64LE: lfd
255255
; ELF64LE: fcfidus
@@ -269,11 +269,11 @@ entry:
269269
; PPC970: uitofp_single_i8
270270
%b.addr = alloca float, align 4
271271
%conv = uitofp i8 %a to float
272-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
272+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
273273
; ELF64: std
274274
; ELF64: lfd
275275
; ELF64: fcfidus
276-
; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
276+
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
277277
; ELF64LE: std
278278
; ELF64LE: lfd
279279
; ELF64LE: fcfidus
@@ -334,11 +334,11 @@ entry:
334334
; PPC970: uitofp_double_i16
335335
%b.addr = alloca double, align 8
336336
%conv = uitofp i16 %a to double
337-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
337+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
338338
; ELF64: std
339339
; ELF64: lfd
340340
; ELF64: fcfidu
341-
; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
341+
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
342342
; ELF64LE: std
343343
; ELF64LE: lfd
344344
; ELF64LE: fcfidu
@@ -357,11 +357,11 @@ entry:
357357
; PPC970: uitofp_double_i8
358358
%b.addr = alloca double, align 8
359359
%conv = uitofp i8 %a to double
360-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
360+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
361361
; ELF64: std
362362
; ELF64: lfd
363363
; ELF64: fcfidu
364-
; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
364+
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
365365
; ELF64LE: std
366366
; ELF64LE: lfd
367367
; ELF64LE: fcfidu

test/CodeGen/PowerPC/fast-isel-ext.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,21 @@ define i32 @zext_16_32(i16 %a) nounwind {
1919
define i64 @zext_8_64(i8 %a) nounwind {
2020
; ELF64: zext_8_64
2121
%r = zext i8 %a to i64
22-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
22+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
2323
ret i64 %r
2424
}
2525

2626
define i64 @zext_16_64(i16 %a) nounwind {
2727
; ELF64: zext_16_64
2828
%r = zext i16 %a to i64
29-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
29+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
3030
ret i64 %r
3131
}
3232

3333
define i64 @zext_32_64(i32 %a) nounwind {
3434
; ELF64: zext_32_64
3535
%r = zext i32 %a to i64
36-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
36+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 32
3737
ret i64 %r
3838
}
3939

test/CodeGen/PowerPC/fast-isel-ret.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ entry:
4747
define zeroext i8 @ret3(i8 signext %a) nounwind {
4848
entry:
4949
; ELF64-LABEL: ret3
50-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
50+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
5151
; ELF64: blr
5252
ret i8 %a
5353
}
@@ -63,15 +63,15 @@ entry:
6363
define zeroext i16 @ret5(i16 signext %a) nounwind {
6464
entry:
6565
; ELF64-LABEL: ret5
66-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
66+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
6767
; ELF64: blr
6868
ret i16 %a
6969
}
7070

7171
define i16 @ret6(i16 %a) nounwind {
7272
entry:
7373
; ELF64-LABEL: ret6
74-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
74+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
7575
; ELF64: blr
7676
ret i16 %a
7777
}
@@ -87,15 +87,15 @@ entry:
8787
define zeroext i32 @ret8(i32 signext %a) nounwind {
8888
entry:
8989
; ELF64-LABEL: ret8
90-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
90+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 32
9191
; ELF64: blr
9292
ret i32 %a
9393
}
9494

9595
define i32 @ret9(i32 %a) nounwind {
9696
entry:
9797
; ELF64-LABEL: ret9
98-
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
98+
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 32
9999
; ELF64: blr
100100
ret i32 %a
101101
}
@@ -104,6 +104,7 @@ define i64 @ret10(i64 %a) nounwind {
104104
entry:
105105
; ELF64-LABEL: ret10
106106
; ELF64-NOT: exts
107+
; ELF64-NOT: clrldi
107108
; ELF64-NOT: rldicl
108109
; ELF64: blr
109110
ret i64 %a
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
3+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
4+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
5+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
6+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
7+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
8+
9+
define signext i32 @test(i8 zeroext %a, i8 zeroext %b) {
10+
; CHECK-LABEL: test:
11+
; CHECK: # BB#0: # %entry
12+
; CHECK-NEXT: rlwinm r3, r3, 0, 31, 31
13+
; CHECK-NEXT: rlwinm r4, r4, 0, 31, 31
14+
; CHECK-NEXT: clrldi r3, r3, 32
15+
; CHECK-NEXT: clrldi r4, r4, 32
16+
; CHECK-NEXT: sub r3, r4, r3
17+
; CHECK-NEXT: rldicl r3, r3, 1, 63
18+
; CHECK-NEXT: xori r3, r3, 1
19+
; CHECK-NEXT: blr
20+
entry:
21+
%0 = and i8 %a, 1
22+
%1 = and i8 %b, 1
23+
%cmp = icmp ule i8 %0, %1
24+
%conv3 = zext i1 %cmp to i32
25+
ret i32 %conv3
26+
}

0 commit comments

Comments
 (0)