Skip to content

Commit edfdb9d

Browse files
author
nemanjai
committed
[PowerPC] Eliminate compares - add i32 sext/zext handling for SETLT/SETGT
As mentioned in https://reviews.llvm.org/D33718, this simply adds another pattern to the compare elimination sequence and is committed without a differential revision. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314055 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 3da3422 commit edfdb9d

File tree

9 files changed

+711
-7
lines changed

9 files changed

+711
-7
lines changed

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2870,6 +2870,8 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
28702870
ISD::CondCode CC,
28712871
int64_t RHSValue, SDLoc dl) {
28722872
bool IsRHSZero = RHSValue == 0;
2873+
bool IsRHSOne = RHSValue == 1;
2874+
bool IsRHSNegOne = RHSValue == -1LL;
28732875
switch (CC) {
28742876
default: return SDValue();
28752877
case ISD::SETEQ: {
@@ -2903,6 +2905,9 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
29032905
// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
29042906
if(IsRHSZero)
29052907
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2908+
2909+
// Not the special case (RHS == 0). Handle (%a >= %b) as (%b <= %a)
2910+
// by swapping inputs and falling through.
29062911
std::swap(LHS, RHS);
29072912
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
29082913
IsRHSZero = RHSConst && RHSConst->isNullValue();
@@ -2926,6 +2931,55 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
29262931
SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
29272932
MVT::i64, Shift, getI32Imm(1, dl)), 0);
29282933
}
2934+
case ISD::SETGT: {
2935+
// (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2936+
// (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2937+
// (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2938+
// Handle SETGT -1 (which is equivalent to SETGE 0).
2939+
if (IsRHSNegOne)
2940+
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2941+
2942+
if (IsRHSZero) {
2943+
// The upper 32-bits of the register can't be undefined for this sequence.
2944+
LHS = signExtendInputIfNeeded(LHS);
2945+
RHS = signExtendInputIfNeeded(RHS);
2946+
SDValue Neg =
2947+
SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2948+
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2949+
Neg, getI32Imm(1, dl), getI32Imm(63, dl)), 0);
2950+
}
2951+
// Not one of the special cases (RHS == 0 or RHS == -1). Handle (%a > %b) as
2952+
// (%b < %a) by swapping inputs and falling through.
2953+
std::swap(LHS, RHS);
2954+
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2955+
IsRHSZero = RHSConst && RHSConst->isNullValue();
2956+
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2957+
LLVM_FALLTHROUGH;
2958+
}
2959+
case ISD::SETLT: {
2960+
// (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2961+
// (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
2962+
// (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
2963+
// Handle SETLT 1 (which is equivalent to SETLE 0).
2964+
if (IsRHSOne)
2965+
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2966+
2967+
if (IsRHSZero) {
2968+
SDValue ShiftOps[] = { LHS, getI32Imm(1, dl), getI32Imm(31, dl),
2969+
getI32Imm(31, dl) };
2970+
return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2971+
ShiftOps), 0);
2972+
}
2973+
2974+
// The upper 32-bits of the register can't be undefined for this sequence.
2975+
LHS = signExtendInputIfNeeded(LHS);
2976+
RHS = signExtendInputIfNeeded(RHS);
2977+
SDValue SUBFNode =
2978+
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2979+
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2980+
SUBFNode, getI64Imm(1, dl),
2981+
getI64Imm(63, dl)), 0);
2982+
}
29292983
}
29302984
}
29312985

@@ -2935,6 +2989,9 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
29352989
ISD::CondCode CC,
29362990
int64_t RHSValue, SDLoc dl) {
29372991
bool IsRHSZero = RHSValue == 0;
2992+
bool IsRHSOne = RHSValue == 1;
2993+
bool IsRHSNegOne = RHSValue == -1LL;
2994+
29382995
switch (CC) {
29392996
default: return SDValue();
29402997
case ISD::SETEQ: {
@@ -2978,6 +3035,9 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
29783035
// (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
29793036
if (IsRHSZero)
29803037
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3038+
3039+
// Not the special case (RHS == 0). Handle (%a >= %b) as (%b <= %a)
3040+
// by swapping inputs and falling through.
29813041
std::swap(LHS, RHS);
29823042
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
29833043
IsRHSZero = RHSConst && RHSConst->isNullValue();
@@ -3002,6 +3062,47 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
30023062
return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
30033063
getI32Imm(-1, dl)), 0);
30043064
}
3065+
case ISD::SETGT: {
3066+
// (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3067+
// (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3068+
// (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3069+
if (IsRHSNegOne)
3070+
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3071+
if (IsRHSZero) {
3072+
// The upper 32-bits of the register can't be undefined for this sequence.
3073+
LHS = signExtendInputIfNeeded(LHS);
3074+
RHS = signExtendInputIfNeeded(RHS);
3075+
SDValue Neg =
3076+
SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3077+
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3078+
getI64Imm(63, dl)), 0);
3079+
}
3080+
// Not one of the special cases (RHS == 0 or RHS == -1). Handle (%a > %b) as
3081+
// (%b < %a) by swapping inputs and falling through.
3082+
std::swap(LHS, RHS);
3083+
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3084+
IsRHSZero = RHSConst && RHSConst->isNullValue();
3085+
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3086+
LLVM_FALLTHROUGH;
3087+
}
3088+
case ISD::SETLT: {
3089+
// (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3089+
// (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3090+
// (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3092+
if (IsRHSOne)
3093+
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3094+
if (IsRHSZero)
3095+
return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3096+
getI32Imm(31, dl)), 0);
3097+
3098+
// The upper 32-bits of the register can't be undefined for this sequence.
3099+
LHS = signExtendInputIfNeeded(LHS);
3100+
RHS = signExtendInputIfNeeded(RHS);
3101+
SDValue SUBFNode =
3102+
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3103+
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3104+
SUBFNode, getI64Imm(63, dl)), 0);
3105+
}
30053106
}
30063107
}
30073108

test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,9 @@ define signext i32 @zeroEqualityTest04() {
115115
; CHECK-NEXT: li 12, -1
116116
; CHECK-NEXT: isel 5, 12, 11, 0
117117
; CHECK-NEXT: .LBB3_3: # %endblock
118-
; CHECK-NEXT: cmpwi 5, 1
119-
; CHECK-NEXT: li 3, 0
120-
; CHECK-NEXT: li 4, 1
121-
; CHECK-NEXT: isel 3, 4, 3, 0
118+
; CHECK-NEXT: neg 3, 5
119+
; CHECK-NEXT: rldicl 3, 3, 1, 63
120+
; CHECK-NEXT: xori 3, 3, 1
122121
; CHECK-NEXT: blr
123122
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
124123
%not.cmp = icmp slt i32 %call, 1

test/CodeGen/PowerPC/no-pref-jumps.ll

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@ entry:
1111
br i1 %or.cond, label %if.then, label %if.else
1212

1313
; CHECK-LABEL: @foo
14-
; CHECK: cmpwi
15-
; CHECK: cmpwi
16-
; CHECK: cror
14+
; CHECK: li
15+
; CHECK: li
16+
; CHECK: sub
17+
; CHECK: sub
18+
; CHECK: rldicl
19+
; CHECK: rldicl
20+
; CHECK: or.
1721
; CHECK: blr
1822

1923
if.then: ; preds = %entry
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
2+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
3+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
4+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
5+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
6+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
7+
8+
@glob = common local_unnamed_addr global i8 0, align 1
9+
10+
; Function Attrs: norecurse nounwind readnone
11+
define signext i32 @test_igtsc(i8 signext %a, i8 signext %b) {
12+
; CHECK-LABEL: test_igtsc:
13+
; CHECK: # BB#0: # %entry
14+
; CHECK-NEXT: sub [[REG:r[0-9]+]], r4, r3
15+
; CHECK-NEXT: rldicl r3, [[REG]], 1, 63
16+
; CHECK-NEXT: blr
17+
entry:
18+
%cmp = icmp sgt i8 %a, %b
19+
%conv2 = zext i1 %cmp to i32
20+
ret i32 %conv2
21+
}
22+
23+
; Function Attrs: norecurse nounwind readnone
24+
define signext i32 @test_igtsc_sext(i8 signext %a, i8 signext %b) {
25+
; CHECK-LABEL: test_igtsc_sext:
26+
; CHECK: # BB#0: # %entry
27+
; CHECK-NEXT: sub [[REG:r[0-9]+]], r4, r3
28+
; CHECK-NEXT: sradi r3, [[REG]], 63
29+
; CHECK-NEXT: blr
30+
entry:
31+
%cmp = icmp sgt i8 %a, %b
32+
%sub = sext i1 %cmp to i32
33+
ret i32 %sub
34+
}
35+
36+
; FIXME
37+
; Function Attrs: norecurse nounwind readnone
38+
define signext i32 @test_igtsc_z(i8 signext %a) {
39+
; CHECK-LABEL: test_igtsc_z:
40+
; CHECK: # BB#0: # %entry
41+
; CHECK-NEXT: neg r3, r3
42+
; CHECK-NEXT: rldicl r3, r3, 1, 63
43+
; CHECK-NEXT: blr
44+
entry:
45+
%cmp = icmp sgt i8 %a, 0
46+
%conv1 = zext i1 %cmp to i32
47+
ret i32 %conv1
48+
}
49+
50+
; Function Attrs: norecurse nounwind readnone
51+
define signext i32 @test_igtsc_sext_z(i8 signext %a) {
52+
; CHECK-LABEL: test_igtsc_sext_z:
53+
; CHECK: neg [[REG2:r[0-9]+]], r3
54+
; CHECK-NEXT: sradi r3, [[REG2]], 63
55+
; CHECK-NEXT: blr
56+
entry:
57+
%cmp = icmp sgt i8 %a, 0
58+
%sub = sext i1 %cmp to i32
59+
ret i32 %sub
60+
}
61+
62+
; Function Attrs: norecurse nounwind
63+
define void @test_igtsc_store(i8 signext %a, i8 signext %b) {
64+
; CHECK-LABEL: test_igtsc_store:
65+
; CHECK: # BB#0: # %entry
66+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
67+
; CHECK: rldicl {{r[0-9]+}}, [[REG]], 1, 63
68+
entry:
69+
%cmp = icmp sgt i8 %a, %b
70+
%conv3 = zext i1 %cmp to i8
71+
store i8 %conv3, i8* @glob, align 1
72+
ret void
73+
}
74+
75+
; Function Attrs: norecurse nounwind
76+
define void @test_igtsc_sext_store(i8 signext %a, i8 signext %b) {
77+
; CHECK-LABEL: test_igtsc_sext_store:
78+
; CHECK: # BB#0: # %entry
79+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
80+
; CHECK: sradi {{r[0-9]+}}, [[REG]], 63
81+
entry:
82+
%cmp = icmp sgt i8 %a, %b
83+
%conv3 = sext i1 %cmp to i8
84+
store i8 %conv3, i8* @glob, align 1
85+
ret void
86+
}
87+
88+
; FIXME
89+
; Function Attrs: norecurse nounwind
90+
define void @test_igtsc_z_store(i8 signext %a) {
91+
; CHECK-LABEL: test_igtsc_z_store:
92+
; CHECK: # BB#0: # %entry
93+
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
94+
; CHECK-NEXT: neg r3, r3
95+
; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
96+
; CHECK-NEXT: rldicl r3, r3, 1, 63
97+
; CHECK-NEXT: stb r3, 0(r4)
98+
; CHECK-NEXT: blr
99+
entry:
100+
%cmp = icmp sgt i8 %a, 0
101+
%conv2 = zext i1 %cmp to i8
102+
store i8 %conv2, i8* @glob, align 1
103+
ret void
104+
}
105+
106+
; Function Attrs: norecurse nounwind
107+
define void @test_igtsc_sext_z_store(i8 signext %a) {
108+
; CHECK-LABEL: test_igtsc_sext_z_store:
109+
; CHECK: neg [[REG2:r[0-9]+]], r3
110+
; CHECK: sradi {{r[0-9]+}}, [[REG2]], 63
111+
entry:
112+
%cmp = icmp sgt i8 %a, 0
113+
%conv2 = sext i1 %cmp to i8
114+
store i8 %conv2, i8* @glob, align 1
115+
ret void
116+
}

0 commit comments

Comments
 (0)