Skip to content

Commit bab9724

Browse files
author
nemanjai
committed
[PowerPC] Eliminate compares - add i32 sext/zext handling for SETULT/SETUGT
As mentioned in https://reviews.llvm.org/D33718, this simply adds another pattern to the compare elimination sequence and is committed without a differential revision. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314062 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 889c156 commit bab9724

15 files changed

+1234
-3
lines changed

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2779,9 +2779,11 @@ SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) {
27792779
// - The value has already been zero-extended
27802780
// - The value is a positive constant
27812781
// - The value comes from a load that isn't a sign-extending load
2782-
// An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have
2783-
// to conservatively actually clear the high bits.
2784-
if (Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND)
2782+
// An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2783+
bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
2784+
(Input.getOperand(0).getOpcode() == ISD::AssertZext ||
2785+
Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
2786+
if (Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND || IsTruncateOfZExt)
27852787
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
27862788

27872789
ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
@@ -3036,6 +3038,21 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
30363038
return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
30373039
getI32Imm(1, dl)), 0);
30383040
}
3041+
case ISD::SETUGT:
3042+
// (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3043+
// (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3044+
std::swap(LHS, RHS);
3045+
LLVM_FALLTHROUGH;
3046+
case ISD::SETULT: {
3047+
// The upper 32-bits of the register can't be undefined for this sequence.
3048+
LHS = zeroExtendInputIfNeeded(LHS);
3049+
RHS = zeroExtendInputIfNeeded(RHS);
3050+
SDValue Subtract =
3051+
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3052+
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3053+
Subtract, getI64Imm(1, dl),
3054+
getI64Imm(63, dl)), 0);
3055+
}
30393056
}
30403057
}
30413058

@@ -3176,6 +3193,20 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
31763193
return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
31773194
getI32Imm(-1, dl)), 0);
31783195
}
3196+
case ISD::SETUGT:
3197+
// (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3198+
// (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3199+
std::swap(LHS, RHS);
3200+
LLVM_FALLTHROUGH;
3201+
case ISD::SETULT: {
3202+
// The upper 32-bits of the register can't be undefined for this sequence.
3203+
LHS = zeroExtendInputIfNeeded(LHS);
3204+
RHS = zeroExtendInputIfNeeded(RHS);
3205+
SDValue Subtract =
3206+
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3207+
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3208+
Subtract, getI64Imm(63, dl)), 0);
3209+
}
31793210
}
31803211
}
31813212

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
2+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
3+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
4+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
5+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
6+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
7+
8+
%struct.tree_common = type { i8, [3 x i8] }
9+
declare signext i32 @fn2(...) local_unnamed_addr #1
10+
11+
; Function Attrs: nounwind
12+
define i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1) {
13+
; CHECK-LABEL: testCompare1:
14+
; CHECK: # BB#0: # %entry
15+
; CHECK: lbz r3, 0(r3)
16+
; CHECK-DAG: clrlwi r3, r3, 31
17+
; CHECK-DAG: clrldi r3, r3, 32
18+
; CHECK: lbz r4, 0(r4)
19+
; CHECK-DAG: clrlwi r4, r4, 31
20+
; CHECK-DAG: clrldi r4, r4, 32
21+
; CHECK: sub r3, r3, r4
22+
; CHECK-NEXT: rldicl r3, r3, 1, 63
23+
entry:
24+
%bf.load = load i8, i8* bitcast (i32 (%struct.tree_common*)* @testCompare1 to i8*), align 4
25+
%bf.clear = and i8 %bf.load, 1
26+
%0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %arg1, i64 0, i32 0
27+
%bf.load1 = load i8, i8* %0, align 4
28+
%bf.clear2 = and i8 %bf.load1, 1
29+
%cmp = icmp ugt i8 %bf.clear, %bf.clear2
30+
%conv = zext i1 %cmp to i32
31+
%call = tail call signext i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 signext %conv) #2
32+
ret i32 undef
33+
}
34+
35+
; Function Attrs: norecurse nounwind readnone
36+
define signext i32 @testCompare2(i32 zeroext %a, i32 zeroext %b) {
37+
; CHECK-LABEL: testCompare2:
38+
; CHECK: # BB#0: # %entry
39+
; CHECK-DAG: rlwinm r3, r3, 0, 31, 31
40+
; CHECK-DAG: rlwinm r4, r4, 0, 31, 31
41+
; CHECK-DAG: clrldi r3, r3, 32
42+
; CHECK-DAG: clrldi r4, r4, 32
43+
; CHECK: sub r3, r4, r3
44+
; CHECK-NEXT: rldicl r3, r3, 1, 63
45+
; CHECK-NEXT: blr
46+
entry:
47+
%and = and i32 %a, 1
48+
%and1 = and i32 %b, 1
49+
%cmp = icmp ugt i32 %and, %and1
50+
%conv = zext i1 %cmp to i32
51+
ret i32 %conv
52+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
2+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
3+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
4+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
5+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
6+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
7+
8+
%struct.tree_common = type { i8, [3 x i8] }
9+
declare signext i32 @fn2(...) local_unnamed_addr #1
10+
11+
; Function Attrs: nounwind
12+
define i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1) {
13+
; CHECK-LABEL: testCompare1:
14+
; CHECK: # BB#0: # %entry
15+
; CHECK: lbz r3, 0(r3)
16+
; CHECK-DAG: clrlwi r3, r3, 31
17+
; CHECK-DAG: clrldi r3, r3, 32
18+
; CHECK: lbz r4, 0(r4)
19+
; CHECK-DAG: clrlwi r4, r4, 31
20+
; CHECK-DAG: clrldi r4, r4, 32
21+
; CHECK: sub r3, r4, r3
22+
; CHECK-NEXT: rldicl r3, r3, 1, 63
23+
entry:
24+
%bf.load = load i8, i8* bitcast (i32 (%struct.tree_common*)* @testCompare1 to i8*), align 4
25+
%bf.clear = and i8 %bf.load, 1
26+
%0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %arg1, i64 0, i32 0
27+
%bf.load1 = load i8, i8* %0, align 4
28+
%bf.clear2 = and i8 %bf.load1, 1
29+
%cmp = icmp ult i8 %bf.clear, %bf.clear2
30+
%conv = zext i1 %cmp to i32
31+
%call = tail call signext i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 signext %conv) #2
32+
ret i32 undef
33+
}
34+
35+
; Function Attrs: norecurse nounwind readnone
36+
define signext i32 @testCompare2(i32 zeroext %a, i32 zeroext %b) {
37+
; CHECK-LABEL: testCompare2:
38+
; CHECK: # BB#0: # %entry
39+
; CHECK-DAG: rlwinm r3, r3, 0, 31, 31
40+
; CHECK-DAG: rlwinm r4, r4, 0, 31, 31
41+
; CHECK-DAG: clrldi r3, r3, 32
42+
; CHECK-DAG: clrldi r4, r4, 32
43+
; CHECK: sub r3, r3, r4
44+
; CHECK-NEXT: rldicl r3, r3, 1, 63
45+
; CHECK-NEXT: blr
46+
entry:
47+
%and = and i32 %a, 1
48+
%and1 = and i32 %b, 1
49+
%cmp = icmp ult i32 %and, %and1
50+
%conv = zext i1 %cmp to i32
51+
ret i32 %conv
52+
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
2+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
3+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
4+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
5+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
6+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
7+
8+
@glob = common local_unnamed_addr global i8 0, align 1
9+
10+
; Function Attrs: norecurse nounwind readnone
11+
define signext i32 @test_igtuc(i8 zeroext %a, i8 zeroext %b) {
12+
; CHECK-LABEL: test_igtuc:
13+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
14+
; CHECK-NEXT: rldicl r3, [[REG]], 1, 63
15+
; CHECK-NEXT: blr
16+
entry:
17+
%cmp = icmp ugt i8 %a, %b
18+
%conv2 = zext i1 %cmp to i32
19+
ret i32 %conv2
20+
}
21+
22+
; Function Attrs: norecurse nounwind readnone
23+
define signext i32 @test_igtuc_sext(i8 zeroext %a, i8 zeroext %b) {
24+
; CHECK-LABEL: test_igtuc_sext:
25+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
26+
; CHECK-NEXT: sradi r3, [[REG]], 63
27+
; CHECK-NEXT: blr
28+
entry:
29+
%cmp = icmp ugt i8 %a, %b
30+
%sub = sext i1 %cmp to i32
31+
ret i32 %sub
32+
}
33+
34+
; Function Attrs: norecurse nounwind readnone
35+
define signext i32 @test_igtuc_z(i8 zeroext %a) {
36+
; CHECK-LABEL: test_igtuc_z:
37+
; CHECK: cntlzw r3, r3
38+
; CHECK-NEXT: srwi r3, r3, 5
39+
; CHECK-NEXT: xori r3, r3, 1
40+
; CHECK-NEXT: blr
41+
entry:
42+
%cmp = icmp ne i8 %a, 0
43+
%conv1 = zext i1 %cmp to i32
44+
ret i32 %conv1
45+
}
46+
47+
; Function Attrs: norecurse nounwind readnone
48+
define signext i32 @test_igtuc_sext_z(i8 zeroext %a) {
49+
; CHECK-LABEL: test_igtuc_sext_z:
50+
; CHECK: cntlzw r3, r3
51+
; CHECK-NEXT: srwi r3, r3, 5
52+
; CHECK-NEXT: xori r3, r3, 1
53+
; CHECK-NEXT: neg r3, r3
54+
; CHECK-NEXT: blr
55+
entry:
56+
%cmp = icmp ne i8 %a, 0
57+
%sub = sext i1 %cmp to i32
58+
ret i32 %sub
59+
}
60+
61+
; Function Attrs: norecurse nounwind
62+
define void @test_igtuc_store(i8 zeroext %a, i8 zeroext %b) {
63+
; CHECK-LABEL: test_igtuc_store:
64+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
65+
; CHECK: rldicl {{r[0-9]+}}, [[REG]], 1, 63
66+
entry:
67+
%cmp = icmp ugt i8 %a, %b
68+
%conv3 = zext i1 %cmp to i8
69+
store i8 %conv3, i8* @glob, align 1
70+
ret void
71+
}
72+
73+
; Function Attrs: norecurse nounwind
74+
define void @test_igtuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
75+
; CHECK-LABEL: test_igtuc_sext_store:
76+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
77+
; CHECK: sradi {{r[0-9]+}}, [[REG]], 63
78+
entry:
79+
%cmp = icmp ugt i8 %a, %b
80+
%conv3 = sext i1 %cmp to i8
81+
store i8 %conv3, i8* @glob, align 1
82+
ret void
83+
}
84+
85+
; Function Attrs: norecurse nounwind
86+
define void @test_igtuc_z_store(i8 zeroext %a) {
87+
; CHECK-LABEL: test_igtuc_z_store:
88+
; CHECK: cntlzw r3, r3
89+
; CHECK: srwi r3, r3, 5
90+
; CHECK: xori r3, r3, 1
91+
; CHECK: stb r3, 0(r4)
92+
; CHECK-NEXT: blr
93+
entry:
94+
%cmp = icmp ne i8 %a, 0
95+
%conv2 = zext i1 %cmp to i8
96+
store i8 %conv2, i8* @glob, align 1
97+
ret void
98+
}
99+
100+
; Function Attrs: norecurse nounwind
101+
define void @test_igtuc_sext_z_store(i8 zeroext %a) {
102+
; CHECK-LABEL: test_igtuc_sext_z_store:
103+
; CHECK: cntlzw r3, r3
104+
; CHECK: srwi r3, r3, 5
105+
; CHECK: xori r3, r3, 1
106+
; CHECK: neg r3, r3
107+
; CHECK: stb r3, 0(r4)
108+
; CHECK-NEXT: blr
109+
entry:
110+
%cmp = icmp ne i8 %a, 0
111+
%conv2 = sext i1 %cmp to i8
112+
store i8 %conv2, i8* @glob, align 1
113+
ret void
114+
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
2+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
3+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
4+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
5+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
6+
; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
7+
8+
@glob = common local_unnamed_addr global i32 0, align 4
9+
10+
; Function Attrs: norecurse nounwind readnone
11+
define signext i32 @test_igtui(i32 zeroext %a, i32 zeroext %b) {
12+
; CHECK-LABEL: test_igtui:
13+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
14+
; CHECK-NEXT: rldicl r3, [[REG]], 1, 63
15+
; CHECK-NEXT: blr
16+
entry:
17+
%cmp = icmp ugt i32 %a, %b
18+
%conv = zext i1 %cmp to i32
19+
ret i32 %conv
20+
}
21+
22+
; Function Attrs: norecurse nounwind readnone
23+
define signext i32 @test_igtui_sext(i32 zeroext %a, i32 zeroext %b) {
24+
; CHECK-LABEL: test_igtui_sext:
25+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
26+
; CHECK-NEXT: sradi r3, [[REG]], 63
27+
; CHECK-NEXT: blr
28+
entry:
29+
%cmp = icmp ugt i32 %a, %b
30+
%sub = sext i1 %cmp to i32
31+
ret i32 %sub
32+
}
33+
34+
; Function Attrs: norecurse nounwind readnone
35+
define signext i32 @test_igtui_z(i32 zeroext %a) {
36+
; CHECK-LABEL: test_igtui_z:
37+
; CHECK: cntlzw r3, r3
38+
; CHECK-NEXT: srwi r3, r3, 5
39+
; CHECK-NEXT: xori r3, r3, 1
40+
; CHECK-NEXT: blr
41+
entry:
42+
%cmp = icmp ne i32 %a, 0
43+
%conv = zext i1 %cmp to i32
44+
ret i32 %conv
45+
}
46+
47+
; Function Attrs: norecurse nounwind readnone
48+
define signext i32 @test_igtui_sext_z(i32 zeroext %a) {
49+
; CHECK-LABEL: test_igtui_sext_z:
50+
; CHECK: cntlzw r3, r3
51+
; CHECK-NEXT: srwi r3, r3, 5
52+
; CHECK-NEXT: xori r3, r3, 1
53+
; CHECK-NEXT: neg r3, r3
54+
; CHECK-NEXT: blr
55+
entry:
56+
%cmp = icmp ne i32 %a, 0
57+
%sub = sext i1 %cmp to i32
58+
ret i32 %sub
59+
}
60+
61+
; Function Attrs: norecurse nounwind
62+
define void @test_igtui_store(i32 zeroext %a, i32 zeroext %b) {
63+
; CHECK-LABEL: test_igtui_store:
64+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
65+
; CHECK: rldicl {{r[0-9]+}}, [[REG]], 1, 63
66+
entry:
67+
%cmp = icmp ugt i32 %a, %b
68+
%conv = zext i1 %cmp to i32
69+
store i32 %conv, i32* @glob, align 4
70+
ret void
71+
}
72+
73+
; Function Attrs: norecurse nounwind
74+
define void @test_igtui_sext_store(i32 zeroext %a, i32 zeroext %b) {
75+
; CHECK-LABEL: test_igtui_sext_store:
76+
; CHECK: sub [[REG:r[0-9]+]], r4, r3
77+
; CHECK: sradi {{r[0-9]+}}, [[REG]], 63
78+
entry:
79+
%cmp = icmp ugt i32 %a, %b
80+
%sub = sext i1 %cmp to i32
81+
store i32 %sub, i32* @glob, align 4
82+
ret void
83+
}
84+
85+
; Function Attrs: norecurse nounwind
86+
define void @test_igtui_z_store(i32 zeroext %a) {
87+
; CHECK-LABEL: test_igtui_z_store:
88+
; CHECK: cntlzw r3, r3
89+
; CHECK: srwi r3, r3, 5
90+
; CHECK: xori r3, r3, 1
91+
; CHECK: stw r3, 0(r4)
92+
; CHECK-NEXT: blr
93+
entry:
94+
%cmp = icmp ne i32 %a, 0
95+
%conv = zext i1 %cmp to i32
96+
store i32 %conv, i32* @glob, align 4
97+
ret void
98+
}
99+
100+
; Function Attrs: norecurse nounwind
101+
define void @test_igtui_sext_z_store(i32 zeroext %a) {
102+
; CHECK-LABEL: test_igtui_sext_z_store:
103+
; CHECK: cntlzw r3, r3
104+
; CHECK: srwi r3, r3, 5
105+
; CHECK: xori r3, r3, 1
106+
; CHECK: neg r3, r3
107+
; CHECK: stw r3, 0(r4)
108+
; CHECK-NEXT: blr
109+
entry:
110+
%cmp = icmp ne i32 %a, 0
111+
%sub = sext i1 %cmp to i32
112+
store i32 %sub, i32* @glob, align 4
113+
ret void
114+
}
115+

0 commit comments

Comments
 (0)