Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 96e2795

Browse files
committed
[SystemZ] Pass regalloc hints to help Load-and-Test transformations.
Since there is no "Load-and-Test-High" instruction, a 32-bit load of a register that is then compared with 0 can only be implemented with LT if the virtual GRX32 register ends up in a low part (a GR32 register). This patch detects these cases and passes the GR32 registers (low parts) as (soft) hints in getRegAllocationHints(). Review: Ulrich Weigand. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354935 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ea0d235 commit 96e2795

File tree

2 files changed

+204
-15
lines changed

2 files changed

+204
-15
lines changed

lib/Target/SystemZ/SystemZRegisterInfo.cpp

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,26 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO,
5353
return RC;
5454
}
5555

56+
// Pass the registers of RC as hints while making sure that if any of these
57+
// registers are copy hints (and therefore already in Hints), hint them
58+
// first.
59+
static void addHints(ArrayRef<MCPhysReg> Order,
60+
SmallVectorImpl<MCPhysReg> &Hints,
61+
const TargetRegisterClass *RC,
62+
const MachineRegisterInfo *MRI) {
63+
SmallSet<unsigned, 4> CopyHints;
64+
CopyHints.insert(Hints.begin(), Hints.end());
65+
Hints.clear();
66+
for (MCPhysReg Reg : Order)
67+
if (CopyHints.count(Reg) &&
68+
RC->contains(Reg) && !MRI->isReserved(Reg))
69+
Hints.push_back(Reg);
70+
for (MCPhysReg Reg : Order)
71+
if (!CopyHints.count(Reg) &&
72+
RC->contains(Reg) && !MRI->isReserved(Reg))
73+
Hints.push_back(Reg);
74+
}
75+
5676
bool
5777
SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
5878
ArrayRef<MCPhysReg> Order,
@@ -75,7 +95,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
7595
if (!DoneRegs.insert(Reg).second)
7696
continue;
7797

78-
for (auto &Use : MRI->use_instructions(Reg))
98+
for (auto &Use : MRI->use_instructions(Reg)) {
7999
// For LOCRMux, see if the other operand is already a high or low
80100
// register, and in that case give the corresponding hints for
81101
// VirtReg. LOCR instructions need both operands in either high or
@@ -87,19 +107,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
87107
TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
88108
getRC32(TrueMO, VRM, MRI));
89109
if (RC && RC != &SystemZ::GRX32BitRegClass) {
90-
// Pass the registers of RC as hints while making sure that if
91-
// any of these registers are copy hints, hint them first.
92-
SmallSet<unsigned, 4> CopyHints;
93-
CopyHints.insert(Hints.begin(), Hints.end());
94-
Hints.clear();
95-
for (MCPhysReg Reg : Order)
96-
if (CopyHints.count(Reg) &&
97-
RC->contains(Reg) && !MRI->isReserved(Reg))
98-
Hints.push_back(Reg);
99-
for (MCPhysReg Reg : Order)
100-
if (!CopyHints.count(Reg) &&
101-
RC->contains(Reg) && !MRI->isReserved(Reg))
102-
Hints.push_back(Reg);
110+
addHints(Order, Hints, RC, MRI);
103111
// Return true to make these hints the only regs available to
104112
// RA. This may mean extra spilling but since the alternative is
105113
// a jump sequence expansion of the LOCRMux, it is preferred.
@@ -111,7 +119,22 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
111119
(TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
112120
if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass)
113121
Worklist.push_back(OtherReg);
114-
}
122+
} // end LOCRMux
123+
else if (Use.getOpcode() == SystemZ::CHIMux ||
124+
Use.getOpcode() == SystemZ::CFIMux) {
125+
if (Use.getOperand(1).getImm() == 0) {
126+
bool OnlyLMuxes = true;
127+
for (MachineInstr &DefMI : MRI->def_instructions(VirtReg))
128+
if (DefMI.getOpcode() != SystemZ::LMux)
129+
OnlyLMuxes = false;
130+
if (OnlyLMuxes) {
131+
addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
132+
// Return false to make these hints preferred but not obligatory.
133+
return false;
134+
}
135+
}
136+
} // end CHIMux / CFIMux
137+
}
115138
}
116139
}
117140

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=greedy %s -o - \
2+
# RUN: -debug-only=regalloc 2>&1 | FileCheck %s
3+
#
4+
# REQUIRES: asserts
5+
#
6+
# Test that regalloc hints are passed for compare with zero cases that can be
7+
# converted to load-and-test.
8+
9+
--- |
10+
; ModuleID = './tc.ll'
11+
source_filename = "proof.c"
12+
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
13+
target triple = "s390x-ibm-linux"
14+
15+
@rootlosers = external dso_local local_unnamed_addr global [300 x i32], align 4
16+
17+
define dso_local void @proofnumberscan() local_unnamed_addr #0 {
18+
bb:
19+
br i1 undef, label %bb20.preheader, label %bb1.preheader
20+
21+
bb1.preheader: ; preds = %bb
22+
br label %bb1
23+
24+
bb20.preheader: ; preds = %bb
25+
br label %bb20
26+
27+
bb1: ; preds = %bb1.preheader, %bb15
28+
%lsr.iv3 = phi [512 x i32]* [ undef, %bb1.preheader ], [ %2, %bb15 ]
29+
%lsr.iv1 = phi [300 x i32]* [ @rootlosers, %bb1.preheader ], [ %1, %bb15 ]
30+
%lsr.iv = phi i32 [ 0, %bb1.preheader ], [ %lsr.iv.next, %bb15 ]
31+
%tmp2 = phi i32 [ %tmp18, %bb15 ], [ 0, %bb1.preheader ]
32+
%tmp3 = phi i32 [ %tmp17, %bb15 ], [ 100000000, %bb1.preheader ]
33+
%lsr.iv35 = bitcast [512 x i32]* %lsr.iv3 to i32*
34+
%tmp5 = load i32, i32* %lsr.iv35, align 4, !tbaa !1
35+
%tmp6 = load i32, i32* undef, align 4, !tbaa !1
36+
%tmp7 = icmp eq i32 %tmp6, 0
37+
br i1 %tmp7, label %bb15, label %bb8
38+
39+
bb8: ; preds = %bb1
40+
%0 = bitcast [300 x i32]* %lsr.iv1 to i32*
41+
%tmp10 = load i32, i32* %0, align 4, !tbaa !1
42+
%tmp11 = icmp eq i32 %tmp10, 0
43+
%tmp12 = select i1 %tmp11, i32 %tmp5, i32 %tmp3
44+
%tmp14 = select i1 %tmp11, i32 %lsr.iv, i32 %tmp2
45+
br label %bb15
46+
47+
bb15: ; preds = %bb8, %bb1
48+
%tmp16 = phi i32 [ 0, %bb1 ], [ %tmp6, %bb8 ]
49+
%tmp17 = phi i32 [ %tmp3, %bb1 ], [ %tmp12, %bb8 ]
50+
%tmp18 = phi i32 [ %tmp2, %bb1 ], [ %tmp14, %bb8 ]
51+
%lsr.iv.next = add i32 %lsr.iv, 4
52+
%scevgep = getelementptr [300 x i32], [300 x i32]* %lsr.iv1, i64 0, i64 4
53+
%1 = bitcast i32* %scevgep to [300 x i32]*
54+
%scevgep4 = getelementptr [512 x i32], [512 x i32]* %lsr.iv3, i64 0, i64 4
55+
%2 = bitcast i32* %scevgep4 to [512 x i32]*
56+
br label %bb1
57+
58+
bb20: ; preds = %bb20, %bb20.preheader
59+
br label %bb20
60+
}
61+
62+
attributes #0 = { "target-cpu"="z13" "use-soft-float"="false" }
63+
64+
!llvm.ident = !{!0}
65+
66+
!0 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git 29e2813a2ab7d5569860bb07892dfef7b5374d96) (http://llvm.org/git/llvm.git 546f779cb9d4ac2ce9c9b9522019f500abca9522)"}
67+
!1 = !{!2, !2, i64 0}
68+
!2 = !{!"int", !3, i64 0}
69+
!3 = !{!"omnipotent char", !4, i64 0}
70+
!4 = !{!"Simple C/C++ TBAA"}
71+
72+
...
73+
74+
# CHECK: ********** MACHINEINSTRS **********
75+
# CHECK: LMux
76+
# CHECK: [[VREG0:%[0-9]+]]:grx32bit = LMux
77+
# CHECK: CHIMux [[VREG0]]:grx32bit, 0, implicit-def $cc
78+
# CHECK: [[VREG1:%[0-9]+]]:grx32bit = LMux
79+
# CHECK: CHIMux [[VREG1]]:grx32bit, 0, implicit-def $cc
80+
# CHECK: selectOrSplit GRX32Bit:[[VREG0]]
81+
# CHECK-NEXT: hints: $r0l $r1l $r2l $r3l $r4l $r5l $r14l $r13l $r12l $r11l $r10l $r9l $r8l $r7l $r6l
82+
# CHECK-NEXT: assigning [[VREG0]] to $[[PREG0:r[0-9]+]]l
83+
# CHECK: selectOrSplit GRX32Bit:[[VREG1]]
84+
# CHECK-NEXT: hints: $r0l $r1l $r2l $r3l $r4l $r5l $r14l $r13l $r12l $r11l $r10l $r9l $r8l $r7l $r6l
85+
# CHECK-NEXT: assigning [[VREG1]] to $[[PREG1:r[0-9]+]]l
86+
# CHECK: lt %[[PREG0]]
87+
# CHECK: lt %[[PREG1]]
88+
89+
---
90+
name: proofnumberscan
91+
alignment: 4
92+
tracksRegLiveness: true
93+
registers:
94+
- { id: 0, class: addr64bit }
95+
- { id: 1, class: addr64bit }
96+
- { id: 2, class: grx32bit }
97+
- { id: 3, class: grx32bit }
98+
- { id: 4, class: grx32bit }
99+
- { id: 5, class: grx32bit }
100+
- { id: 6, class: grx32bit }
101+
- { id: 7, class: grx32bit }
102+
- { id: 8, class: grx32bit }
103+
- { id: 9, class: grx32bit }
104+
- { id: 10, class: grx32bit }
105+
- { id: 11, class: grx32bit }
106+
- { id: 12, class: gr64bit }
107+
- { id: 13, class: gr64bit }
108+
- { id: 14, class: grx32bit }
109+
- { id: 15, class: gr64bit }
110+
- { id: 16, class: gr64bit }
111+
- { id: 17, class: grx32bit }
112+
- { id: 18, class: grx32bit }
113+
- { id: 19, class: addr64bit }
114+
- { id: 20, class: grx32bit }
115+
- { id: 21, class: addr64bit }
116+
- { id: 22, class: addr64bit }
117+
- { id: 23, class: grx32bit }
118+
- { id: 24, class: grx32bit }
119+
- { id: 25, class: grx32bit }
120+
- { id: 26, class: grx32bit }
121+
- { id: 27, class: grx32bit }
122+
body: |
123+
bb.0.bb:
124+
successors: %bb.1, %bb.2
125+
126+
%23:grx32bit = LHIMux 0
127+
CHIMux %23, 0, implicit-def $cc
128+
BRC 14, 8, %bb.2, implicit killed $cc
129+
130+
bb.1:
131+
J %bb.6
132+
133+
bb.2.bb1.preheader:
134+
%25:grx32bit = IIFMux 100000000
135+
%22:addr64bit = LARL @rootlosers
136+
%21:addr64bit = IMPLICIT_DEF
137+
%24:grx32bit = LHIMux 0
138+
J %bb.3
139+
140+
bb.3.bb1:
141+
successors: %bb.7(0x30000000), %bb.4(0x50000000)
142+
143+
%5:grx32bit = LMux %21, 0, $noreg :: (load 4 from %ir.lsr.iv35, !tbaa !1)
144+
%6:grx32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`, !tbaa !1)
145+
CHIMux %6, 0, implicit-def $cc
146+
BRC 14, 6, %bb.4, implicit killed $cc
147+
148+
bb.7:
149+
J %bb.5
150+
151+
bb.4.bb8:
152+
%20:grx32bit = LMux %22, 0, $noreg :: (load 4 from %ir.0, !tbaa !1)
153+
CHIMux %20, 0, implicit-def $cc
154+
%25:grx32bit = LOCRMux %25, %5, 14, 8, implicit $cc
155+
%24:grx32bit = LOCRMux %24, %23, 14, 8, implicit killed $cc
156+
157+
bb.5.bb15:
158+
%23:grx32bit = AHIMux %23, 4, implicit-def dead $cc
159+
%22:addr64bit = LA %22, 16, $noreg
160+
%21:addr64bit = LA %21, 16, $noreg
161+
J %bb.3
162+
163+
bb.6.bb20:
164+
J %bb.6
165+
166+
...

0 commit comments

Comments
 (0)