Skip to content

Commit 709c951

Browse files
committed
SpillPlacement: fix a bug in iterate.
Inside iterate, we scan backwards and then forwards in a loop. When iteration is not zero, the last node was just updated, so we can skip it. But when iteration is zero, we cannot skip the last node. For the test case, this fix saves a spill and moves register copies from the hot path to the cold path. llvm-svn: 202557
1 parent 7c8743d commit 709c951

File tree

2 files changed

+296
-2
lines changed

2 files changed

+296
-2
lines changed

llvm/lib/CodeGen/SpillPlacement.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,10 +323,12 @@ void SpillPlacement::iterate() {
323323
// affect the entire network in a single iteration. That means very fast
324324
// convergence, usually in a single iteration.
325325
for (unsigned iteration = 0; iteration != 10; ++iteration) {
326-
// Scan backwards, skipping the last node which was just updated.
326+
// Scan backwards, skipping the last node when iteration is not zero. When
327+
// iteration is not zero, the last node was just updated.
327328
bool Changed = false;
328329
for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
329-
llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
330+
iteration == 0 ? Linked.rbegin() : llvm::next(Linked.rbegin()),
331+
E = Linked.rend(); I != E; ++I) {
330332
unsigned n = *I;
331333
if (nodes[n].update(nodes)) {
332334
Changed = true;

llvm/test/CodeGen/X86/ragreedy-bug.ll

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s
2+
3+
; This test case is reduced from the prune_match function in 197.parser.
4+
; We make sure register copies are not generated in the isupper.exit blocks.
5+
6+
; CHECK: isupper.exit
7+
; CHECK-NEXT: in Loop
8+
; CHECK-NEXT: testl
9+
; CHECK-NEXT: jne
10+
; CHECK: isupper.exit
11+
; CHECK-NEXT: in Loop
12+
; CHECK-NEXT: testl
13+
; CHECK-NEXT: je
14+
; CHECK: maskrune
15+
; CHECK: maskrune
16+
17+
%struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* }
18+
%struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* }
19+
%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i64, i8**)*, i32 (i32, i8*, i64, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
20+
%struct._RuneRange = type { i32, %struct._RuneEntry* }
21+
%struct._RuneEntry = type { i32, i32, i32, i32* }
22+
%struct._RuneCharClass = type { [14 x i8], i32 }
23+
%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
24+
%union.anon = type { %struct.E_list_struct* }
25+
%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
26+
%struct.domain_struct = type { i8*, i32, %struct.List_o_links_struct*, i32, i32, %struct.d_tree_leaf_struct*, %struct.domain_struct* }
27+
%struct.d_tree_leaf_struct = type { %struct.domain_struct*, i32, %struct.d_tree_leaf_struct* }
28+
@_DefaultRuneLocale = external global %struct._RuneLocale
29+
declare i32 @__maskrune(i32, i64) #7
30+
define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 {
31+
entry:
32+
%label56 = bitcast %struct.Connector_struct* %a to i16*
33+
%0 = load i16* %label56, align 2
34+
%label157 = bitcast %struct.Connector_struct* %b to i16*
35+
%1 = load i16* %label157, align 2
36+
%cmp = icmp eq i16 %0, %1
37+
br i1 %cmp, label %if.end, label %return, !prof !988
38+
if.end:
39+
%priority = getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 2
40+
%2 = load i8* %priority, align 1
41+
%priority5 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 2
42+
%3 = load i8* %priority5, align 1
43+
%string = getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 5
44+
%4 = load i8** %string, align 8
45+
%string7 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 5
46+
%5 = load i8** %string7, align 8
47+
br label %while.cond
48+
while.cond:
49+
%lsr.iv27 = phi i64 [ %lsr.iv.next28, %if.end17 ], [ 0, %if.end ]
50+
%scevgep55 = getelementptr i8* %4, i64 %lsr.iv27
51+
%6 = load i8* %scevgep55, align 1
52+
%idxprom.i.i = sext i8 %6 to i64
53+
%isascii.i.i224 = icmp sgt i8 %6, -1
54+
br i1 %isascii.i.i224, label %cond.true.i.i, label %cond.false.i.i, !prof !181
55+
cond.true.i.i:
56+
%arrayidx.i.i = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i
57+
%7 = load i32* %arrayidx.i.i, align 4
58+
%and.i.i = and i32 %7, 32768
59+
br label %isupper.exit
60+
cond.false.i.i:
61+
%8 = trunc i64 %idxprom.i.i to i8
62+
%conv8 = sext i8 %8 to i32
63+
%call3.i.i = tail call i32 @__maskrune(i32 %conv8, i64 32768) #3
64+
br label %isupper.exit
65+
isupper.exit:
66+
%tobool1.sink.i.in.i = phi i32 [ %and.i.i, %cond.true.i.i ], [ %call3.i.i, %cond.false.i.i ]
67+
%tobool1.sink.i.i = icmp eq i32 %tobool1.sink.i.in.i, 0
68+
br i1 %tobool1.sink.i.i, label %lor.rhs, label %while.body, !prof !989
69+
lor.rhs:
70+
%sunkaddr = ptrtoint i8* %5 to i64
71+
%sunkaddr58 = add i64 %sunkaddr, %lsr.iv27
72+
%sunkaddr59 = inttoptr i64 %sunkaddr58 to i8*
73+
%9 = load i8* %sunkaddr59, align 1
74+
%idxprom.i.i214 = sext i8 %9 to i64
75+
%isascii.i.i213225 = icmp sgt i8 %9, -1
76+
br i1 %isascii.i.i213225, label %cond.true.i.i217, label %cond.false.i.i219, !prof !181
77+
cond.true.i.i217:
78+
%arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214
79+
%10 = load i32* %arrayidx.i.i215, align 4
80+
%and.i.i216 = and i32 %10, 32768
81+
br label %isupper.exit223
82+
cond.false.i.i219:
83+
%11 = trunc i64 %idxprom.i.i214 to i8
84+
%conv9 = sext i8 %11 to i32
85+
%call3.i.i218 = tail call i32 @__maskrune(i32 %conv9, i64 32768) #3
86+
br label %isupper.exit223
87+
isupper.exit223:
88+
%tobool1.sink.i.in.i220 = phi i32 [ %and.i.i216, %cond.true.i.i217 ], [ %call3.i.i218, %cond.false.i.i219 ]
89+
%tobool1.sink.i.i221 = icmp eq i32 %tobool1.sink.i.in.i220, 0
90+
br i1 %tobool1.sink.i.i221, label %while.end, label %while.body, !prof !990
91+
while.body:
92+
%sunkaddr60 = ptrtoint i8* %4 to i64
93+
%sunkaddr61 = add i64 %sunkaddr60, %lsr.iv27
94+
%sunkaddr62 = inttoptr i64 %sunkaddr61 to i8*
95+
%12 = load i8* %sunkaddr62, align 1
96+
%sunkaddr63 = ptrtoint i8* %5 to i64
97+
%sunkaddr64 = add i64 %sunkaddr63, %lsr.iv27
98+
%sunkaddr65 = inttoptr i64 %sunkaddr64 to i8*
99+
%13 = load i8* %sunkaddr65, align 1
100+
%cmp14 = icmp eq i8 %12, %13
101+
br i1 %cmp14, label %if.end17, label %return, !prof !991
102+
if.end17:
103+
%lsr.iv.next28 = add i64 %lsr.iv27, 1
104+
br label %while.cond
105+
while.end:
106+
%14 = or i8 %3, %2
107+
%15 = icmp eq i8 %14, 0
108+
br i1 %15, label %if.then23, label %if.else88, !prof !992
109+
if.then23:
110+
%sunkaddr66 = ptrtoint %struct.Connector_struct* %a to i64
111+
%sunkaddr67 = add i64 %sunkaddr66, 16
112+
%sunkaddr68 = inttoptr i64 %sunkaddr67 to i8**
113+
%16 = load i8** %sunkaddr68, align 8
114+
%17 = load i8* %16, align 1
115+
%cmp26 = icmp eq i8 %17, 83
116+
%sunkaddr69 = ptrtoint i8* %4 to i64
117+
%sunkaddr70 = add i64 %sunkaddr69, %lsr.iv27
118+
%sunkaddr71 = inttoptr i64 %sunkaddr70 to i8*
119+
%18 = load i8* %sunkaddr71, align 1
120+
br i1 %cmp26, label %land.lhs.true28, label %while.cond59.preheader, !prof !993
121+
land.lhs.true28:
122+
switch i8 %18, label %land.rhs.preheader [
123+
i8 112, label %land.lhs.true35
124+
i8 0, label %return
125+
], !prof !994
126+
land.lhs.true35:
127+
%sunkaddr72 = ptrtoint i8* %5 to i64
128+
%sunkaddr73 = add i64 %sunkaddr72, %lsr.iv27
129+
%sunkaddr74 = inttoptr i64 %sunkaddr73 to i8*
130+
%19 = load i8* %sunkaddr74, align 1
131+
switch i8 %19, label %land.rhs.preheader [
132+
i8 112, label %land.lhs.true43
133+
], !prof !995
134+
land.lhs.true43:
135+
%20 = ptrtoint i8* %16 to i64
136+
%21 = sub i64 0, %20
137+
%scevgep52 = getelementptr i8* %4, i64 %21
138+
%scevgep53 = getelementptr i8* %scevgep52, i64 %lsr.iv27
139+
%scevgep54 = getelementptr i8* %scevgep53, i64 -1
140+
%cmp45 = icmp eq i8* %scevgep54, null
141+
br i1 %cmp45, label %return, label %lor.lhs.false47, !prof !996
142+
lor.lhs.false47:
143+
%22 = ptrtoint i8* %16 to i64
144+
%23 = sub i64 0, %22
145+
%scevgep47 = getelementptr i8* %4, i64 %23
146+
%scevgep48 = getelementptr i8* %scevgep47, i64 %lsr.iv27
147+
%scevgep49 = getelementptr i8* %scevgep48, i64 -2
148+
%cmp50 = icmp eq i8* %scevgep49, null
149+
br i1 %cmp50, label %land.lhs.true52, label %while.cond59.preheader, !prof !997
150+
land.lhs.true52:
151+
%sunkaddr75 = ptrtoint i8* %4 to i64
152+
%sunkaddr76 = add i64 %sunkaddr75, %lsr.iv27
153+
%sunkaddr77 = add i64 %sunkaddr76, -1
154+
%sunkaddr78 = inttoptr i64 %sunkaddr77 to i8*
155+
%24 = load i8* %sunkaddr78, align 1
156+
%cmp55 = icmp eq i8 %24, 73
157+
%cmp61233 = icmp eq i8 %18, 0
158+
%or.cond265 = or i1 %cmp55, %cmp61233
159+
br i1 %or.cond265, label %return, label %land.rhs.preheader, !prof !998
160+
while.cond59.preheader:
161+
%cmp61233.old = icmp eq i8 %18, 0
162+
br i1 %cmp61233.old, label %return, label %land.rhs.preheader, !prof !999
163+
land.rhs.preheader:
164+
%scevgep33 = getelementptr i8* %5, i64 %lsr.iv27
165+
%scevgep43 = getelementptr i8* %4, i64 %lsr.iv27
166+
br label %land.rhs
167+
land.rhs:
168+
%lsr.iv = phi i64 [ 0, %land.rhs.preheader ], [ %lsr.iv.next, %if.then83 ]
169+
%25 = phi i8 [ %27, %if.then83 ], [ %18, %land.rhs.preheader ]
170+
%scevgep34 = getelementptr i8* %scevgep33, i64 %lsr.iv
171+
%26 = load i8* %scevgep34, align 1
172+
%cmp64 = icmp eq i8 %26, 0
173+
br i1 %cmp64, label %return, label %while.body66, !prof !1000
174+
while.body66:
175+
%cmp68 = icmp eq i8 %25, 42
176+
%cmp72 = icmp eq i8 %26, 42
177+
%or.cond = or i1 %cmp68, %cmp72
178+
br i1 %or.cond, label %if.then83, label %lor.lhs.false74, !prof !1001
179+
lor.lhs.false74:
180+
%cmp77 = icmp ne i8 %25, %26
181+
%cmp81 = icmp eq i8 %25, 94
182+
%or.cond208 = or i1 %cmp77, %cmp81
183+
br i1 %or.cond208, label %return, label %if.then83, !prof !1002
184+
if.then83:
185+
%scevgep44 = getelementptr i8* %scevgep43, i64 %lsr.iv
186+
%scevgep45 = getelementptr i8* %scevgep44, i64 1
187+
%27 = load i8* %scevgep45, align 1
188+
%cmp61 = icmp eq i8 %27, 0
189+
%lsr.iv.next = add i64 %lsr.iv, 1
190+
br i1 %cmp61, label %return, label %land.rhs, !prof !999
191+
if.else88:
192+
%cmp89 = icmp eq i8 %2, 1
193+
%cmp92 = icmp eq i8 %3, 2
194+
%or.cond159 = and i1 %cmp89, %cmp92
195+
br i1 %or.cond159, label %while.cond95.preheader, label %if.else123, !prof !1003
196+
while.cond95.preheader:
197+
%sunkaddr79 = ptrtoint i8* %4 to i64
198+
%sunkaddr80 = add i64 %sunkaddr79, %lsr.iv27
199+
%sunkaddr81 = inttoptr i64 %sunkaddr80 to i8*
200+
%28 = load i8* %sunkaddr81, align 1
201+
%cmp97238 = icmp eq i8 %28, 0
202+
br i1 %cmp97238, label %return, label %land.rhs99.preheader, !prof !1004
203+
land.rhs99.preheader:
204+
%scevgep31 = getelementptr i8* %5, i64 %lsr.iv27
205+
%scevgep40 = getelementptr i8* %4, i64 %lsr.iv27
206+
br label %land.rhs99
207+
land.rhs99:
208+
%lsr.iv17 = phi i64 [ 0, %land.rhs99.preheader ], [ %lsr.iv.next18, %if.then117 ]
209+
%29 = phi i8 [ %31, %if.then117 ], [ %28, %land.rhs99.preheader ]
210+
%scevgep32 = getelementptr i8* %scevgep31, i64 %lsr.iv17
211+
%30 = load i8* %scevgep32, align 1
212+
%cmp101 = icmp eq i8 %30, 0
213+
br i1 %cmp101, label %return, label %while.body104, !prof !1005
214+
while.body104:
215+
%cmp107 = icmp eq i8 %29, %30
216+
%cmp111 = icmp eq i8 %29, 42
217+
%or.cond209 = or i1 %cmp107, %cmp111
218+
%cmp115 = icmp eq i8 %30, 94
219+
%or.cond210 = or i1 %or.cond209, %cmp115
220+
br i1 %or.cond210, label %if.then117, label %return, !prof !1006
221+
if.then117:
222+
%scevgep41 = getelementptr i8* %scevgep40, i64 %lsr.iv17
223+
%scevgep42 = getelementptr i8* %scevgep41, i64 1
224+
%31 = load i8* %scevgep42, align 1
225+
%cmp97 = icmp eq i8 %31, 0
226+
%lsr.iv.next18 = add i64 %lsr.iv17, 1
227+
br i1 %cmp97, label %return, label %land.rhs99, !prof !1004
228+
if.else123:
229+
%cmp124 = icmp eq i8 %3, 1
230+
%cmp127 = icmp eq i8 %2, 2
231+
%or.cond160 = and i1 %cmp124, %cmp127
232+
br i1 %or.cond160, label %while.cond130.preheader, label %return, !prof !1007
233+
while.cond130.preheader:
234+
%sunkaddr82 = ptrtoint i8* %4 to i64
235+
%sunkaddr83 = add i64 %sunkaddr82, %lsr.iv27
236+
%sunkaddr84 = inttoptr i64 %sunkaddr83 to i8*
237+
%32 = load i8* %sunkaddr84, align 1
238+
%cmp132244 = icmp eq i8 %32, 0
239+
br i1 %cmp132244, label %return, label %land.rhs134.preheader, !prof !1008
240+
land.rhs134.preheader:
241+
%scevgep29 = getelementptr i8* %5, i64 %lsr.iv27
242+
%scevgep37 = getelementptr i8* %4, i64 %lsr.iv27
243+
br label %land.rhs134
244+
land.rhs134:
245+
%lsr.iv22 = phi i64 [ 0, %land.rhs134.preheader ], [ %lsr.iv.next23, %if.then152 ]
246+
%33 = phi i8 [ %35, %if.then152 ], [ %32, %land.rhs134.preheader ]
247+
%scevgep30 = getelementptr i8* %scevgep29, i64 %lsr.iv22
248+
%34 = load i8* %scevgep30, align 1
249+
%cmp136 = icmp eq i8 %34, 0
250+
br i1 %cmp136, label %return, label %while.body139, !prof !1009
251+
while.body139:
252+
%cmp142 = icmp eq i8 %33, %34
253+
%cmp146 = icmp eq i8 %34, 42
254+
%or.cond211 = or i1 %cmp142, %cmp146
255+
%cmp150 = icmp eq i8 %33, 94
256+
%or.cond212 = or i1 %or.cond211, %cmp150
257+
br i1 %or.cond212, label %if.then152, label %return, !prof !1010
258+
if.then152:
259+
%scevgep38 = getelementptr i8* %scevgep37, i64 %lsr.iv22
260+
%scevgep39 = getelementptr i8* %scevgep38, i64 1
261+
%35 = load i8* %scevgep39, align 1
262+
%cmp132 = icmp eq i8 %35, 0
263+
%lsr.iv.next23 = add i64 %lsr.iv22, 1
264+
br i1 %cmp132, label %return, label %land.rhs134, !prof !1008
265+
return:
266+
%retval.0 = phi i32 [ 0, %entry ], [ 1, %land.lhs.true52 ], [ 1, %land.lhs.true43 ], [ 0, %if.else123 ], [ 1, %while.cond59.preheader ], [ 1, %while.cond95.preheader ], [ 1, %while.cond130.preheader ], [ 1, %land.lhs.true28 ], [ 1, %if.then83 ], [ 0, %lor.lhs.false74 ], [ 1, %land.rhs ], [ 1, %if.then117 ], [ 0, %while.body104 ], [ 1, %land.rhs99 ], [ 1, %if.then152 ], [ 0, %while.body139 ], [ 1, %land.rhs134 ], [ 0, %while.body ]
267+
ret i32 %retval.0
268+
}
269+
!181 = metadata !{metadata !"branch_weights", i32 662038, i32 1}
270+
!988 = metadata !{metadata !"branch_weights", i32 12091450, i32 1916}
271+
!989 = metadata !{metadata !"branch_weights", i32 7564670, i32 4526781}
272+
!990 = metadata !{metadata !"branch_weights", i32 7484958, i32 13283499}
273+
!991 = metadata !{metadata !"branch_weights", i32 8677007, i32 4606493}
274+
!992 = metadata !{metadata !"branch_weights", i32 -1172426948, i32 145094705}
275+
!993 = metadata !{metadata !"branch_weights", i32 1468914, i32 5683688}
276+
!994 = metadata !{metadata !"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616}
277+
!995 = metadata !{metadata !"branch_weights", i32 1853716452, i32 -444717951, i32 932776759}
278+
!996 = metadata !{metadata !"branch_weights", i32 1004870, i32 20259}
279+
!997 = metadata !{metadata !"branch_weights", i32 20071, i32 189}
280+
!998 = metadata !{metadata !"branch_weights", i32 -1020255939, i32 572177766}
281+
!999 = metadata !{metadata !"branch_weights", i32 2666513, i32 3466431}
282+
!1000 = metadata !{metadata !"branch_weights", i32 5117635, i32 1859780}
283+
!1001 = metadata !{metadata !"branch_weights", i32 354902465, i32 -1444604407}
284+
!1002 = metadata !{metadata !"branch_weights", i32 -1762419279, i32 1592770684}
285+
!1003 = metadata !{metadata !"branch_weights", i32 1435905930, i32 -1951930624}
286+
!1004 = metadata !{metadata !"branch_weights", i32 1, i32 504888}
287+
!1005 = metadata !{metadata !"branch_weights", i32 94662, i32 504888}
288+
!1006 = metadata !{metadata !"branch_weights", i32 -1897793104, i32 160196332}
289+
!1007 = metadata !{metadata !"branch_weights", i32 2074643678, i32 -29579071}
290+
!1008 = metadata !{metadata !"branch_weights", i32 1, i32 226163}
291+
!1009 = metadata !{metadata !"branch_weights", i32 58357, i32 226163}
292+
!1010 = metadata !{metadata !"branch_weights", i32 -2072848646, i32 92907517}

0 commit comments

Comments
 (0)