Skip to content

Commit c8757ff

Browse files
committed
RegAllocFast: Rewrite and improve
This rewrites big parts of the fast register allocator. The basic strategy of doing block-local allocation hasn't changed, but I tweaked several details: (1) Track register state on register units instead of physical registers. This simplifies and speeds up handling of register aliases. (2) Process basic blocks in reverse order: definitions are known to end register lifetimes when walking backwards (in contrast, when walking forwards a use may or may not be a kill, so we need heuristics). (3) Check register mask operands (calls) instead of conservatively assuming everything is clobbered. (4) Enhance heuristics to detect killing uses: when a register has only a small number of defs/uses, check whether they are all in the same basic block; if so, the last one is a killing use. (5) Enhance the heuristic for copy-coalescing through hinting: we check the first k defs of a register for COPYs rather than relying on there being just a single definition. When testing this on the full LLVM test-suite including SPEC externals I measured: an average 5.1% reduction in code size for X86 and a 4.9% reduction in code size on AArch64 (ranging between 0% and 20% depending on the test); 0.5% faster compile time (some analysis suggests the pass is slightly slower than before, but we more than make up for it because later passes are faster with the reduced instruction count). Also adds a few testcases that were broken without this patch, in particular bug 47278. Patch mostly by Matthias Braun.
1 parent 870fd53 commit c8757ff

File tree

184 files changed

+11181
-10800
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

184 files changed

+11181
-10800
lines changed

llvm/lib/CodeGen/RegAllocFast.cpp

Lines changed: 725 additions & 547 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,8 @@ target triple = "arm64-apple-ios13.0.0"
3131
; This test checks that we don't re-use the register for the variable descriptor
3232
; for the second ldr.
3333
; CHECK: adrp x[[PTR1:[0-9]+]], _t_val@TLVPPAGE
34-
; CHECK: ldr x[[PTR1]], [x[[PTR1]], _t_val@TLVPPAGEOFF]
35-
; CHECK: ldr x[[FPTR:[0-9]+]], [x[[PTR1]]]
36-
; CHECK: mov x0, x[[PTR1]]
34+
; CHECK: ldr x0, [x[[PTR1]], _t_val@TLVPPAGEOFF]
35+
; CHECK: ldr x[[FPTR:[0-9]+]], [x0]
3736
; CHECK: blr x[[FPTR]]
3837

3938
define void @_Z4funcPKc(i8* %id) {

llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ entry:
9494
store i32 %c, i32* %c.addr, align 4
9595
store i64 %d, i64* %d.addr, align 8
9696
%0 = load i16, i16* %b.addr, align 2
97-
; CHECK: tbz w8, #0, LBB4_2
97+
; CHECK: tbz {{w[0-9]+}}, #0, LBB4_2
9898
%conv = trunc i16 %0 to i1
9999
br i1 %conv, label %if.then, label %if.end
100100

llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8
7979
define i32 @t2() {
8080
entry:
8181
; CHECK-LABEL: t2
82-
; CHECK: mov [[REG1:x[0-9]+]], xzr
83-
; CHECK: mov x0, [[REG1]]
82+
; CHECK: mov x0, xzr
8483
; CHECK: mov w1, #-8
8584
; CHECK: mov [[REG2:w[0-9]+]], #1023
8685
; CHECK: uxth w2, [[REG2]]

llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
define i32 @fptosi_wh(half %a) nounwind ssp {
55
entry:
66
; CHECK-LABEL: fptosi_wh
7-
; CHECK: fcvt s0, h0
8-
; CHECK: fcvtzs [[REG:w[0-9]+]], s0
9-
; CHECK: mov w0, [[REG]]
7+
; CHECK: fcvt [[REG:s[0-9]+]], h0
8+
; CHECK: fcvtzs w0, [[REG]]
109
%conv = fptosi half %a to i32
1110
ret i32 %conv
1211
}
@@ -15,9 +14,8 @@ entry:
1514
define i32 @fptoui_swh(half %a) nounwind ssp {
1615
entry:
1716
; CHECK-LABEL: fptoui_swh
18-
; CHECK: fcvt s0, h0
19-
; CHECK: fcvtzu [[REG:w[0-9]+]], s0
20-
; CHECK: mov w0, [[REG]]
17+
; CHECK: fcvt [[REG:s[0-9]+]], h0
18+
; CHECK: fcvtzu w0, [[REG]]
2119
%conv = fptoui half %a to i32
2220
ret i32 %conv
2321
}
@@ -26,8 +24,8 @@ entry:
2624
define half @sitofp_hw_i1(i1 %a) nounwind ssp {
2725
entry:
2826
; CHECK-LABEL: sitofp_hw_i1
29-
; CHECK: sbfx w8, w0, #0, #1
30-
; CHECK: scvtf s0, w8
27+
; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
28+
; CHECK: scvtf s0, [[REG]]
3129
; CHECK: fcvt h0, s0
3230
%conv = sitofp i1 %a to half
3331
ret half %conv
@@ -37,8 +35,8 @@ entry:
3735
define half @sitofp_hw_i8(i8 %a) nounwind ssp {
3836
entry:
3937
; CHECK-LABEL: sitofp_hw_i8
40-
; CHECK: sxtb w8, w0
41-
; CHECK: scvtf s0, w8
38+
; CHECK: sxtb [[REG:w[0-9]+]], w0
39+
; CHECK: scvtf s0, [[REG]]
4240
; CHECK: fcvt h0, s0
4341
%conv = sitofp i8 %a to half
4442
ret half %conv
@@ -48,8 +46,8 @@ entry:
4846
define half @sitofp_hw_i16(i16 %a) nounwind ssp {
4947
entry:
5048
; CHECK-LABEL: sitofp_hw_i16
51-
; CHECK: sxth w8, w0
52-
; CHECK: scvtf s0, w8
49+
; CHECK: sxth [[REG:w[0-9]+]], w0
50+
; CHECK: scvtf s0, [[REG]]
5351
; CHECK: fcvt h0, s0
5452
%conv = sitofp i16 %a to half
5553
ret half %conv
@@ -79,8 +77,8 @@ entry:
7977
define half @uitofp_hw_i1(i1 %a) nounwind ssp {
8078
entry:
8179
; CHECK-LABEL: uitofp_hw_i1
82-
; CHECK: and w8, w0, #0x1
83-
; CHECK: ucvtf s0, w8
80+
; CHECK: and [[REG:w[0-9]+]], w0, #0x1
81+
; CHECK: ucvtf s0, [[REG]]
8482
; CHECK: fcvt h0, s0
8583
%conv = uitofp i1 %a to half
8684
ret half %conv
@@ -90,8 +88,8 @@ entry:
9088
define half @uitofp_hw_i8(i8 %a) nounwind ssp {
9189
entry:
9290
; CHECK-LABEL: uitofp_hw_i8
93-
; CHECK: and w8, w0, #0xff
94-
; CHECK: ucvtf s0, w8
91+
; CHECK: and [[REG:w[0-9]+]], w0, #0xff
92+
; CHECK: ucvtf s0, [[REG]]
9593
; CHECK: fcvt h0, s0
9694
%conv = uitofp i8 %a to half
9795
ret half %conv
@@ -101,8 +99,8 @@ entry:
10199
define half @uitofp_hw_i16(i16 %a) nounwind ssp {
102100
entry:
103101
; CHECK-LABEL: uitofp_hw_i16
104-
; CHECK: and w8, w0, #0xffff
105-
; CHECK: ucvtf s0, w8
102+
; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
103+
; CHECK: ucvtf s0, [[REG]]
106104
; CHECK: fcvt h0, s0
107105
%conv = uitofp i16 %a to half
108106
ret half %conv

llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s
1+
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck -enable-var-scope %s
22

33
;; Test various conversions.
44
define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
@@ -49,13 +49,12 @@ entry:
4949
; CHECK: strh w1, [sp, #12]
5050
; CHECK: str w2, [sp, #8]
5151
; CHECK: str x3, [sp]
52-
; CHECK: ldrb w8, [sp, #15]
53-
; CHECK: strh w8, [sp, #12]
54-
; CHECK: ldrh w8, [sp, #12]
55-
; CHECK: str w8, [sp, #8]
56-
; CHECK: ldr w8, [sp, #8]
57-
; CHECK: ; kill: def $x8 killed $w8
58-
; CHECK: str x8, [sp]
52+
; CHECK: ldrb [[REG0:w[0-9]+]], [sp, #15]
53+
; CHECK: strh [[REG0]], [sp, #12]
54+
; CHECK: ldrh [[REG1:w[0-9]+]], [sp, #12]
55+
; CHECK: str [[REG1]], [sp, #8]
56+
; CHECK: ldr w[[REG2:[0-9]+]], [sp, #8]
57+
; CHECK: str x[[REG2]], [sp]
5958
; CHECK: ldr x0, [sp]
6059
; CHECK: ret
6160
%a.addr = alloca i8, align 1
@@ -105,12 +104,12 @@ entry:
105104
; CHECK: strh w1, [sp, #12]
106105
; CHECK: str w2, [sp, #8]
107106
; CHECK: str x3, [sp]
108-
; CHECK: ldrsb w8, [sp, #15]
109-
; CHECK: strh w8, [sp, #12]
110-
; CHECK: ldrsh w8, [sp, #12]
111-
; CHECK: str w8, [sp, #8]
112-
; CHECK: ldrsw x8, [sp, #8]
113-
; CHECK: str x8, [sp]
107+
; CHECK: ldrsb [[REG0:w[0-9]+]], [sp, #15]
108+
; CHECK: strh [[REG0]], [sp, #12]
109+
; CHECK: ldrsh [[REG1:w[0-9]+]], [sp, #12]
110+
; CHECK: str [[REG1]], [sp, #8]
111+
; CHECK: ldrsw [[REG2:x[0-9]+]], [sp, #8]
112+
; CHECK: str [[REG2]], [sp]
114113
; CHECK: ldr x0, [sp]
115114
; CHECK: ret
116115
%a.addr = alloca i8, align 1
@@ -166,8 +165,8 @@ entry:
166165
define signext i16 @sext_i1_i16(i1 %a) nounwind ssp {
167166
entry:
168167
; CHECK-LABEL: sext_i1_i16
169-
; CHECK: sbfx w8, w0, #0, #1
170-
; CHECK-NEXT: sxth w0, w8
168+
; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
169+
; CHECK: sxth w0, [[REG]]
171170
%conv = sext i1 %a to i16
172171
ret i16 %conv
173172
}
@@ -176,8 +175,8 @@ entry:
176175
define signext i8 @sext_i1_i8(i1 %a) nounwind ssp {
177176
entry:
178177
; CHECK-LABEL: sext_i1_i8
179-
; CHECK: sbfx w8, w0, #0, #1
180-
; CHECK-NEXT: sxtb w0, w8
178+
; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
179+
; CHECK: sxtb w0, [[REG]]
181180
%conv = sext i1 %a to i8
182181
ret i8 %conv
183182
}
@@ -240,8 +239,8 @@ entry:
240239
define float @sitofp_sw_i1(i1 %a) nounwind ssp {
241240
entry:
242241
; CHECK-LABEL: sitofp_sw_i1
243-
; CHECK: sbfx w8, w0, #0, #1
244-
; CHECK: scvtf s0, w8
242+
; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
243+
; CHECK: scvtf s0, [[REG]]
245244
%conv = sitofp i1 %a to float
246245
ret float %conv
247246
}
@@ -250,8 +249,8 @@ entry:
250249
define float @sitofp_sw_i8(i8 %a) nounwind ssp {
251250
entry:
252251
; CHECK-LABEL: sitofp_sw_i8
253-
; CHECK: sxtb w8, w0
254-
; CHECK: scvtf s0, w8
252+
; CHECK: sxtb [[REG:w[0-9]+]], w0
253+
; CHECK: scvtf s0, [[REG]]
255254
%conv = sitofp i8 %a to float
256255
ret float %conv
257256
}
@@ -304,8 +303,8 @@ entry:
304303
define float @uitofp_sw_i1(i1 %a) nounwind ssp {
305304
entry:
306305
; CHECK-LABEL: uitofp_sw_i1
307-
; CHECK: and w8, w0, #0x1
308-
; CHECK: ucvtf s0, w8
306+
; CHECK: and [[REG:w[0-9]+]], w0, #0x1
307+
; CHECK: ucvtf s0, [[REG]]
309308
%conv = uitofp i1 %a to float
310309
ret float %conv
311310
}
@@ -374,7 +373,8 @@ entry:
374373
define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
375374
entry:
376375
; CHECK-LABEL: i64_trunc_i16
377-
; CHECK: and [[REG2:w[0-9]+]], w0, #0xffff
376+
; CHECK: mov x[[TMP:[0-9]+]], x0
377+
; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xffff{{$}}
378378
; CHECK: uxth w0, [[REG2]]
379379
%conv = trunc i64 %a to i16
380380
ret i16 %conv
@@ -383,7 +383,8 @@ entry:
383383
define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
384384
entry:
385385
; CHECK-LABEL: i64_trunc_i8
386-
; CHECK: and [[REG2:w[0-9]+]], w0, #0xff
386+
; CHECK: mov x[[TMP:[0-9]+]], x0
387+
; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xff{{$}}
387388
; CHECK: uxtb w0, [[REG2]]
388389
%conv = trunc i64 %a to i8
389390
ret i8 %conv
@@ -392,7 +393,8 @@ entry:
392393
define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
393394
entry:
394395
; CHECK-LABEL: i64_trunc_i1
395-
; CHECK: and [[REG2:w[0-9]+]], w0, #0x1
396+
; CHECK: mov x[[TMP:[0-9]+]], x0
397+
; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0x1{{$}}
396398
; CHECK: and w0, [[REG2]], #0x1
397399
%conv = trunc i64 %a to i1
398400
ret i1 %conv

llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,10 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun
210210
;
211211
; FAST-LABEL: test_vcvt_high_f32_f64:
212212
; FAST: // %bb.0:
213-
; FAST-NEXT: // implicit-def: $q2
214213
; FAST-NEXT: mov.16b v2, v0
215-
; FAST-NEXT: fcvtn2 v2.4s, v1.2d
214+
; FAST-NEXT: // implicit-def: $q0
216215
; FAST-NEXT: mov.16b v0, v2
216+
; FAST-NEXT: fcvtn2 v0.4s, v1.2d
217217
; FAST-NEXT: ret
218218
;
219219
; GISEL-LABEL: test_vcvt_high_f32_f64:
@@ -249,10 +249,10 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou
249249
;
250250
; FAST-LABEL: test_vcvtx_high_f32_f64:
251251
; FAST: // %bb.0:
252-
; FAST-NEXT: // implicit-def: $q2
253252
; FAST-NEXT: mov.16b v2, v0
254-
; FAST-NEXT: fcvtxn2 v2.4s, v1.2d
253+
; FAST-NEXT: // implicit-def: $q0
255254
; FAST-NEXT: mov.16b v0, v2
255+
; FAST-NEXT: fcvtxn2 v0.4s, v1.2d
256256
; FAST-NEXT: ret
257257
;
258258
; GISEL-LABEL: test_vcvtx_high_f32_f64:
@@ -283,17 +283,12 @@ define i16 @to_half(float %in) {
283283
;
284284
; FAST-LABEL: to_half:
285285
; FAST: // %bb.0:
286-
; FAST-NEXT: sub sp, sp, #16 // =16
287-
; FAST-NEXT: .cfi_def_cfa_offset 16
288-
; FAST-NEXT: fcvt h0, s0
286+
; FAST-NEXT: fcvt h1, s0
289287
; FAST-NEXT: // implicit-def: $w0
290-
; FAST-NEXT: fmov s1, w0
291-
; FAST-NEXT: mov.16b v1, v0
292-
; FAST-NEXT: fmov w8, s1
293-
; FAST-NEXT: mov w0, w8
294-
; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill
295-
; FAST-NEXT: mov w0, w8
296-
; FAST-NEXT: add sp, sp, #16 // =16
288+
; FAST-NEXT: fmov s0, w0
289+
; FAST-NEXT: mov.16b v0, v1
290+
; FAST-NEXT: fmov w0, s0
291+
; FAST-NEXT: // kill: def $w1 killed $w0
297292
; FAST-NEXT: ret
298293
;
299294
; GISEL-LABEL: to_half:

llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ declare [2 x i32] @callee()
1717
define void @test_struct_return(i32* %addr) {
1818
; CHECK-LABEL: test_struct_return:
1919
; CHECK: bl _callee
20-
; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32
21-
; CHECK-DAG: str w0
20+
; CHECK: x[[COPYX0:[0-9]+]], x0
21+
; CHECK-DAG: lsr [[HI:x[0-9]+]], x[[COPYX0]], #32
22+
; CHECK-DAG: str w[[COPYX0]]
2223
%res = call [2 x i32] @callee()
2324
%res.0 = extractvalue [2 x i32] %res, 0
2425
store i32 %res.0, i32* %addr

llvm/test/CodeGen/AArch64/arm64_32-null.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ define void @test_store(i8** %p) {
1313
define void @test_phi(i8** %p) {
1414
; CHECK-LABEL: test_phi:
1515
; CHECK: mov [[R1:x[0-9]+]], xzr
16-
; CHECK: str [[R1]], [sp]
16+
; CHECK: str [[R1]], [sp, #8]
1717
; CHECK: b [[BB:LBB[0-9_]+]]
1818
; CHECK: [[BB]]:
19-
; CHECK: ldr x0, [sp]
20-
; CHECK: str w0, [x{{.*}}]
19+
; CHECK: ldr x0, [sp, #8]
20+
; CHECK: mov w8, w0
21+
; CHECK: str w8, [x{{.*}}]
2122

2223
bb0:
2324
br label %bb1

llvm/test/CodeGen/AArch64/br-cond-not-merge.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ bb3:
6464
; OPT: b.gt [[L:\.LBB[0-9_]+]]
6565
; OPT: tbz w1, #0, [[L]]
6666
;
67+
; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]]
6768
; NOOPT: subs w{{[0-9]+}}, w{{[0-9]+}}, #0
6869
; NOOPT: cset [[R1:w[0-9]+]], gt
69-
; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]]
7070
; NOOPT: str [[R1]], [sp, #[[SLOT1:[0-9]+]]]
7171
; NOOPT: b .LBB
7272
; NOOPT: ldr [[R2:w[0-9]+]], [sp, #[[SLOT1]]]

0 commit comments

Comments
 (0)