Skip to content

Commit d0f9553

Browse files
committed
[PowerPC] Enable fast-isel on AIX 64 subtarget
This patch enables fast-isel for the AIX 64-bit subtarget (previously it was enabled only for 64-bit ELF). The initial motivation is to bring branch folding to AIX-generated code for correct debug behavior. I also saw some compile-time improvement in a few LLVM test-suite benchmarks (toast, dbms, cjpeg, burg, etc.). Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D98844
1 parent 34badc4 commit d0f9553

11 files changed

+239
-125
lines changed

llvm/lib/Target/PowerPC/PPCFastISel.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2467,9 +2467,9 @@ namespace llvm {
24672467
// Create the fast instruction selector for PowerPC64 ELF.
24682468
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
24692469
const TargetLibraryInfo *LibInfo) {
2470-
// Only available on 64-bit ELF for now.
2470+
// Only available on 64-bit for now.
24712471
const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2472-
if (Subtarget.is64BitELFABI())
2472+
if (Subtarget.isPPC64())
24732473
return new PPCFastISel(FuncInfo, LibInfo);
24742474
return nullptr;
24752475
}
Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
1+
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=PPC64
2+
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr7 | FileCheck %s --check-prefix=PPC64
23

34
%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
45
%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
@@ -9,40 +10,40 @@
910

1011
define i32* @t1() nounwind {
1112
entry:
12-
; ELF64: t1
13+
; PPC64: t1
1314
%addr = alloca i32*, align 4
1415
store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]], [2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
15-
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 124
16+
; PPC64: addi {{[0-9]+}}, {{[0-9]+}}, 124
1617
%0 = load i32*, i32** %addr, align 4
1718
ret i32* %0
1819
}
1920

2021
define i32* @t2() nounwind {
2122
entry:
22-
; ELF64: t2
23+
; PPC64: t2
2324
%addr = alloca i32*, align 4
2425
store i32* getelementptr inbounds ([3 x [3 x %struct.A]], [3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
25-
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1148
26+
; PPC64: addi {{[0-9]+}}, {{[0-9]+}}, 1148
2627
%0 = load i32*, i32** %addr, align 4
2728
ret i32* %0
2829
}
2930

3031
define i32* @t3() nounwind {
3132
entry:
32-
; ELF64: t3
33+
; PPC64: t3
3334
%addr = alloca i32*, align 4
3435
store i32* getelementptr inbounds ([3 x [3 x %struct.A]], [3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
35-
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 140
36+
; PPC64: addi {{[0-9]+}}, {{[0-9]+}}, 140
3637
%0 = load i32*, i32** %addr, align 4
3738
ret i32* %0
3839
}
3940

4041
define i32* @t4() nounwind {
4142
entry:
42-
; ELF64: t4
43+
; PPC64: t4
4344
%addr = alloca i32*, align 4
4445
store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]], [2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
45-
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1284
46+
; PPC64: addi {{[0-9]+}}, {{[0-9]+}}, 1284
4647
%0 = load i32*, i32** %addr, align 4
4748
ret i32* %0
4849
}
Lines changed: 28 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,44 @@
1-
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
1+
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=PPC64
2+
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr7 | FileCheck %s --check-prefix=PPC64
23

34
; Test add with non-legal types
45

56
define void @add_i8(i8 %a, i8 %b) nounwind {
67
entry:
7-
; ELF64: add_i8
8+
; PPC64: add_i8
89
%a.addr = alloca i8, align 4
910
%0 = add i8 %a, %b
10-
; ELF64: add
11+
; PPC64: add
1112
store i8 %0, i8* %a.addr, align 4
1213
ret void
1314
}
1415

1516
define void @add_i8_imm(i8 %a) nounwind {
1617
entry:
17-
; ELF64: add_i8_imm
18+
; PPC64: add_i8_imm
1819
%a.addr = alloca i8, align 4
1920
%0 = add i8 %a, 22;
20-
; ELF64: addi
21+
; PPC64: addi
2122
store i8 %0, i8* %a.addr, align 4
2223
ret void
2324
}
2425

2526
define void @add_i16(i16 %a, i16 %b) nounwind {
2627
entry:
27-
; ELF64: add_i16
28+
; PPC64: add_i16
2829
%a.addr = alloca i16, align 4
2930
%0 = add i16 %a, %b
30-
; ELF64: add
31+
; PPC64: add
3132
store i16 %0, i16* %a.addr, align 4
3233
ret void
3334
}
3435

3536
define void @add_i16_imm(i16 %a, i16 %b) nounwind {
3637
entry:
37-
; ELF64: add_i16_imm
38+
; PPC64: add_i16_imm
3839
%a.addr = alloca i16, align 4
3940
%0 = add i16 %a, 243;
40-
; ELF64: addi
41+
; PPC64: addi
4142
store i16 %0, i16* %a.addr, align 4
4243
ret void
4344
}
@@ -46,40 +47,40 @@ entry:
4647

4748
define void @or_i8(i8 %a, i8 %b) nounwind {
4849
entry:
49-
; ELF64: or_i8
50+
; PPC64: or_i8
5051
%a.addr = alloca i8, align 4
5152
%0 = or i8 %a, %b
52-
; ELF64: or
53+
; PPC64: or
5354
store i8 %0, i8* %a.addr, align 4
5455
ret void
5556
}
5657

5758
define void @or_i8_imm(i8 %a) nounwind {
5859
entry:
59-
; ELF64: or_i8_imm
60+
; PPC64: or_i8_imm
6061
%a.addr = alloca i8, align 4
6162
%0 = or i8 %a, -13;
62-
; ELF64: ori
63+
; PPC64: ori
6364
store i8 %0, i8* %a.addr, align 4
6465
ret void
6566
}
6667

6768
define void @or_i16(i16 %a, i16 %b) nounwind {
6869
entry:
69-
; ELF64: or_i16
70+
; PPC64: or_i16
7071
%a.addr = alloca i16, align 4
7172
%0 = or i16 %a, %b
72-
; ELF64: or
73+
; PPC64: or
7374
store i16 %0, i16* %a.addr, align 4
7475
ret void
7576
}
7677

7778
define void @or_i16_imm(i16 %a) nounwind {
7879
entry:
79-
; ELF64: or_i16_imm
80+
; PPC64: or_i16_imm
8081
%a.addr = alloca i16, align 4
8182
%0 = or i16 %a, 273;
82-
; ELF64: ori
83+
; PPC64: ori
8384
store i16 %0, i16* %a.addr, align 4
8485
ret void
8586
}
@@ -88,50 +89,50 @@ entry:
8889

8990
define void @sub_i8(i8 %a, i8 %b) nounwind {
9091
entry:
91-
; ELF64: sub_i8
92+
; PPC64: sub_i8
9293
%a.addr = alloca i8, align 4
9394
%0 = sub i8 %a, %b
94-
; ELF64: sub
95+
; PPC64: sub
9596
store i8 %0, i8* %a.addr, align 4
9697
ret void
9798
}
9899

99100
define void @sub_i8_imm(i8 %a) nounwind {
100101
entry:
101-
; ELF64: sub_i8_imm
102+
; PPC64: sub_i8_imm
102103
%a.addr = alloca i8, align 4
103104
%0 = sub i8 %a, 22;
104-
; ELF64: addi
105+
; PPC64: addi
105106
store i8 %0, i8* %a.addr, align 4
106107
ret void
107108
}
108109

109110
define void @sub_i16(i16 %a, i16 %b) nounwind {
110111
entry:
111-
; ELF64: sub_i16
112+
; PPC64: sub_i16
112113
%a.addr = alloca i16, align 4
113114
%0 = sub i16 %a, %b
114-
; ELF64: sub
115+
; PPC64: sub
115116
store i16 %0, i16* %a.addr, align 4
116117
ret void
117118
}
118119

119120
define void @sub_i16_imm(i16 %a) nounwind {
120121
entry:
121-
; ELF64: sub_i16_imm
122+
; PPC64: sub_i16_imm
122123
%a.addr = alloca i16, align 4
123124
%0 = sub i16 %a, 247;
124-
; ELF64: addi
125+
; PPC64: addi
125126
store i16 %0, i16* %a.addr, align 4
126127
ret void
127128
}
128129

129130
define void @sub_i16_badimm(i16 %a) nounwind {
130131
entry:
131-
; ELF64: sub_i16_imm
132+
; PPC64: sub_i16_imm
132133
%a.addr = alloca i16, align 4
133134
%0 = sub i16 %a, -32768;
134-
; ELF64: sub
135+
; PPC64: sub
135136
store i16 %0, i16* %a.addr, align 4
136137
ret void
137138
}
Lines changed: 105 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=ELF64
3+
; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s -check-prefix=AIX64
4+
5+
@x = global i32 1000, align 4
6+
7+
define signext i32 @bar() #0 {
8+
; ELF64-LABEL: bar:
9+
; ELF64: # %bb.0: # %entry
10+
; ELF64-NEXT: mflr 0
11+
; ELF64-NEXT: std 0, 16(1)
12+
; ELF64-NEXT: stdu 1, -48(1)
13+
; ELF64-NEXT: .cfi_def_cfa_offset 48
14+
; ELF64-NEXT: .cfi_offset lr, 16
15+
; ELF64-NEXT: li 3, 0
16+
; ELF64-NEXT: stw 3, 44(1)
17+
; ELF64-NEXT: li 3, 0
18+
; ELF64-NEXT: stw 3, 40(1)
19+
; ELF64-NEXT: .LBB0_1: # %for.cond
20+
; ELF64-NEXT: #
21+
; ELF64-NEXT: lwz 3, 40(1)
22+
; ELF64-NEXT: addis 4, 2, .LC0@toc@ha
23+
; ELF64-NEXT: ld 4, .LC0@toc@l(4)
24+
; ELF64-NEXT: lwz 4, 0(4)
25+
; ELF64-NEXT: cmpw 3, 4
26+
; ELF64-NEXT: bge 0, .LBB0_4
27+
; ELF64-NEXT: # %bb.2: # %for.body
28+
; ELF64-NEXT: #
29+
; ELF64-NEXT: bl foo
30+
; ELF64-NEXT: nop
31+
; ELF64-NEXT: # %bb.3: # %for.inc
32+
; ELF64-NEXT: #
33+
; ELF64-NEXT: lwz 3, 40(1)
34+
; ELF64-NEXT: addi 3, 3, 1
35+
; ELF64-NEXT: stw 3, 40(1)
36+
; ELF64-NEXT: b .LBB0_1
37+
; ELF64-NEXT: .LBB0_4: # %for.end
38+
; ELF64-NEXT: li 3, 0
39+
; ELF64-NEXT: addi 1, 1, 48
40+
; ELF64-NEXT: ld 0, 16(1)
41+
; ELF64-NEXT: mtlr 0
42+
; ELF64-NEXT: blr
43+
;
44+
; AIX64-LABEL: bar:
45+
; AIX64: # %bb.0: # %entry
46+
; AIX64-NEXT: mflr 0
47+
; AIX64-NEXT: std 0, 16(1)
48+
; AIX64-NEXT: stdu 1, -128(1)
49+
; AIX64-NEXT: li 3, 0
50+
; AIX64-NEXT: stw 3, 124(1)
51+
; AIX64-NEXT: li 3, 0
52+
; AIX64-NEXT: stw 3, 120(1)
53+
; AIX64-NEXT: L..BB0_1: # %for.cond
54+
; AIX64-NEXT: #
55+
; AIX64-NEXT: lwz 3, 120(1)
56+
; AIX64-NEXT: ld 4, L..C0(2)
57+
; AIX64-NEXT: lwz 4, 0(4)
58+
; AIX64-NEXT: cmpw 3, 4
59+
; AIX64-NEXT: bge 0, L..BB0_4
60+
; AIX64-NEXT: # %bb.2: # %for.body
61+
; AIX64-NEXT: #
62+
; AIX64-NEXT: bl .foo[PR]
63+
; AIX64-NEXT: nop
64+
; AIX64-NEXT: # %bb.3: # %for.inc
65+
; AIX64-NEXT: #
66+
; AIX64-NEXT: lwz 3, 120(1)
67+
; AIX64-NEXT: addi 3, 3, 1
68+
; AIX64-NEXT: stw 3, 120(1)
69+
; AIX64-NEXT: b L..BB0_1
70+
; AIX64-NEXT: L..BB0_4: # %for.end
71+
; AIX64-NEXT: li 3, 0
72+
; AIX64-NEXT: addi 1, 1, 128
73+
; AIX64-NEXT: ld 0, 16(1)
74+
; AIX64-NEXT: mtlr 0
75+
; AIX64-NEXT: blr
76+
entry:
77+
%retval = alloca i32, align 4
78+
%i = alloca i32, align 4
79+
store i32 0, i32* %retval, align 4
80+
store i32 0, i32* %i, align 4
81+
br label %for.cond
82+
83+
for.cond:
84+
%0 = load i32, i32* %i, align 4
85+
%1 = load i32, i32* @x, align 4
86+
%cmp = icmp slt i32 %0, %1
87+
br i1 %cmp, label %for.body, label %for.end
88+
89+
for.body:
90+
call void bitcast (void (...)* @foo to void ()*)()
91+
br label %for.inc
92+
93+
for.inc:
94+
%2 = load i32, i32* %i, align 4
95+
%inc = add nsw i32 %2, 1
96+
store i32 %inc, i32* %i, align 4
97+
br label %for.cond
98+
99+
for.end:
100+
ret i32 0
101+
}
102+
103+
declare void @foo(...)
104+
105+
attributes #0 = { optnone noinline }
Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,28 @@
1-
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
1+
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=PPC64
2+
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr7 | FileCheck %s --check-prefix=PPC64
23

34
define zeroext i1 @testi1(i8 %in) nounwind {
45
entry:
56
%c = icmp eq i8 %in, 5
67
br i1 %c, label %true, label %false
78

8-
; ELF64-LABEL: @testi1
9+
; PPC64-LABEL: @testi1
910

1011
true:
1112
br label %end
1213

13-
; ELF64-NOT: li {{[0-9]+}}, -1
14-
; ELF64: li {{[0-9]+}}, 1
14+
; PPC64-NOT: li {{[0-9]+}}, -1
15+
; PPC64: li {{[0-9]+}}, 1
1516

1617
false:
1718
br label %end
1819

19-
; ELF64: li {{[0-9]+}}, 0
20+
; PPC64: li {{[0-9]+}}, 0
2021

2122
end:
2223
%r = phi i1 [ 0, %false], [ 1, %true ]
2324
ret i1 %r
2425

25-
; ELF64: blr
26+
; PPC64: blr
2627
}
2728

llvm/test/CodeGen/PowerPC/fast-isel-crash.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
2+
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr7
23

34
; Ensure this doesn't crash.
45

0 commit comments

Comments
 (0)