Skip to content

Commit 0d11c10

Browse files
committed
[AArch64][GlobalISel] Fix TLS accesses clobbering registers incorrectly.
This was happening because the BLR didn't have a use of the X0 argument register, so X0 could end up being re-used in high register pressure situations. The change also avoids hard-coding the use of X0 for the sequence, except to copy the value for the call. ld64 should still be able to optimize it. rdar://65438258
1 parent 0e4b921 commit 0d11c10

File tree

4 files changed

+217
-7
lines changed

4 files changed

+217
-7
lines changed

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2494,17 +2494,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
24942494
const GlobalValue &GV = *I.getOperand(1).getGlobal();
24952495
MachineIRBuilder MIB(I);
24962496

2497-
MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2498-
.addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2497+
auto LoadGOT =
2498+
MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
2499+
.addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
24992500

25002501
auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2501-
{Register(AArch64::X0)})
2502+
{LoadGOT.getReg(0)})
25022503
.addImm(0);
25032504

2505+
MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
25042506
// TLS calls preserve all registers except those that absolutely must be
25052507
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
25062508
// silly).
25072509
MIB.buildInstr(AArch64::BLR, {}, {Load})
2510+
.addUse(AArch64::X0, RegState::Implicit)
25082511
.addDef(AArch64::X0, RegState::Implicit)
25092512
.addRegMask(TRI.getTLSCallPreservedMask());
25102513

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
; RUN: llc -mtriple aarch64-apple-darwin -O0 -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
2+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
3+
target triple = "arm64-apple-ios13.0.0"
4+
5+
@t_val = thread_local global i32 0, align 4
6+
@.str = private unnamed_addr constant [5 x i8] c"str1\00", align 1
7+
@str1 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), align 8
8+
@.str.1 = private unnamed_addr constant [5 x i8] c"str2\00", align 1
9+
@str2 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i32 0, i32 0), align 8
10+
@.str.2 = private unnamed_addr constant [5 x i8] c"str3\00", align 1
11+
@str3 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i32 0, i32 0), align 8
12+
@.str.3 = private unnamed_addr constant [5 x i8] c"str4\00", align 1
13+
@str4 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.3, i32 0, i32 0), align 8
14+
@.str.4 = private unnamed_addr constant [5 x i8] c"str5\00", align 1
15+
@str5 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.4, i32 0, i32 0), align 8
16+
@.str.5 = private unnamed_addr constant [5 x i8] c"str6\00", align 1
17+
@str6 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), align 8
18+
@.str.6 = private unnamed_addr constant [5 x i8] c"str7\00", align 1
19+
@str7 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.6, i32 0, i32 0), align 8
20+
@.str.7 = private unnamed_addr constant [5 x i8] c"str8\00", align 1
21+
@str8 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.7, i32 0, i32 0), align 8
22+
@.str.8 = private unnamed_addr constant [5 x i8] c"str9\00", align 1
23+
@str9 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.8, i32 0, i32 0), align 8
24+
@.str.9 = private unnamed_addr constant [6 x i8] c"str10\00", align 1
25+
@str10 = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.9, i32 0, i32 0), align 8
26+
@.str.10 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
27+
@.str.11 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
28+
@.str.12 = private unnamed_addr constant [4 x i8] c"xyz\00", align 1
29+
30+
31+
; This test checks that we don't re-use the register for the variable descriptor
32+
; for the second ldr.
33+
; CHECK: adrp x2, _t_val@TLVPPAGE
34+
; CHECK: ldr x2, [x2, _t_val@TLVPPAGEOFF]
35+
; CHECK: ldr x3, [x2]
36+
; CHECK: mov x0, x2
37+
; CHECK: blr x3
38+
39+
define void @_Z4funcPKc(i8* %id) {
40+
entry:
41+
%id.addr = alloca i8*, align 8
42+
store i8* %id, i8** %id.addr, align 8
43+
%0 = load i8*, i8** %id.addr, align 8
44+
%1 = load i8*, i8** @str1, align 8
45+
%cmp = icmp eq i8* %0, %1
46+
br i1 %cmp, label %if.then, label %if.else
47+
48+
if.then: ; preds = %entry
49+
%2 = load i8*, i8** @str1, align 8
50+
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %2)
51+
%3 = load i8*, i8** @str2, align 8
52+
%call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %3)
53+
%4 = load i8*, i8** @str3, align 8
54+
%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %4)
55+
%5 = load i8*, i8** @str4, align 8
56+
%call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %5)
57+
%6 = load i8*, i8** @str5, align 8
58+
%call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %6)
59+
%7 = load i8*, i8** @str6, align 8
60+
%call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %7)
61+
%8 = load i8*, i8** @str7, align 8
62+
%call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %8)
63+
%9 = load i8*, i8** @str8, align 8
64+
%call7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %9)
65+
%10 = load i8*, i8** @str9, align 8
66+
%call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %10)
67+
%11 = load i8*, i8** @str10, align 8
68+
%call9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %11)
69+
%12 = load i32, i32* @t_val, align 4
70+
%call10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.11, i64 0, i64 0), i32 %12)
71+
br label %if.end56
72+
73+
if.else: ; preds = %entry
74+
%13 = load i8*, i8** %id.addr, align 8
75+
%14 = load i8*, i8** @str2, align 8
76+
%cmp11 = icmp eq i8* %13, %14
77+
br i1 %cmp11, label %if.then12, label %if.else24
78+
79+
if.then12: ; preds = %if.else
80+
%15 = load i8*, i8** @str1, align 8
81+
%call13 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %15)
82+
%16 = load i8*, i8** @str2, align 8
83+
%call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %16)
84+
%17 = load i8*, i8** @str3, align 8
85+
%call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %17)
86+
%18 = load i8*, i8** @str4, align 8
87+
%call16 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %18)
88+
%19 = load i8*, i8** @str5, align 8
89+
%call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %19)
90+
%20 = load i8*, i8** @str6, align 8
91+
%call18 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %20)
92+
%21 = load i8*, i8** @str7, align 8
93+
%call19 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %21)
94+
%22 = load i8*, i8** @str8, align 8
95+
%call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %22)
96+
%23 = load i8*, i8** @str9, align 8
97+
%call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %23)
98+
%24 = load i8*, i8** @str10, align 8
99+
%call22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %24)
100+
%25 = load i32, i32* @t_val, align 4
101+
%call23 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.11, i64 0, i64 0), i32 %25)
102+
br label %if.end55
103+
104+
if.else24: ; preds = %if.else
105+
%26 = load i8*, i8** %id.addr, align 8
106+
%27 = load i8*, i8** @str3, align 8
107+
%cmp25 = icmp eq i8* %26, %27
108+
br i1 %cmp25, label %if.then26, label %if.else27
109+
110+
if.then26: ; preds = %if.else24
111+
br label %if.end54
112+
113+
if.else27: ; preds = %if.else24
114+
%28 = load i8*, i8** %id.addr, align 8
115+
%29 = load i8*, i8** @str4, align 8
116+
%cmp28 = icmp eq i8* %28, %29
117+
br i1 %cmp28, label %if.then29, label %if.else30
118+
119+
if.then29: ; preds = %if.else27
120+
br label %if.end53
121+
122+
if.else30: ; preds = %if.else27
123+
%30 = load i8*, i8** %id.addr, align 8
124+
%31 = load i8*, i8** @str5, align 8
125+
%cmp31 = icmp eq i8* %30, %31
126+
br i1 %cmp31, label %if.then32, label %if.else33
127+
128+
if.then32: ; preds = %if.else30
129+
br label %if.end52
130+
131+
if.else33: ; preds = %if.else30
132+
%32 = load i8*, i8** %id.addr, align 8
133+
%33 = load i8*, i8** @str6, align 8
134+
%cmp34 = icmp eq i8* %32, %33
135+
br i1 %cmp34, label %if.then35, label %if.else36
136+
137+
if.then35: ; preds = %if.else33
138+
br label %if.end51
139+
140+
if.else36: ; preds = %if.else33
141+
%34 = load i8*, i8** %id.addr, align 8
142+
%35 = load i8*, i8** @str7, align 8
143+
%cmp37 = icmp eq i8* %34, %35
144+
br i1 %cmp37, label %if.then38, label %if.else39
145+
146+
if.then38: ; preds = %if.else36
147+
br label %if.end50
148+
149+
if.else39: ; preds = %if.else36
150+
%36 = load i8*, i8** %id.addr, align 8
151+
%37 = load i8*, i8** @str8, align 8
152+
%cmp40 = icmp eq i8* %36, %37
153+
br i1 %cmp40, label %if.then41, label %if.else42
154+
155+
if.then41: ; preds = %if.else39
156+
br label %if.end49
157+
158+
if.else42: ; preds = %if.else39
159+
%38 = load i8*, i8** %id.addr, align 8
160+
%39 = load i8*, i8** @str9, align 8
161+
%cmp43 = icmp eq i8* %38, %39
162+
br i1 %cmp43, label %if.then44, label %if.else45
163+
164+
if.then44: ; preds = %if.else42
165+
br label %if.end48
166+
167+
if.else45: ; preds = %if.else42
168+
%40 = load i8*, i8** %id.addr, align 8
169+
%41 = load i8*, i8** @str10, align 8
170+
%cmp46 = icmp eq i8* %40, %41
171+
br i1 %cmp46, label %if.then47, label %if.end
172+
173+
if.then47: ; preds = %if.else45
174+
br label %if.end
175+
176+
if.end: ; preds = %if.then47, %if.else45
177+
br label %if.end48
178+
179+
if.end48: ; preds = %if.end, %if.then44
180+
br label %if.end49
181+
182+
if.end49: ; preds = %if.end48, %if.then41
183+
br label %if.end50
184+
185+
if.end50: ; preds = %if.end49, %if.then38
186+
br label %if.end51
187+
188+
if.end51: ; preds = %if.end50, %if.then35
189+
br label %if.end52
190+
191+
if.end52: ; preds = %if.end51, %if.then32
192+
br label %if.end53
193+
194+
if.end53: ; preds = %if.end52, %if.then29
195+
br label %if.end54
196+
197+
if.end54: ; preds = %if.end53, %if.then26
198+
br label %if.end55
199+
200+
if.end55: ; preds = %if.end54, %if.then12
201+
br label %if.end56
202+
203+
if.end56: ; preds = %if.end55, %if.then
204+
ret void
205+
}
206+
declare i32 @printf(i8*, ...)
207+

llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
define i8 @get_var() {
1010
; CHECK-LABEL: get_var:
1111
; CHECK: adrp x[[TLVPDESC_SLOT_HI:[0-9]+]], _var@TLVPPAGE
12-
; CHECK: ldr x0, [x[[TLVPDESC_SLOT_HI]], _var@TLVPPAGEOFF]
13-
; CHECK: ldr [[TLV_GET_ADDR:x[0-9]+]], [x0]
12+
; CHECK: ldr x[[PTR:[0-9]+]], [x[[TLVPDESC_SLOT_HI]], _var@TLVPPAGEOFF]
13+
; CHECK: ldr [[TLV_GET_ADDR:x[0-9]+]], [x[[PTR]]]
1414
; CHECK: blr [[TLV_GET_ADDR]]
1515
; CHECK: ldrb w0, [x0]
1616

llvm/test/CodeGen/AArch64/arm64e-ptrauth-tls.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
define i8 @get_var() #0 {
66
; CHECK-LABEL: get_var:
77
; CHECK: adrp x[[TLVPDESC_SLOT_HI:[0-9]+]], _var@TLVPPAGE
8-
; CHECK: ldr x0, [x[[TLVPDESC_SLOT_HI]], _var@TLVPPAGEOFF]
9-
; CHECK: ldr [[TLV_GET_ADDR:x[0-9]+]], [x0]
8+
; CHECK: ldr x[[PTR:[0-9]+]], [x[[TLVPDESC_SLOT_HI]], _var@TLVPPAGEOFF]
9+
; CHECK: ldr [[TLV_GET_ADDR:x[0-9]+]], [x[[PTR]]]
1010
; CHECK: blraaz [[TLV_GET_ADDR]]
1111
; CHECK: ldrb w0, [x0]
1212

0 commit comments

Comments
 (0)