Skip to content

Commit d51be84

Browse files
alex-tsearlmc1
authored and committed
AlignmentFromAssumptions should only track pointer operand users (llvm#73370)
AlignmentFromAssumptions uses SCEV to update load/store alignment. Starting from the pointer it takes from the assumption cache, it follows the pointer's def-use chains until it reaches a load or store instruction. It mistakenly adds the users of the load result to the worklist, even though the load result has no connection with the original pointer; moreover, in most cases it is not a pointer at all. Thus the def-use chain contains irrelevant load users. When such a user is a store instruction, the algorithm attempts to adjust its alignment to the alignment of the original pointer. The problem appears when the load and store memory operand pointers belong to different address spaces and possibly have different sizes. Commit 4bf015c was an attempt to address a similar problem: truncation or zero extension was added to make the pointers the same size. That looks strange to me, because zero extension of a pointer is not legal. The test in 4bf015c no longer works, because an addrspacecast is now generated for explicit address space conversions. To summarize: 1. Addrspacecasts are used for the alloca-to-global address space conversion, so the code added by 4bf015c is no longer functional. 2. The AlignmentFromAssumptions algorithm should not add the load users to the worklist, as they have nothing to do with the original pointer. 3. Instead, we only track users of instructions that are GetElementPtrInsts or PHINodes. Change-Id: Ie15b831946ea34832ded545f096a583f4336180c
1 parent a8f8617 commit d51be84

File tree

2 files changed

+216
-10
lines changed

2 files changed

+216
-10
lines changed

llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -83,11 +83,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
8383
const SCEV *OffSCEV, Value *Ptr,
8484
ScalarEvolution *SE) {
8585
const SCEV *PtrSCEV = SE->getSCEV(Ptr);
86-
// On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
87-
// (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
88-
// may disagree. Trunc/extend so they agree.
89-
PtrSCEV = SE->getTruncateOrZeroExtend(
90-
PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
86+
9187
const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
9288
if (isa<SCEVCouldNotCompute>(DiffSCEV))
9389
return Align(1);
@@ -264,11 +260,17 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
264260
// Now that we've updated that use of the pointer, look for other uses of
265261
// the pointer to update.
266262
Visited.insert(J);
267-
for (User *UJ : J->users()) {
268-
Instruction *K = cast<Instruction>(UJ);
269-
if (!Visited.count(K))
270-
WorkList.push_back(K);
271-
}
263+
if (isa<GetElementPtrInst>(J) || isa<PHINode>(J))
264+
for (auto &U : J->uses()) {
265+
if (U->getType()->isPointerTy()) {
266+
Instruction *K = cast<Instruction>(U.getUser());
267+
StoreInst *SI = dyn_cast<StoreInst>(K);
268+
if (SI && SI->getPointerOperandIndex() != U.getOperandNo())
269+
continue;
270+
if (!Visited.count(K))
271+
WorkList.push_back(K);
272+
}
273+
}
272274
}
273275

274276
return true;
Lines changed: 204 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,204 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
3+
4+
define void @widget(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
5+
; CHECK-LABEL: define void @widget(
6+
; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
7+
; CHECK-NEXT: bb:
8+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1
9+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[GETELEMENTPTR]], i64 4) ]
10+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 4
11+
; CHECK-NEXT: [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
12+
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 4
13+
; CHECK-NEXT: ret void
14+
;
15+
bb:
16+
%getelementptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
17+
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %getelementptr, i64 4) ]
18+
%load = load i32, ptr addrspace(1) %getelementptr, align 2
19+
%getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
20+
store i32 %load, ptr addrspace(3) %getelementptr2, align 4
21+
ret void
22+
}
23+
24+
define void @wibble(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
25+
; CHECK-LABEL: define void @wibble(
26+
; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
27+
; CHECK-NEXT: bb:
28+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
29+
; CHECK-NEXT: br i1 [[ICMP]], label [[BB4:%.*]], label [[BB5:%.*]]
30+
; CHECK: bb4:
31+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
32+
; CHECK-NEXT: br label [[BB7:%.*]]
33+
; CHECK: bb5:
34+
; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
35+
; CHECK-NEXT: br label [[BB7]]
36+
; CHECK: bb7:
37+
; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB4]] ], [ [[GETELEMENTPTR6]], [[BB5]] ]
38+
; CHECK-NEXT: [[GETELEMENTPTR8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PHI]], i64 4
39+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
40+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR8]], align 2
41+
; CHECK-NEXT: [[GETELEMENTPTR9:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
42+
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR9]], align 4
43+
; CHECK-NEXT: ret void
44+
;
45+
bb:
46+
%icmp = icmp ugt i32 %arg2, 10
47+
br i1 %icmp, label %bb4, label %bb5
48+
49+
bb4: ; preds = %bb
50+
%getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
51+
br label %bb7
52+
53+
bb5: ; preds = %bb
54+
%getelementptr6 = getelementptr i32, ptr addrspace(1) %arg, i32 7
55+
br label %bb7
56+
57+
bb7: ; preds = %bb5, %bb4
58+
%phi = phi ptr addrspace(1) [ %getelementptr, %bb4 ], [ %getelementptr6, %bb5 ]
59+
%getelementptr8 = getelementptr inbounds i32, ptr addrspace(1) %phi, i64 4
60+
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
61+
%load = load i32, ptr addrspace(1) %getelementptr8, align 2
62+
%getelementptr9 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
63+
store i32 %load, ptr addrspace(3) %getelementptr9, align 4
64+
ret void
65+
}
66+
67+
define void @ham(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
68+
; CHECK-LABEL: define void @ham(
69+
; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
70+
; CHECK-NEXT: bb:
71+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 0
72+
; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 10
73+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
74+
; CHECK-NEXT: br i1 [[ICMP]], label [[BB5:%.*]], label [[BB10:%.*]]
75+
; CHECK: bb5:
76+
; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB:%.*]] ], [ [[GETELEMENTPTR8:%.*]], [[BB5]] ]
77+
; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD:%.*]], [[BB5]] ]
78+
; CHECK-NEXT: [[GETELEMENTPTR7:%.*]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 4
79+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
80+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR7]], align 4
81+
; CHECK-NEXT: [[ADD]] = add i32 [[PHI6]], [[LOAD]]
82+
; CHECK-NEXT: [[GETELEMENTPTR8]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 [[ARG2]]
83+
; CHECK-NEXT: [[ICMP9:%.*]] = icmp eq ptr addrspace(1) [[GETELEMENTPTR8]], [[GETELEMENTPTR4]]
84+
; CHECK-NEXT: br i1 [[ICMP9]], label [[BB5]], label [[BB10]]
85+
; CHECK: bb10:
86+
; CHECK-NEXT: [[PHI11:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD]], [[BB5]] ]
87+
; CHECK-NEXT: [[GETELEMENTPTR12:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
88+
; CHECK-NEXT: store i32 [[PHI11]], ptr addrspace(3) [[GETELEMENTPTR12]], align 4
89+
; CHECK-NEXT: ret void
90+
;
91+
bb:
92+
%getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
93+
%getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 10
94+
%icmp = icmp ugt i32 %arg2, 10
95+
br i1 %icmp, label %bb5, label %bb10
96+
97+
bb5: ; preds = %bb5, %bb
98+
%phi = phi ptr addrspace(1) [ %getelementptr, %bb ], [ %getelementptr8, %bb5 ]
99+
%phi6 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
100+
%getelementptr7 = getelementptr i32, ptr addrspace(1) %phi, i32 4
101+
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
102+
%load = load i32, ptr addrspace(1) %getelementptr7, align 2
103+
%add = add i32 %phi6, %load
104+
%getelementptr8 = getelementptr i32, ptr addrspace(1) %phi, i32 %arg2
105+
%icmp9 = icmp eq ptr addrspace(1) %getelementptr8, %getelementptr4
106+
br i1 %icmp9, label %bb5, label %bb10
107+
108+
bb10: ; preds = %bb5, %bb
109+
%phi11 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
110+
%getelementptr12 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
111+
store i32 %phi11, ptr addrspace(3) %getelementptr12, align 4
112+
ret void
113+
}
114+
115+
define void @quux(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
116+
; CHECK-LABEL: define void @quux(
117+
; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
118+
; CHECK-NEXT: bb:
119+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
120+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
121+
; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
122+
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], ptr addrspace(1) [[GETELEMENTPTR]], ptr addrspace(1) [[GETELEMENTPTR4]]
123+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
124+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SELECT]], align 2
125+
; CHECK-NEXT: [[GETELEMENTPTR5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
126+
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR5]], align 4
127+
; CHECK-NEXT: ret void
128+
;
129+
bb:
130+
%icmp = icmp ugt i32 %arg2, 10
131+
%getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
132+
%getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 7
133+
%select = select i1 %icmp, ptr addrspace(1) %getelementptr, ptr addrspace(1) %getelementptr4
134+
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
135+
%load = load i32, ptr addrspace(1) %select, align 2
136+
%getelementptr5 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
137+
store i32 %load, ptr addrspace(3) %getelementptr5, align 4
138+
ret void
139+
}
140+
141+
define void @widget.1(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
142+
; CHECK-LABEL: define void @widget.1(
143+
; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
144+
; CHECK-NEXT: bb:
145+
; CHECK-NEXT: [[ADDRSPACECAST:%.*]] = addrspacecast ptr addrspace(3) [[ARG3]] to ptr addrspace(1)
146+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ADDRSPACECAST]]
147+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) [[ARG3]], i64 4) ]
148+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 2
149+
; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
150+
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR4]], align 4
151+
; CHECK-NEXT: ret void
152+
;
153+
bb:
154+
%addrspacecast = addrspacecast ptr addrspace(3) %arg3 to ptr addrspace(1)
155+
%getelementptr = getelementptr i32, ptr addrspace(1) %addrspacecast
156+
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg3, i64 4) ]
157+
%load = load i32, ptr addrspace(1) %getelementptr, align 2
158+
%getelementptr4 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
159+
store i32 %load, ptr addrspace(3) %getelementptr4, align 2
160+
ret void
161+
}
162+
163+
define void @baz(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
164+
; CHECK-LABEL: define void @baz(
165+
; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
166+
; CHECK-NEXT: bb:
167+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[ARG]], i64 16
168+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
169+
; CHECK-NEXT: [[LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GETELEMENTPTR]], align 4
170+
; CHECK-NEXT: [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
171+
; CHECK-NEXT: store ptr addrspace(1) [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 2
172+
; CHECK-NEXT: ret void
173+
;
174+
bb:
175+
%getelementptr = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
176+
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
177+
%load = load ptr addrspace(1), ptr addrspace(1) %getelementptr, align 2
178+
%getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
179+
store ptr addrspace(1) %load, ptr addrspace(3) %getelementptr2, align 2
180+
ret void
181+
}
182+
183+
define void @foo(ptr addrspace(1) nocapture readonly %arg, i32 %arg1) {
184+
; CHECK-LABEL: define void @foo(
185+
; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG1:%.*]]) {
186+
; CHECK-NEXT: bb:
187+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(3), ptr addrspace(1) [[ARG]], i64 16
188+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
189+
; CHECK-NEXT: [[LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[GETELEMENTPTR]], align 4
190+
; CHECK-NEXT: store i32 [[ARG1]], ptr addrspace(3) [[LOAD]], align 2
191+
; CHECK-NEXT: ret void
192+
;
193+
bb:
194+
%getelementptr = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
195+
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
196+
%load = load ptr addrspace(3), ptr addrspace(1) %getelementptr, align 2
197+
store i32 %arg1, ptr addrspace(3) %load, align 2
198+
ret void
199+
}
200+
201+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
202+
declare void @llvm.assume(i1 noundef) #0
203+
204+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }

0 commit comments

Comments
 (0)