Skip to content

Commit 716b02d

Browse files
[LLVM][MemCpyOpt] Unify alias tags if we optimize allocas (#129537)
Optimizing alloca instructions may leave invalid alias tags behind. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with the flags `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes the alias tags when the memcpy optimization replaces two arrays with one array, ensuring that Fortran source code is compiled correctly with those flags. This commit is also a proposed fix for the reported issue #133984. --------- Co-authored-by: Shilei Tian <[email protected]>
1 parent 2fe123a commit 716b02d

File tree

3 files changed

+92
-10
lines changed

3 files changed

+92
-10
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,7 +1516,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15161516
// to remove them.
15171517

15181518
SmallVector<Instruction *, 4> LifetimeMarkers;
1519-
SmallSet<Instruction *, 4> NoAliasInstrs;
1519+
SmallSet<Instruction *, 4> AAMetadataInstrs;
15201520
bool SrcNotDom = false;
15211521

15221522
// Recursively track the user and check whether modified alias exist.
@@ -1570,8 +1570,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15701570
continue;
15711571
}
15721572
}
1573-
if (UI->hasMetadata(LLVMContext::MD_noalias))
1574-
NoAliasInstrs.insert(UI);
1573+
AAMetadataInstrs.insert(UI);
1574+
15751575
if (!ModRefCallback(UI))
15761576
return false;
15771577
}
@@ -1680,11 +1680,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16801680
}
16811681

16821682
// As this transformation can cause memory accesses that didn't previously
1683-
// alias to begin to alias one another, we remove !noalias metadata from any
1684-
// uses of either alloca. This is conservative, but more precision doesn't
1685-
// seem worthwhile right now.
1686-
for (Instruction *I : NoAliasInstrs)
1683+
// alias to begin to alias one another, we remove !alias.scope, !noalias,
1684+
// !tbaa and !tbaa_struct metadata from any uses of either alloca.
1685+
// This is conservative, but more precision doesn't seem worthwhile
1686+
// right now.
1687+
for (Instruction *I : AAMetadataInstrs) {
1688+
I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
16871689
I->setMetadata(LLVMContext::MD_noalias, nullptr);
1690+
I->setMetadata(LLVMContext::MD_tbaa, nullptr);
1691+
I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
1692+
}
16881693

16891694
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
16901695
NumStackMove++;
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
3+
4+
define void @test() local_unnamed_addr {
5+
; CHECK-LABEL: define void @test() local_unnamed_addr {
6+
; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
7+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
8+
; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4
9+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
10+
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
11+
; CHECK-NEXT: ret void
12+
;
13+
%test_array_a = alloca [31 x float], align 4
14+
%test_array_b = alloca [31 x float], align 4
15+
%1 = getelementptr float, ptr %test_array_b, i64 1
16+
store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !4
17+
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
18+
%2 = getelementptr float, ptr %test_array_a, i64 1
19+
%3 = load float, ptr %2, align 4, !tbaa !7
20+
ret void
21+
}
22+
23+
%struct.Outer = type { float, double, %struct.Inner }
24+
%struct.Inner = type { i32, float }
25+
26+
; Function Attrs: nounwind uwtable
27+
define dso_local float @f() {
28+
; CHECK-LABEL: define dso_local float @f() {
29+
; CHECK-NEXT: [[ENTRY:.*:]]
30+
; CHECK-NEXT: [[TEST1:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 8
31+
; CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
32+
; CHECK-NEXT: store float 0.000000e+00, ptr [[F]], align 8
33+
; CHECK-NEXT: [[F1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
34+
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F1]], align 8
35+
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], 2.000000e+00
36+
; CHECK-NEXT: store float [[ADD]], ptr [[F1]], align 8
37+
; CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
38+
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F2]], align 8
39+
; CHECK-NEXT: ret float [[TMP1]]
40+
;
41+
entry:
42+
%test = alloca %struct.Outer, align 8
43+
%test1 = alloca %struct.Outer, align 8
44+
%f = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 0
45+
store float 0.000000e+00, ptr %f, align 8, !tbaa !9
46+
%inner_a = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 2
47+
%i = getelementptr inbounds nuw %struct.Inner, ptr %inner_a, i32 0, i32 0
48+
store i32 0, ptr %i, align 8, !tbaa !17
49+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %test, ptr align 8 %test1, i64 24, i1 false)
50+
%f1 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
51+
%0 = load float, ptr %f1, align 8, !tbaa !9
52+
%add = fadd float %0, 2.000000e+00
53+
store float %add, ptr %f1, align 8, !tbaa !9
54+
%f2 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
55+
%1 = load float, ptr %f2, align 8, !tbaa !9
56+
ret float %1
57+
}
58+
59+
!1 = !{!"any data access", !2, i64 0}
60+
!2 = !{!"any access", !3, i64 0}
61+
!3 = !{!"Flang function root test"}
62+
!4 = !{!5, !5, i64 0}
63+
!5 = !{!"allocated data/test_array_a", !6, i64 0}
64+
!6 = !{!"allocated data", !1, i64 0}
65+
!7 = !{!8, !8, i64 0}
66+
!8 = !{!"allocated data/test_array_b", !6, i64 0}
67+
!9 = !{!10, !11, i64 0}
68+
!10 = !{!"Outer", !11, i64 0, !14, i64 8, !15, i64 16}
69+
!11 = !{!"float", !12, i64 0}
70+
!12 = !{!"omnipotent char", !13, i64 0}
71+
!13 = !{!"Simple C/C++ TBAA"}
72+
!14 = !{!"double", !12, i64 0}
73+
!15 = !{!"Inner", !16, i64 0, !11, i64 4}
74+
!16 = !{!"int", !12, i64 0}
75+
!17 = !{!10, !16, i64 16}
76+
77+

llvm/test/Transforms/MemCpyOpt/stack-move.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,8 @@ define void @remove_scoped_noalias() {
259259
; CHECK-LABEL: define void @remove_scoped_noalias() {
260260
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
261261
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
262-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]]), !alias.scope [[META0:![0-9]+]]
263262
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
263+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
264264
; CHECK-NEXT: ret void
265265
;
266266
%src = alloca %struct.Foo, align 4
@@ -283,8 +283,8 @@ define void @remove_alloca_metadata() {
283283
; CHECK-LABEL: define void @remove_alloca_metadata() {
284284
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
285285
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
286-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]]), !alias.scope [[META0]]
287286
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
287+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
288288
; CHECK-NEXT: ret void
289289
;
290290
%src = alloca %struct.Foo, align 4, !annotation !3
@@ -308,8 +308,8 @@ define void @noalias_on_lifetime() {
308308
; CHECK-LABEL: define void @noalias_on_lifetime() {
309309
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
310310
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
311-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]]), !alias.scope [[META0]]
312311
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
312+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
313313
; CHECK-NEXT: ret void
314314
;
315315
%src = alloca %struct.Foo, align 4

0 commit comments

Comments
 (0)