Skip to content

Commit e385f5c

Browse files
DominikAdamski authored and shiltian committed
[LLVM][MemCpyOpt] Unify alias tags if we optimize allocas (llvm#129537)
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: llvm#133984 --------- Co-authored-by: Shilei Tian <[email protected]> (cherry picked from commit 716b02d)
1 parent e0db588 commit e385f5c

File tree

3 files changed

+94
-12
lines changed

3 files changed

+94
-12
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,7 +1518,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15181518
// to remove them.
15191519

15201520
SmallVector<Instruction *, 4> LifetimeMarkers;
1521-
SmallSet<Instruction *, 4> NoAliasInstrs;
1521+
SmallSet<Instruction *, 4> AAMetadataInstrs;
15221522
bool SrcNotDom = false;
15231523

15241524
// Recursively track the user and check whether modified alias exist.
@@ -1573,8 +1573,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15731573
continue;
15741574
}
15751575
}
1576-
if (UI->hasMetadata(LLVMContext::MD_noalias))
1577-
NoAliasInstrs.insert(UI);
1576+
AAMetadataInstrs.insert(UI);
1577+
15781578
if (!ModRefCallback(UI))
15791579
return false;
15801580
}
@@ -1679,11 +1679,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16791679
}
16801680

16811681
// As this transformation can cause memory accesses that didn't previously
1682-
// alias to begin to alias one another, we remove !noalias metadata from any
1683-
// uses of either alloca. This is conservative, but more precision doesn't
1684-
// seem worthwhile right now.
1685-
for (Instruction *I : NoAliasInstrs)
1682+
// alias to begin to alias one another, we remove !alias.scope, !noalias,
1683+
// !tbaa and !tbaa_struct metadata from any uses of either alloca.
1684+
// This is conservative, but more precision doesn't seem worthwhile
1685+
// right now.
1686+
for (Instruction *I : AAMetadataInstrs) {
1687+
I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
16861688
I->setMetadata(LLVMContext::MD_noalias, nullptr);
1689+
I->setMetadata(LLVMContext::MD_tbaa, nullptr);
1690+
I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
1691+
}
16871692

16881693
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
16891694
NumStackMove++;
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
3+
4+
define void @test() local_unnamed_addr {
5+
; CHECK-LABEL: define void @test() local_unnamed_addr {
6+
; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
7+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
8+
; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4
9+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
10+
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
11+
; CHECK-NEXT: ret void
12+
;
13+
%test_array_a = alloca [31 x float], align 4
14+
%test_array_b = alloca [31 x float], align 4
15+
%1 = getelementptr float, ptr %test_array_b, i64 1
16+
store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !4
17+
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
18+
%2 = getelementptr float, ptr %test_array_a, i64 1
19+
%3 = load float, ptr %2, align 4, !tbaa !7
20+
ret void
21+
}
22+
23+
%struct.Outer = type { float, double, %struct.Inner }
24+
%struct.Inner = type { i32, float }
25+
26+
; Function Attrs: nounwind uwtable
27+
define dso_local float @f() {
28+
; CHECK-LABEL: define dso_local float @f() {
29+
; CHECK-NEXT: [[ENTRY:.*:]]
30+
; CHECK-NEXT: [[TEST1:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 8
31+
; CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
32+
; CHECK-NEXT: store float 0.000000e+00, ptr [[F]], align 8
33+
; CHECK-NEXT: [[F1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
34+
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F1]], align 8
35+
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], 2.000000e+00
36+
; CHECK-NEXT: store float [[ADD]], ptr [[F1]], align 8
37+
; CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
38+
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F2]], align 8
39+
; CHECK-NEXT: ret float [[TMP1]]
40+
;
41+
entry:
42+
%test = alloca %struct.Outer, align 8
43+
%test1 = alloca %struct.Outer, align 8
44+
%f = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 0
45+
store float 0.000000e+00, ptr %f, align 8, !tbaa !9
46+
%inner_a = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 2
47+
%i = getelementptr inbounds nuw %struct.Inner, ptr %inner_a, i32 0, i32 0
48+
store i32 0, ptr %i, align 8, !tbaa !17
49+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %test, ptr align 8 %test1, i64 24, i1 false)
50+
%f1 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
51+
%0 = load float, ptr %f1, align 8, !tbaa !9
52+
%add = fadd float %0, 2.000000e+00
53+
store float %add, ptr %f1, align 8, !tbaa !9
54+
%f2 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
55+
%1 = load float, ptr %f2, align 8, !tbaa !9
56+
ret float %1
57+
}
58+
59+
!1 = !{!"any data access", !2, i64 0}
60+
!2 = !{!"any access", !3, i64 0}
61+
!3 = !{!"Flang function root test"}
62+
!4 = !{!5, !5, i64 0}
63+
!5 = !{!"allocated data/test_array_a", !6, i64 0}
64+
!6 = !{!"allocated data", !1, i64 0}
65+
!7 = !{!8, !8, i64 0}
66+
!8 = !{!"allocated data/test_array_b", !6, i64 0}
67+
!9 = !{!10, !11, i64 0}
68+
!10 = !{!"Outer", !11, i64 0, !14, i64 8, !15, i64 16}
69+
!11 = !{!"float", !12, i64 0}
70+
!12 = !{!"omnipotent char", !13, i64 0}
71+
!13 = !{!"Simple C/C++ TBAA"}
72+
!14 = !{!"double", !12, i64 0}
73+
!15 = !{!"Inner", !16, i64 0, !11, i64 4}
74+
!16 = !{!"int", !12, i64 0}
75+
!17 = !{!10, !16, i64 16}
76+
77+

llvm/test/Transforms/MemCpyOpt/stack-move.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ define void @remove_scoped_noalias() {
259259
; CHECK-LABEL: define void @remove_scoped_noalias() {
260260
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
261261
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
262-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
262+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
263263
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
264264
; CHECK-NEXT: ret void
265265
;
@@ -283,7 +283,7 @@ define void @remove_alloca_metadata() {
283283
; CHECK-LABEL: define void @remove_alloca_metadata() {
284284
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
285285
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
286-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
286+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
287287
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
288288
; CHECK-NEXT: ret void
289289
;
@@ -308,7 +308,7 @@ define void @noalias_on_lifetime() {
308308
; CHECK-LABEL: define void @noalias_on_lifetime() {
309309
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
310310
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
311-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
311+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
312312
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
313313
; CHECK-NEXT: ret void
314314
;
@@ -399,10 +399,10 @@ define void @terminator_lastuse() personality i32 0 {
399399
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
400400
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
401401
; CHECK-NEXT: [[RV:%.*]] = invoke i32 @use_nocapture(ptr [[SRC]])
402-
; CHECK-NEXT: to label [[SUC:%.*]] unwind label [[UNW:%.*]]
402+
; CHECK-NEXT: to label [[SUC:%.*]] unwind label [[UNW:%.*]]
403403
; CHECK: unw:
404404
; CHECK-NEXT: [[LP:%.*]] = landingpad i32
405-
; CHECK-NEXT: cleanup
405+
; CHECK-NEXT: cleanup
406406
; CHECK-NEXT: resume i32 0
407407
; CHECK: suc:
408408
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)