-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLVM][MemCpyOpt] Unify alias tags if we optimize allocas #129537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LLVM][MemCpyOpt] Unify alias tags if we optimize allocas #129537
Conversation
Optimization of alloca instructions may lead to invalid alias tags, and incorrect alias tags can lead to wrong optimization results. This commit unifies alias tags when the memcpy optimization replaces two arrays with one array.
@llvm/pr-subscribers-llvm-transforms Author: Dominik Adamski (DominikAdamski) Changes: Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit unifies alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. Full diff: https://github.com/llvm/llvm-project/pull/129537.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 43496d1c80df5..10342a6b32725 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1516,6 +1516,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
SmallVector<Instruction *, 4> LifetimeMarkers;
SmallSet<Instruction *, 4> NoAliasInstrs;
bool SrcNotDom = false;
+ SmallSet<Instruction *, 4> SrcAllocaInstUsers;
+ SmallSet<Instruction *, 4> DestAllocaInstUsers;
// Recursively track the user and check whether modified alias exist.
auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
@@ -1524,8 +1526,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
};
auto CaptureTrackingWithModRef =
- [&](Instruction *AI,
- function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+ [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
+ SmallSet<Instruction *, 4> &AllocaInstUsersWithTBAA) -> bool {
SmallVector<Instruction *, 8> Worklist;
Worklist.push_back(AI);
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1569,6 +1571,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
}
}
+ if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa)) {
+ AllocaInstUsersWithTBAA.insert(UI);
+ }
if (UI->hasMetadata(LLVMContext::MD_noalias))
NoAliasInstrs.insert(UI);
if (!ModRefCallback(UI))
@@ -1621,7 +1626,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+ if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
+ DestAllocaInstUsers))
return false;
// Bailout if Dest may have any ModRef before Store.
if (!ReachabilityWorklist.empty() &&
@@ -1647,7 +1653,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+ if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
+ SrcAllocaInstUsers))
return false;
// We can do the transformation. First, move the SrcAlloca to the start of the
@@ -1681,6 +1688,15 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
for (Instruction *I : NoAliasInstrs)
I->setMetadata(LLVMContext::MD_noalias, nullptr);
+ // If we merge two allocas we need to uniform alias tags as well
+ if (!SrcAllocaInstUsers.empty()) {
+ MDNode *mergeTBAA =
+ (*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa);
+ for (Instruction *it : DestAllocaInstUsers) {
+ it->setMetadata(LLVMContext::MD_tbaa, mergeTBAA);
+ }
+ }
+
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
NumStackMove++;
return true;
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
new file mode 100644
index 0000000000000..4362892f0e8c2
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -0,0 +1,99 @@
+; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
+; The aim of this test is to check if MemCpyOpt pass merges alias tags
+; after memcpy optimization
+
+; ModuleID = 'FIRModule'
+source_filename = "FIRModule"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@data_arr = internal unnamed_addr constant [31 x float] [float 0x3E68DA0CA0000000, float 0x3E692863A0000000, float 0x3E6AEF5000000000, float 0x3E6E2272C0000000, float 0x3E7271B720000000, float 0x3E777DA440000000, float 0x3E7E8C46C0000000, float 0x3E8458EFC0000000, float 0x3E8D0123C0000000, float 0x3E95E78260000000, float 0x3EA0AB7AC0000000, float 0x3EA89F4B40000000, float 0x3EB10FFB60000000, float 0x3EB5F1D140000000, float 0x3EBB435260000000, float 0x3EC0DE9700000000, float 0x3EC51B11A0000000, float 0x3ECA419FC0000000, float 0x3ED01B2B20000000, float 0x3ED3B9CEC0000000, float 0x3ED7028C40000000, float 0x3EDA60C320000000, float 0x3EDD54AD40000000, float 0x3EDF6E9F00000000, float 0x3EE130BB20000000, float 0x3EE4332400000000, float 0x3EE7575F80000000, float 0x3EE8088A60000000, float 0x3EE3B0AE60000000, float 0x3ED9BB6800000000, float 0x3ED9BB6800000000]
+
+; CHECK-LABEL: @test(
+; CHECK: [[ARR_UNDER_TEST:%.*]] = alloca [31 x float], align 4
+; CHECK: store float 0x3E6AA51880000000, ptr [[ARR_UNDER_TEST]], align 4, !tbaa [[ARR_TAG:!.[0-9]+]]
+; CHECK-LABEL: init_loop:
+; CHECK: store float [[TMP0:%.*]], ptr [[TMP1:%.*]], align 4, !tbaa [[ARR_TAG]]
+; CHECK-LABEL: loop:
+; CHECK: [[TMP2:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]], i64 [[TMP3:%.*]]
+; CHECK: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[ARR_TAG]]
+define void @test(ptr captures(none) %0, ptr readonly captures(none) %1, ptr readonly captures(none) %2, ptr readonly captures(none) %3) local_unnamed_addr #0 {
+ %5 = alloca [32 x float], align 4
+ %6 = alloca [31 x float], align 4
+ %7 = alloca [31 x float], align 4
+ %8 = load i32, ptr %2, align 4, !tbaa !4
+ %9 = sext i32 %8 to i64
+ %10 = load i32, ptr %3, align 4, !tbaa !10
+ %11 = add i32 %10, 1
+ %12 = sext i32 %11 to i64
+ %13 = sub nsw i64 %12, %9
+ %14 = tail call i64 @llvm.smax.i64(i64 %13, i64 -1)
+ %15 = add nsw i64 %14, 1
+ %16 = alloca float, i64 %15, align 4
+ store float 0x3E6AA51880000000, ptr %7, align 4, !tbaa !12
+ br label %init_loop
+
+init_loop:
+ %19 = phi float [ 0x3E68DA0CA0000000, %4 ], [ %22, %init_loop ]
+ %indvars.iv = phi i64 [ 2, %4 ], [ %indvars.iv.next, %init_loop ]
+ %20 = add nsw i64 %indvars.iv, -1
+ %21 = getelementptr float, ptr @data_arr, i64 %20
+ %22 = load float, ptr %21, align 4, !tbaa !15
+ %23 = fsub contract float %22, %19
+ %33 = getelementptr float, ptr %7, i64 %20
+ store float %23, ptr %33, align 4, !tbaa !12
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 32
+ br i1 %exitcond.not, label %.preheader55.preheader, label %init_loop
+
+.preheader55.preheader:
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %6, ptr noundef nonnull align 4 dereferenceable(124) %7, i64 124, i1 false), !tbaa !22
+ %154 = icmp sgt i64 %13, -1
+ br i1 %154, label %loop, label %._crit_edge56
+
+loop: ; preds = %.preheader, %211
+ %indvars.iv73 = phi i64 [ 0, %.preheader55.preheader ], [ %indvars.iv.next74, %loop ]
+ %indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1
+ %223 = getelementptr float, ptr %6, i64 %indvars.iv73
+ %225 = load float, ptr %223, align 4, !tbaa !31
+ %exitcond76.not = icmp eq i64 %indvars.iv.next74, 32
+ br i1 %exitcond76.not, label %loop, label %._crit_edge56
+
+._crit_edge56: ; preds = %loop, %._crit_edge
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.smax.i64(i64, i64) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #2
+
+attributes #0 = { "target-cpu"="x86-64" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{!"flang version 21.0.0 (https://github.com/llvm/llvm-project.git 4d79f420ce5b5100f72f720eab2d3881f97abd0d)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"dummy arg data/param_1", !6, i64 0}
+!6 = !{!"dummy arg data", !7, i64 0}
+!7 = !{!"any data access", !8, i64 0}
+!8 = !{!"any access", !9, i64 0}
+!9 = !{!"Flang function root test"}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"dummy arg data/param_2", !6, i64 0}
+!12 = !{!13, !13, i64 0}
+!13 = !{!"allocated data/test_array_a", !14, i64 0}
+!14 = !{!"allocated data", !7, i64 0}
+!15 = !{!16, !16, i64 0}
+!16 = !{!"global data/data_arr", !17, i64 0}
+!17 = !{!"global data", !7, i64 0}
+!22 = !{!14, !14, i64 0}
+!31 = !{!32, !32, i64 0}
+!32 = !{!"allocated data/test_array_b", !14, i64 0}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 from a flang perspective. This does solve the issue that led to local tbaa not being enabled by default.
The code changes look good as far as I can tell, but it would be best if somebody with more experience with LLVM transforms took a look too.
Thanks for your work on this Dominik
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to reduce the input IR a bit more (and the number of TBAA tags) to make it easier to see what's going on?
Hi, |
@fhahn I have simplified the test case. Could you please review this PR? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, but would like @fhahn to do a final review
source_filename = "FIRModule" | ||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not needed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the review, I applied your suggestion.
Co-authored-by: Shilei Tian <[email protected]>
Co-authored-by: Shilei Tian <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test still doesn't look minimal. Wouldn't it be sufficient to have a store with one tbaa tag, a memcpy and a load with another tag?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed unnecessary instructions.
@@ -1569,6 +1571,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |||
continue; | |||
} | |||
} | |||
if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about MD_tbaa_struct?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added MD_tbaa_struct .
MDNode *mergeTBAA = | ||
(*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa); | ||
for (Instruction *It : DestAllocaInstUsers) | ||
It->setMetadata(LLVMContext::MD_tbaa, mergeTBAA); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't look safe? You are just assigning one tag to all the accesses now. There may be accesses with different tags, and replacing them with a different one would be a miscompile.
The easy fix here would be to just drop all the tbaa data, as is done for noalias metadata. Otherwise you need to do a by-offset analysis of accesses and merge the tbaa tags for specific offsets.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In any case, we should have a test case where multiple tbaa tags are involved at different offsets.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I dropped tbaa metadata and I added another test.
Maybe https://godbolt.org/z/qv1Mqo7eP is an interesting test. It comes from a real miscompilation in julia and has alias scopes, no alias and tbaa |
[&](Instruction *AI, | ||
function_ref<bool(Instruction *)> ModRefCallback) -> bool { | ||
[&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback, | ||
SmallSet<Instruction *, 4> &AllocaInstUsersWithTBAA) -> bool { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for the argument, the variable is captured.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
@@ -1681,6 +1687,12 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |||
for (Instruction *I : NoAliasInstrs) | |||
I->setMetadata(LLVMContext::MD_noalias, nullptr); | |||
|
|||
// Remove !tbaa and !tbaa_struct from the metadata, since they are invalid. | |||
for (Instruction *I : OptimizedAllocaInstUsers) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we combine NoAliasInstrs and OptimizedAllocaInstUsers? The purpose is basically the same, I don't think we need separate sets for different metadata.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I combined them.
Hi @gbaraldi |
So the original, non-minimized code is this; I've had some issues fully minimizing it without losing the behaviour: https://godbolt.org/z/7jaanfTqM. On trunk we are removing noalias in both memcpys, but not the other aliasing metadata. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks basically fine to me.
I'd recommend rebasing this. The surrounding code has changed significantly, even if Git thinks it merges cleanly.
@@ -1569,6 +1569,10 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |||
continue; | |||
} | |||
} | |||
if (UI->hasMetadata(LLVMContext::MD_tbaa)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd combine these checks with `||`; no need to repeat the insert thrice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. I merged the main branch and simplified the collection of instructions.
@@ -1569,6 +1569,10 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |||
continue; | |||
} | |||
} | |||
if (UI->hasMetadata(LLVMContext::MD_tbaa)) | |||
NoAliasInstrs.insert(UI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that this no longer holds only `!noalias`, rename to `AAMetadataInstrs` or something?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Though at this point it would probably make sense to just collect all instructions here (regardless of whether they have metadata) and call setMetadata on all of them, so we don't have to list the MD kinds twice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, I just collect all instructions here
@gbaraldi Thank you for full LLVM IR code. The full LLVM IR code contains two types of memcpy optimization:
The minimized LLVM IR contains only the first optimization, which is not part of this PR. The second optimization for the full LLVM IR file affects memcpy operations in blocks |
// This is conservative, but more precision doesn't seem worthwhile | ||
// right now. | ||
for (Instruction *I : AAMetadataInstrs) { | ||
I->setMetadata(LLVMContext::MD_alias_scope, nullptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I also remove !alias.scope metadata
@nikic May I merge this PR? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: llvm#133984 --------- Co-authored-by: Shilei Tian <[email protected]> (cherry picked from commit 716b02d)
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: llvm#133984 --------- Co-authored-by: Shilei Tian <[email protected]> (cherry picked from commit 716b02d)
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: llvm#133984 --------- Co-authored-by: Shilei Tian <[email protected]>
Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (llvm#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis.
Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (llvm#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis.
Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis.
…ts (#139682) Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (llvm/llvm-project#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis.
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: llvm#133984 --------- Co-authored-by: Shilei Tian <[email protected]> (cherry picked from commit 716b02d)
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: llvm#133984 --------- Co-authored-by: Shilei Tian <[email protected]> (cherry picked from commit 716b02d)
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: llvm#133984 --------- Co-authored-by: Shilei Tian <[email protected]> (cherry picked from commit 716b02d)
Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (llvm#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis. More accurate alias analysis assumes that Cray pointers do not alias with other variables. This assumption is common among other compilers. If the code violates this assumption, it can lead to incorrect results (see: llvm#141928).
Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis. More accurate alias analysis assumes that Cray pointers do not alias with other variables. This assumption is common among other compilers. If the code violates this assumption, it can lead to incorrect results (see: #141928).
…ts (#143489) Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (llvm/llvm-project#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis. More accurate alias analysis assumes that Cray pointers do not alias with other variables. This assumption is common among other compilers. If the code violates this assumption, it can lead to incorrect results (see: llvm/llvm-project#141928).
…3489) Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (llvm#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis. More accurate alias analysis assumes that Cray pointers do not alias with other variables. This assumption is common among other compilers. If the code violates this assumption, it can lead to incorrect results (see: llvm#141928).
…3489) Previously, a bug in the MemCpyOpt LLVM IR pass caused issues with adding alias tags for locally allocated objects for Fortran code. However, the bug has now been fixed (llvm#129537), and we can safely enable alias tags for these objects. This change should improve the accuracy of the alias analysis. More accurate alias analysis assumes that Cray pointers do not alias with other variables. This assumption is common among other compilers. If the code violates this assumption, it can lead to incorrect results (see: llvm#141928).
Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`.
This commit unifies alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`.