Skip to content

Commit 63acf28

Browse files
committed
[SROA] Use !tbaa instead of !tbaa.struct if op matches field. (llvm#81289)
If a split memory access introduced by SROA accesses precisely a single field of the original operation's !tbaa.struct, use the !tbaa tag for the accessed field directly instead of the full !tbaa.struct. InstCombine already has similar logic. The motivation for this and the follow-on patches is to improve codegen for libc++, where using memcpy limits optimizations such as vectorization of code iterating over std::vector<std::complex<float>>: https://godbolt.org/z/f3vqYos3c Depends on llvm#81285. (cherry-picked from 53c0e80)
1 parent d7bf86f commit 63acf28

File tree

5 files changed

+101
-66
lines changed

5 files changed

+101
-66
lines changed

llvm/include/llvm/IR/Metadata.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,9 @@ struct AAMDNodes {
740740
/// If this AAMDNode has !tbaa.struct and \p AccessSize matches the size of the
741741
/// field at offset 0, get the TBAA tag describing the accessed field.
742742
AAMDNodes adjustForAccess(unsigned AccessSize);
743+
AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy,
744+
const DataLayout &DL);
745+
AAMDNodes adjustForAccess(size_t Offset, unsigned AccessSize);
743746
};
744747

745748
// Specialize DenseMapInfo for AAMDNodes.

llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,3 +834,20 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
834834
}
835835
return New;
836836
}
837+
838+
/// Shift these AA tags by \p Offset and then try to refine them for an access
/// of type \p AccessTy.
///
/// The refinement is delegated to the single-argument adjustForAccess()
/// overload, called on the shifted tags with the store size of \p AccessTy as
/// computed by \p DL. If the refinement is not attempted, the shifted tags are
/// returned as they are.
///
/// \param Offset   Offset passed to shift().
/// \param AccessTy Type of the access the returned tags are meant for.
/// \param DL       Data layout used to query the size of \p AccessTy.
/// \returns The shifted tags, refined by adjustForAccess(unsigned) when
///          possible.
AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy,
839+
const DataLayout &DL) {
840+
AAMDNodes New = shift(Offset);
841+
// Do not refine when DL reports that the type size differs from the store
// size; keep only the shifted tags.
if (!DL.typeSizeEqualsStoreSize(AccessTy))
842+
return New;
843+
TypeSize Size = DL.getTypeStoreSize(AccessTy);
844+
// Scalable sizes are not refined either; the refinement below only receives
// the known minimum value of the size.
if (Size.isScalable())
845+
return New;
846+
847+
return New.adjustForAccess(Size.getKnownMinValue());
848+
}
849+
850+
/// Shift these AA tags by \p Offset and then refine the result for an access
/// of \p AccessSize via the single-argument adjustForAccess() overload.
///
/// \param Offset     Offset passed to shift().
/// \param AccessSize Access size forwarded to adjustForAccess(unsigned).
/// \returns The result of adjustForAccess(AccessSize) on the shifted tags.
AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) {
851+
AAMDNodes New = shift(Offset);
852+
return New.adjustForAccess(AccessSize);
853+
}

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2715,7 +2715,8 @@ class llvm::sroa::AllocaSliceRewriter
27152715

27162716
// Do this after copyMetadataForLoad() to preserve the TBAA shift.
27172717
if (AATags)
2718-
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2718+
NewLI->setAAMetadata(AATags.adjustForAccess(
2719+
NewBeginOffset - BeginOffset, NewLI->getType(), DL));
27192720

27202721
// Try to preserve nonnull metadata
27212722
V = NewLI;
@@ -2736,8 +2737,11 @@ class llvm::sroa::AllocaSliceRewriter
27362737
LoadInst *NewLI =
27372738
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
27382739
getSliceAlign(), LI.isVolatile(), LI.getName());
2740+
27392741
if (AATags)
2740-
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2742+
NewLI->setAAMetadata(AATags.adjustForAccess(
2743+
NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2744+
27412745
if (LI.isVolatile())
27422746
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
27432747
NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
@@ -2807,7 +2811,8 @@ class llvm::sroa::AllocaSliceRewriter
28072811
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
28082812
LLVMContext::MD_access_group});
28092813
if (AATags)
2810-
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2814+
Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
2815+
V->getType(), DL));
28112816
Pass.DeadInsts.push_back(&SI);
28122817

28132818
// NOTE: Careful to use OrigV rather than V.
@@ -2834,7 +2839,8 @@ class llvm::sroa::AllocaSliceRewriter
28342839
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
28352840
LLVMContext::MD_access_group});
28362841
if (AATags)
2837-
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2842+
Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
2843+
V->getType(), DL));
28382844

28392845
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
28402846
Store, Store->getPointerOperand(),
@@ -2910,7 +2916,8 @@ class llvm::sroa::AllocaSliceRewriter
29102916
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
29112917
LLVMContext::MD_access_group});
29122918
if (AATags)
2913-
NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2919+
NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
2920+
V->getType(), DL));
29142921
if (SI.isVolatile())
29152922
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
29162923
if (NewSI->isAtomic())
@@ -3011,12 +3018,14 @@ class llvm::sroa::AllocaSliceRewriter
30113018
// a single value type, just emit a memset.
30123019
if (!CanContinue) {
30133020
Type *SizeTy = II.getLength()->getType();
3014-
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3021+
unsigned Sz = NewEndOffset - NewBeginOffset;
3022+
Constant *Size = ConstantInt::get(SizeTy, Sz);
30153023
MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
30163024
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
30173025
MaybeAlign(getSliceAlign()), II.isVolatile()));
30183026
if (AATags)
3019-
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3027+
New->setAAMetadata(
3028+
AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
30203029

30213030
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
30223031
New, New->getRawDest(), nullptr, DL);
@@ -3092,7 +3101,8 @@ class llvm::sroa::AllocaSliceRewriter
30923101
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
30933102
LLVMContext::MD_access_group});
30943103
if (AATags)
3095-
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3104+
New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3105+
V->getType(), DL));
30963106

30973107
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
30983108
New, New->getPointerOperand(), V, DL);
@@ -3296,7 +3306,8 @@ class llvm::sroa::AllocaSliceRewriter
32963306
Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
32973307
LLVMContext::MD_access_group});
32983308
if (AATags)
3299-
Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3309+
Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3310+
Load->getType(), DL));
33003311
Src = Load;
33013312
}
33023313

@@ -3318,7 +3329,8 @@ class llvm::sroa::AllocaSliceRewriter
33183329
Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
33193330
LLVMContext::MD_access_group});
33203331
if (AATags)
3321-
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3332+
Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3333+
Src->getType(), DL));
33223334

33233335
APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
33243336
if (IsDest) {
@@ -3643,7 +3655,8 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
36433655
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
36443656
if (AATags &&
36453657
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
3646-
Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
3658+
Load->setAAMetadata(
3659+
AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
36473660

36483661
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
36493662
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3694,8 +3707,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
36943707
APInt Offset(
36953708
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
36963709
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
3697-
if (AATags)
3698-
Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
3710+
if (AATags) {
3711+
Store->setAAMetadata(AATags.adjustForAccess(
3712+
Offset.getZExtValue(), ExtractValue->getType(), DL));
3713+
}
36993714

37003715
// migrateDebugInfo requires the base Alloca. Walk to it from this gep.
37013716
// If we cannot (because there's an intervening non-const or unbounded
@@ -4317,6 +4332,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
43174332

43184333
Value *StoreBasePtr = SI->getPointerOperand();
43194334
IRB.SetInsertPoint(SI);
4335+
AAMDNodes AATags = SI->getAAMetadata();
43204336

43214337
LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
43224338

@@ -4337,6 +4353,10 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
43374353
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
43384354
LLVMContext::MD_access_group,
43394355
LLVMContext::MD_DIAssignID});
4356+
4357+
if (AATags)
4358+
PStore->setAAMetadata(
4359+
AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
43404360
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
43414361
}
43424362

llvm/test/Transforms/SROA/tbaa-struct2.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define double @bar(ptr %wishart) {
1313
; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4
1414
; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
1515
; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8
16-
; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
16+
; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA5:![0-9]+]]
1717
; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12
18-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
18+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
1919
; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]])
2020
; CHECK-NEXT: ret double [[CALL]]
2121
;
@@ -38,15 +38,14 @@ define double @bar(ptr %wishart) {
3838
;.
3939
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
4040
;.
41-
; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, !1, i64 8, i64 4, !5}
42-
; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0}
43-
; CHECK: [[META2:![0-9]+]] = !{!"double", !3, i64 0}
44-
; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0}
45-
; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"}
46-
; CHECK: [[META5:![0-9]+]] = !{!6, !6, i64 0}
47-
; CHECK: [[META6:![0-9]+]] = !{!"int", !3, i64 0}
48-
; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 4, !5}
49-
; CHECK: [[TBAA_STRUCT8]] = !{}
41+
; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, [[META1:![0-9]+]], i64 8, i64 4, [[TBAA5]]}
42+
; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
43+
; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0}
44+
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
45+
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
46+
; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
47+
; CHECK: [[META6]] = !{!"int", [[META3]], i64 0}
48+
; CHECK: [[TBAA_STRUCT7]] = !{}
5049
;.
5150
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
5251
; CHECK-MODIFY-CFG: {{.*}}

0 commit comments

Comments
 (0)