Skip to content

Commit 90639e9

Browse files
committed
[SROA] Use !tbaa instead of !tbaa.struct if op matches field.
If a split memory access introduced by SROA accesses precisely a single field of the original operation's !tbaa.struct, use the !tbaa tag for the accessed field directly instead of the full !tbaa.struct. InstCombine already has similar logic. The motivation for this and follow-on patches is to improve codegen for libc++, where using memcpy limits optimizations such as vectorization of code iterating over std::vector<std::complex<float>>: https://godbolt.org/z/f3vqYos3c Depends on #81285.
1 parent 99cf032 commit 90639e9

File tree

5 files changed

+67
-33
lines changed

5 files changed

+67
-33
lines changed

llvm/include/llvm/IR/Metadata.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,8 @@ struct AAMDNodes {
849849
/// If this AAMDNode has !tbaa.struct and \p AccessSize matches the size of the
850850
/// field at offset 0, get the TBAA tag describing the accessed field.
851851
AAMDNodes adjustForAccess(unsigned AccessSize);
852+
AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy,
853+
const DataLayout &DL);
852854
};
853855

854856
// Specialize DenseMapInfo for AAMDNodes.

llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,3 +833,16 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
833833
}
834834
return New;
835835
}
836+
837+
AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy,
838+
const DataLayout &DL) {
839+
840+
AAMDNodes New = shift(Offset);
841+
if (!DL.typeSizeEqualsStoreSize(AccessTy))
842+
return New;
843+
TypeSize Size = DL.getTypeStoreSize(AccessTy);
844+
if (Size.isScalable())
845+
return New;
846+
847+
return New.adjustForAccess(Size.getKnownMinValue());
848+
}

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2914,7 +2914,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
29142914

29152915
// Do this after copyMetadataForLoad() to preserve the TBAA shift.
29162916
if (AATags)
2917-
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2917+
NewLI->setAAMetadata(AATags.adjustForAccess(
2918+
NewBeginOffset - BeginOffset, NewLI->getType(), DL));
29182919

29192920
// Try to preserve nonnull metadata
29202921
V = NewLI;
@@ -2936,7 +2937,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
29362937
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
29372938
getSliceAlign(), LI.isVolatile(), LI.getName());
29382939
if (AATags)
2939-
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2940+
NewLI->setAAMetadata(AATags.adjustForAccess(
2941+
NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2942+
29402943
if (LI.isVolatile())
29412944
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
29422945
NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
@@ -3011,7 +3014,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
30113014
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
30123015
LLVMContext::MD_access_group});
30133016
if (AATags)
3014-
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3017+
Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3018+
V->getType(), DL));
30153019
Pass.DeadInsts.push_back(&SI);
30163020

30173021
// NOTE: Careful to use OrigV rather than V.
@@ -3038,7 +3042,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
30383042
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
30393043
LLVMContext::MD_access_group});
30403044
if (AATags)
3041-
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3045+
Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3046+
V->getType(), DL));
30423047

30433048
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
30443049
Store, Store->getPointerOperand(),
@@ -3097,8 +3102,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
30973102
}
30983103
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
30993104
LLVMContext::MD_access_group});
3100-
if (AATags)
3101-
NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3105+
if (AATags) {
3106+
NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3107+
V->getType(), DL));
3108+
}
31023109
if (SI.isVolatile())
31033110
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
31043111
if (NewSI->isAtomic())
@@ -3280,8 +3287,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
32803287
IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
32813288
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
32823289
LLVMContext::MD_access_group});
3283-
if (AATags)
3284-
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3290+
if (AATags) {
3291+
New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3292+
V->getType(), DL));
3293+
}
32853294

32863295
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
32873296
New, New->getPointerOperand(), V, DL);
@@ -3486,7 +3495,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
34863495
Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
34873496
LLVMContext::MD_access_group});
34883497
if (AATags)
3489-
Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3498+
Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3499+
Load->getType(), DL));
34903500
Src = Load;
34913501
}
34923502

@@ -3507,8 +3517,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
35073517
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
35083518
Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
35093519
LLVMContext::MD_access_group});
3510-
if (AATags)
3511-
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3520+
if (AATags) {
3521+
Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3522+
Src->getType(), DL));
3523+
}
35123524

35133525
APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
35143526
if (IsDest) {
@@ -3836,7 +3848,8 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
38363848
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
38373849
if (AATags &&
38383850
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
3839-
Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
3851+
Load->setAAMetadata(
3852+
AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
38403853

38413854
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
38423855
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3887,8 +3900,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
38873900
APInt Offset(
38883901
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
38893902
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
3890-
if (AATags)
3891-
Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
3903+
if (AATags) {
3904+
Store->setAAMetadata(AATags.adjustForAccess(
3905+
Offset.getZExtValue(), ExtractValue->getType(), DL));
3906+
}
38923907

38933908
// migrateDebugInfo requires the base Alloca. Walk to it from this gep.
38943909
// If we cannot (because there's an intervening non-const or unbounded
@@ -4542,6 +4557,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
45424557

45434558
Value *StoreBasePtr = SI->getPointerOperand();
45444559
IRB.SetInsertPoint(SI);
4560+
AAMDNodes AATags = SI->getAAMetadata();
45454561

45464562
LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
45474563

@@ -4561,6 +4577,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
45614577
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
45624578
LLVMContext::MD_access_group,
45634579
LLVMContext::MD_DIAssignID});
4580+
4581+
if (AATags)
4582+
PStore->setAAMetadata(
4583+
AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
45644584
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
45654585
}
45664586

llvm/test/Transforms/SROA/tbaa-struct2.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define double @bar(ptr %wishart) {
1313
; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4
1414
; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
1515
; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8
16-
; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
16+
; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA5:![0-9]+]]
1717
; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12
18-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
18+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
1919
; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]])
2020
; CHECK-NEXT: ret double [[CALL]]
2121
;
@@ -38,15 +38,14 @@ define double @bar(ptr %wishart) {
3838
;.
3939
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
4040
;.
41-
; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, !1, i64 8, i64 4, !5}
42-
; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0}
43-
; CHECK: [[META2:![0-9]+]] = !{!"double", !3, i64 0}
44-
; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0}
45-
; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"}
46-
; CHECK: [[META5:![0-9]+]] = !{!6, !6, i64 0}
47-
; CHECK: [[META6:![0-9]+]] = !{!"int", !3, i64 0}
48-
; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 4, !5}
49-
; CHECK: [[TBAA_STRUCT8]] = !{}
41+
; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, [[META1:![0-9]+]], i64 8, i64 4, [[TBAA5]]}
42+
; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
43+
; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0}
44+
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
45+
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
46+
; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
47+
; CHECK: [[META6]] = !{!"int", [[META3]], i64 0}
48+
; CHECK: [[TBAA_STRUCT7]] = !{}
5049
;.
5150
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
5251
; CHECK-MODIFY-CFG: {{.*}}

llvm/test/Transforms/SROA/tbaa-struct3.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(2
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32
99
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[B]] to i32
10-
; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4
10+
; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
1111
; CHECK-NEXT: [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4
12-
; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4
12+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[TBAA1:![0-9]+]]
1313
; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[RES]], align 8
1414
; CHECK-NEXT: ret void
1515
;
@@ -29,9 +29,9 @@ define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) {
2929
; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) {
3030
; CHECK-NEXT: entry:
3131
; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8
32-
; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
32+
; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0]]
3333
; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
34-
; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
34+
; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA1]]
3535
; CHECK-NEXT: ret void
3636
;
3737
entry:
@@ -53,7 +53,7 @@ define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceabl
5353
; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
5454
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B]] to i32
5555
; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
56-
; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5]]
56+
; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
5757
; CHECK-NEXT: ret void
5858
;
5959
entry:
@@ -98,10 +98,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
9898
!3 = !{!"omnipotent char", !4, i64 0}
9999
!4 = !{!"Simple C++ TBAA"}
100100
;.
101-
; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[META1:![0-9]+]], i64 4, i64 4, [[META1]]}
102-
; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
101+
; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 4, [[TBAA1]]}
102+
; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
103103
; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0}
104104
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
105105
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
106-
; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[META1]]}
106+
; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]}
107107
;.

0 commit comments

Comments
 (0)