Skip to content

Commit 3eb6fef

Browse files
committed
[LoopIdiom] Preserve alias information for memset_pattern
TBAA/NoAlias/AliasScope and other information is currently preserved when upgrading to a memcpy/memset. However, it is not preserved when upgrading to the macOS memset_pattern function. This adds the same alias information preservation to memset_pattern. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D152934
1 parent e0d5729 commit 3eb6fef

File tree

2 files changed

+136
-11
lines changed

2 files changed

+136
-11
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,20 +1074,24 @@ bool LoopIdiomRecognize::processLoopStridedStore(
10741074
Value *NumBytes =
10751075
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
10761076

1077+
if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))
1078+
return Changed;
1079+
1080+
AAMDNodes AATags = TheStore->getAAMetadata();
1081+
for (Instruction *Store : Stores)
1082+
AATags = AATags.merge(Store->getAAMetadata());
1083+
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1084+
AATags = AATags.extendTo(CI->getZExtValue());
1085+
else
1086+
AATags = AATags.extendTo(-1);
1087+
10771088
CallInst *NewCall;
10781089
if (SplatValue) {
1079-
AAMDNodes AATags = TheStore->getAAMetadata();
1080-
for (Instruction *Store : Stores)
1081-
AATags = AATags.merge(Store->getAAMetadata());
1082-
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1083-
AATags = AATags.extendTo(CI->getZExtValue());
1084-
else
1085-
AATags = AATags.extendTo(-1);
1086-
10871090
NewCall = Builder.CreateMemSet(
10881091
BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
10891092
/*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
1090-
} else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
1093+
} else {
1094+
assert (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16));
10911095
// Everything is emitted in default address space
10921096
Type *Int8PtrTy = DestInt8PtrTy;
10931097

@@ -1105,8 +1109,17 @@ bool LoopIdiomRecognize::processLoopStridedStore(
11051109
GV->setAlignment(Align(16));
11061110
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
11071111
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
1108-
} else
1109-
return Changed;
1112+
1113+
// Set the TBAA info if present.
1114+
if (AATags.TBAA)
1115+
NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA);
1116+
1117+
if (AATags.Scope)
1118+
NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope);
1119+
1120+
if (AATags.NoAlias)
1121+
NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias);
1122+
}
11101123

11111124
NewCall->setDebugLoc(TheStore->getDebugLoc());
11121125

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes="loop-idiom" < %s -S | FileCheck %s
3+
4+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
5+
6+
target triple = "x86_64-apple-darwin10.0.0"
7+
8+
9+
; Test: a loop that stores the constant double 3.14159e+00 to p[0] .. p[15]
; (the loop exits once %inc reaches 16); the store carries !tbaa !5.
; The CHECK lines expect a single memset_pattern16 call of 128 bytes in the
; entry block, with a !tbaa attachment on the call.
define dso_local void @double_memset(ptr nocapture %p) {
10+
; CHECK-LABEL: @double_memset(
11+
; CHECK-NEXT: entry:
12+
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 128), !tbaa [[TBAA0:![0-9]+]]
13+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
14+
; CHECK: for.cond.cleanup:
15+
; CHECK-NEXT: ret void
16+
; CHECK: for.body:
17+
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
18+
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
19+
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
20+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
21+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
22+
;
23+
entry:
24+
br label %for.body
25+
26+
for.cond.cleanup:
27+
ret void
28+
29+
for.body:
30+
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
31+
%ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07
32+
store double 3.14159e+00, ptr %ptr1, align 1, !tbaa !5
33+
%inc = add nuw nsw i64 %i.07, 1
34+
%exitcond.not = icmp eq i64 %inc, 16
35+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
36+
}
37+
38+
39+
; Test: the same 16-iteration constant-double store loop as @double_memset,
; but the store carries !tbaa !10, whose base type node (!9) is named
; "_ZTS1A". The CHECK lines expect a 128-byte memset_pattern16 call in the
; entry block, with a !tbaa attachment on the call.
define dso_local void @struct_memset(ptr nocapture %p) {
40+
; CHECK-LABEL: @struct_memset(
41+
; CHECK-NEXT: entry:
42+
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 128), !tbaa [[TBAA4:![0-9]+]]
43+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
44+
; CHECK: for.cond.cleanup:
45+
; CHECK-NEXT: ret void
46+
; CHECK: for.body:
47+
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
48+
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
49+
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
50+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
51+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
52+
;
53+
entry:
54+
br label %for.body
55+
56+
for.cond.cleanup:
57+
ret void
58+
59+
for.body:
60+
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
61+
%ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07
62+
store double 3.14159e+00, ptr %ptr1, align 1, !tbaa !10
63+
%inc = add nuw nsw i64 %i.07, 1
64+
%exitcond.not = icmp eq i64 %inc, 16
65+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
66+
}
67+
68+
; Test: the constant-double store loop with a runtime trip count (%len); the
; store carries !tbaa !10. The CHECK lines expect the byte count to be
; computed as `shl nuw i64 %len, 3` and passed to memset_pattern16. Unlike the
; two constant-length tests, the CHECK line for this call does not name a
; !tbaa attachment.
define dso_local void @var_memset(ptr nocapture %p, i64 %len) {
69+
; CHECK-LABEL: @var_memset(
70+
; CHECK-NEXT: entry:
71+
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[LEN:%.*]], 3
72+
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.2, i64 [[TMP0]])
73+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
74+
; CHECK: for.cond.cleanup:
75+
; CHECK-NEXT: ret void
76+
; CHECK: for.body:
77+
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
78+
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
79+
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
80+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[LEN]]
81+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
82+
;
83+
entry:
84+
br label %for.body
85+
86+
for.cond.cleanup:
87+
ret void
88+
89+
for.body:
90+
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
91+
%ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07
92+
store double 3.14159e+00, ptr %ptr1, align 1, !tbaa !10
93+
%inc = add nuw nsw i64 %i.07, 1
94+
%exitcond.not = icmp eq i64 %inc, %len
95+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
96+
}
97+
98+
!5 = !{!6, !6, i64 0}
99+
!6 = !{!"double", !7, i64 0}
100+
!7 = !{!"omnipotent char", !8, i64 0}
101+
!8 = !{!"Simple C++ TBAA"}
102+
103+
!15 = !{!8, i64 0, !"omnipotent char"}
104+
!17 = !{!15, i64 8, !"double"}
105+
!9 = !{!15, i64 32, !"_ZTS1A", !17, i64 0, i64 8, !17, i64 8, i64 8, !17, i64 16, i64 8, !17, i64 24, i64 8}
106+
!10 = !{!9, !17, i64 0, i64 1}
107+
108+
!18 = !{!19, !20, i64 0}
109+
!19 = !{!"A", !20, i64 0, !22, i64 8}
110+
!20 = !{!"any pointer", !7, i64 0}
111+
!21 = !{!22, !20, i64 0}
112+
!22 = !{!"B", !20, i64 0}

0 commit comments

Comments
 (0)