-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[BasicAA] Treat different VScale intrinsics as the same value. #81152
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis Author: David Green (davemgreen) Changes: The last patch of this PR builds upon #81144. The IR may contain multiple llvm.vscale intrinsics that have not been CSEd. Full diff: https://github.com/llvm/llvm-project/pull/81152.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index ae31814bb06735..790af79c5b6600 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -188,6 +188,12 @@ static bool isObjectSize(const Value *V, TypeSize Size, const DataLayout &DL,
return ObjectSize && *ObjectSize == Size;
}
+/// Return true if both V1 and V2 match the llvm.vscale pattern (m_VScale).
+static bool areBothVScale(const Value *V1, const Value *V2) {
+ return PatternMatch::match(V1, PatternMatch::m_VScale()) &&
+ PatternMatch::match(V2, PatternMatch::m_VScale());
+}
+
//===----------------------------------------------------------------------===//
// CaptureInfo implementations
//===----------------------------------------------------------------------===//
@@ -679,7 +685,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
- if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
+ if ((Decomposed.VarIndices[i].Val.V == LE.Val.V ||
+ areBothVScale(Decomposed.VarIndices[i].Val.V, LE.Val.V)) &&
Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
Scale += Decomposed.VarIndices[i].Scale;
LE.IsNSW = false; // We cannot guarantee nsw for the merge.
@@ -1173,7 +1180,7 @@ AliasResult BasicAAResult::aliasGEP(
// VScale Alias Analysis - Given one scalable offset between accesses and a
// scalable typesize, we can divide each side by vscale, treating both values
// as a constant. We prove that Offset/vscale >= TypeSize/vscale.
- if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsNSW &&
+ if (DecompGEP1.VarIndices.size() == 1 &&
DecompGEP1.VarIndices[0].Val.TruncBits == 0 &&
DecompGEP1.Offset.isZero() &&
PatternMatch::match(DecompGEP1.VarIndices[0].Val.V,
@@ -1183,12 +1190,22 @@ AliasResult BasicAAResult::aliasGEP(
ScalableVar.IsNegated ? -ScalableVar.Scale : ScalableVar.Scale;
LocationSize VLeftSize = Scale.isNegative() ? V1Size : V2Size;
- // Note that we do not check that the typesize is scalable, as vscale >= 1
- // so noalias still holds so long as the dependency distance is at least as
- // big as the typesize.
- if (VLeftSize.hasValue() &&
- Scale.uge(VLeftSize.getValue().getKnownMinValue()))
- return AliasResult::NoAlias;
+ // Check if the offset is known to not overflow, if it does then attempt to
+ // prove it with the known values of vscale_range.
+ bool Overflows = !DecompGEP1.VarIndices[0].IsNSW;
+ if (Overflows) {
+ ConstantRange CR = getVScaleRange(&F, Scale.getBitWidth());
+ (void)CR.getSignedMax().smul_ov(Scale, Overflows);
+ }
+
+ if (!Overflows) {
+ // Note that we do not check that the typesize is scalable, as vscale >= 1
+ // so noalias still holds so long as the dependency distance is at least
+ // as big as the typesize.
+ if (VLeftSize.hasValue() &&
+ Scale.uge(VLeftSize.getValue().getKnownMinValue()))
+ return AliasResult::NoAlias;
+ }
}
// Bail on analysing scalable LocationSize
@@ -1782,7 +1799,8 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
bool Found = false;
for (auto I : enumerate(DestGEP.VarIndices)) {
VariableGEPIndex &Dest = I.value();
- if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) ||
+ if ((!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) &&
+ !areBothVScale(Dest.Val.V, Src.Val.V)) ||
!Dest.Val.hasSameCastsAs(Src.Val))
continue;
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index ce0c6f145d1c88..a8de4ae84323dd 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -458,11 +458,29 @@ define void @vscale_v1v2types(ptr %p) {
ret void
}
+; CHECK-LABEL: onevscale
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp162
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp161b
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %vp161b, <vscale x 4 x i32>* %vp162
+define void @onevscale(ptr %p) vscale_range(1,16) {
+ %v1 = call i64 @llvm.vscale.i64()
+ %vp1 = mul nsw i64 %v1, 16
+ %vp2 = mul nsw i64 %v1, 16
+ %vp3 = mul nsw i64 %v1, 17
+ %vp161 = getelementptr i8, ptr %p, i64 %vp1
+ %vp162 = getelementptr i8, ptr %p, i64 %vp2
+ %vp161b = getelementptr i8, ptr %vp161, i64 %vp3
+ load <vscale x 4 x i32>, ptr %vp161
+ load <vscale x 4 x i32>, ptr %vp162
+ load <vscale x 4 x i32>, ptr %vp161b
+ ret void
+}
+
; CHECK-LABEL: twovscales
-; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp162
-; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp161b
-; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vp161b, <vscale x 4 x i32>* %vp162
-define void @twovscales(ptr %p) {
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp162
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp161b
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %vp161b, <vscale x 4 x i32>* %vp162
+define void @twovscales(ptr %p) vscale_range(1,16) {
%v1 = call i64 @llvm.vscale.i64()
%v2 = call i64 @llvm.vscale.i64()
%vp1 = mul nsw i64 %v1, 16
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Last commit LGTM.
The IR may contain multiple llvm.vscale intrinsics that have not been CSEd. This patch ensures that multiple vscales can be treated the same, both in the decomposition of GEPs and when we subtract one decomposition from another.
f4a5741
to
b05d70e
Compare
Thanks. |
The last patch of this PR builds upon #81144.
The IR may contain multiple llvm.vscale intrinsics that have not been CSEd.
This patch ensures that multiple vscales can be treated the same, both in the
decomposition of GEPs and when we subtract one decomposition from another.