-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ValueTracking] Support non-constant idx for computeKnownFPClass
of insertelement
#87708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
… `insertelement` Its same logic as before, we just need to intersect what we know about the new Elt and the entire pre-existing Vec.
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/87708.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5ad4da43bca7db..685861a9116555 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5355,14 +5355,17 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
const Value *Vec = Op->getOperand(0);
const Value *Elt = Op->getOperand(1);
auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
- // Early out if the index is non-constant or out-of-range.
unsigned NumElts = DemandedElts.getBitWidth();
- if (!CIdx || CIdx->getValue().uge(NumElts))
- return;
+ APInt DemandedVecElts = DemandedElts;
+ bool NeedsElt = true;
+ // If we know the index we are inserting too, clear it from Vec check.
+ if (CIdx && CIdx->getValue().ult(NumElts)) {
+ DemandedVecElts.clearBit(CIdx->getZExtValue());
+ NeedsElt = DemandedElts[CIdx->getZExtValue()];
+ }
- unsigned EltIdx = CIdx->getZExtValue();
// Do we demand the inserted element?
- if (DemandedElts[EltIdx]) {
+ if (NeedsElt) {
computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -5371,10 +5374,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
Known.KnownFPClasses = fcNone;
}
- // We don't need the base vector element that has been inserted.
- APInt DemandedVecElts = DemandedElts;
- DemandedVecElts.clearBit(EltIdx);
- if (!!DemandedVecElts) {
+ // Do we need anymore elements from Vec?
+ if (!DemandedVecElts.isZero()) {
KnownFPClass Known2;
computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
Depth + 1, Q);
diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll
index 4df647cf3bb5b2..4370cea3c285cb 100644
--- a/llvm/test/Transforms/Attributor/nofpclass.ll
+++ b/llvm/test/Transforms/Attributor/nofpclass.ll
@@ -1513,6 +1513,25 @@ define <4 x float> @insertelement_constant_chain() {
ret <4 x float> %ins.3
}
+define <4 x float> @insertelement_non_constant_chain(i32 %idx) {
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define nofpclass(nan inf nzero sub) <4 x float> @insertelement_non_constant_chain
+; CHECK-SAME: (i32 [[IDX:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x float> poison, float 1.000000e+00, i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> [[INS_0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float -9.000000e+00, i32 2
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <4 x float> [[INS_2]], float 3.000000e+00, i32 3
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <4 x float> [[INS_2]], float 4.000000e+00, i32 [[IDX]]
+; CHECK-NEXT: ret <4 x float> [[INS_3]]
+;
+ %ins.0 = insertelement <4 x float> poison, float 1.0, i32 0
+ %ins.1 = insertelement <4 x float> %ins.0, float 0.0, i32 1
+ %ins.2 = insertelement <4 x float> %ins.1, float -9.0, i32 2
+ %ins.3 = insertelement <4 x float> %ins.2, float 3.0, i32 3
+ %ins.4 = insertelement <4 x float> %ins.2, float 4.0, i32 %idx
+ ret <4 x float> %ins.4
+}
+
define <vscale x 4 x float> @insertelement_scalable_constant_chain() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define <vscale x 4 x float> @insertelement_scalable_constant_chain
@@ -1582,7 +1601,7 @@ define float @insertelement_extractelement_unknown(<4 x float> nofpclass(zero) %
define <4 x float> @insertelement_index_oob_chain() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define <4 x float> @insertelement_index_oob_chain
+; CHECK-LABEL: define nofpclass(nan ninf nzero sub norm) <4 x float> @insertelement_index_oob_chain
; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4
; CHECK-NEXT: ret <4 x float> [[INSERT]]
|
computeKnownFPClass
of insertelement
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe computeKnownBits has the same issue
return; | ||
APInt DemandedVecElts = DemandedElts; | ||
bool NeedsElt = true; | ||
// If we know the index we are inserting too, clear it from Vec check. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// If we know the index we are inserting too, clear it from Vec check. | |
// If we know the index we are inserting to, clear it from Vec check. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will update before pushing.
Have a PR for that too: #87707 |
insertelement
; NFCcomputeKnownFPClass
ofinsertelement