-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV] Fix missing MetaData for histogram instructions #134241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: None (RonDahan101) ChangesFull diff: https://github.com/llvm/llvm-project/pull/134241.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2cdb87fdd3f8d..1f9842f845893 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8642,6 +8642,8 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
HGramOps.push_back(Operands[1]);
// Increment value.
HGramOps.push_back(getVPValueOrAddLiveIn(HI->Update->getOperand(1)));
+ // Store Instruction.
+ HGramOps.push_back(getVPValueOrAddLiveIn(HI->Store));
// In case of predicated execution (due to tail-folding, or conditional
// execution, or both), pass the relevant mask.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fbbc466f2f7f6..62a6ea0d405b9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1466,7 +1466,7 @@ class VPHistogramRecipe : public VPRecipeBase {
/// Return the mask operand if one was provided, or a null pointer if all
/// lanes should be executed unconditionally.
VPValue *getMask() const {
- return getNumOperands() == 3 ? getOperand(2) : nullptr;
+ return getNumOperands() == 4 ? getOperand(3) : nullptr;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f5d5e12b1c85d..8079c27558a3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1242,9 +1242,11 @@ void VPHistogramRecipe::execute(VPTransformState &State) {
else
assert(Opcode == Instruction::Add && "only add or sub supported for now");
- State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
- {VTy, IncAmt->getType()},
- {Address, IncAmt, Mask});
+ auto *HistogramInst = State.Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_histogram_add, {VTy, IncAmt->getType()},
+ {Address, IncAmt, Mask});
+ State.addMetadata(HistogramInst,
+ cast<Instruction>(getOperand(2)->getUnderlyingValue()));
}
InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
index 3b00312959d8a..0f60c315a582d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
@@ -927,6 +927,72 @@ for.exit:
ret void
}
+define void @simple_histogram_metadata(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
+; CHECK-LABEL: define void @simple_histogram_metadata(
+; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META26:![0-9]+]], !noalias [[META29:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP6]]
+; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP7]], i32 1, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META31:![0-9]+]], !noalias [[META26]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT: [[L_IDX:%.*]] = load i32, ptr [[GEP_INDICES]], align 4, !alias.scope [[META26]], !noalias [[META29]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[L_IDX]] to i64
+; CHECK-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT: [[L_BUCKET:%.*]] = load i32, ptr [[GEP_BUCKET]], align 4, !alias.scope [[META31]], !noalias [[META26]]
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[L_BUCKET]], 1
+; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_BUCKET]], align 4, !alias.scope [[META31]], !noalias [[META26]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; CHECK: for.exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
+ %l.idx = load i32, ptr %gep.indices, align 4, !alias.scope !44, !noalias !47
+ %idxprom1 = zext i32 %l.idx to i64
+ %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
+ %l.bucket = load i32, ptr %gep.bucket, align 4, !alias.scope !52, !noalias !44
+ %inc = add nsw i32 %l.bucket, 1
+ store i32 %inc, ptr %gep.bucket, align 4, !alias.scope !52, !noalias !44
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %N
+ br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
+
+for.exit:
+ ret void
+}
+
attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }
!0 = distinct !{!0, !1}
@@ -935,3 +1001,12 @@ attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }
!3 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.interleave.count", i32 1}
+
+!44 = !{!45}
+!45 = distinct !{!45, !46, !"scope-index-1"}
+!46 = distinct !{!46, !"scopes-indices"}
+!47 = !{!48}
+!48 = distinct !{!48, !46, !"scope-bucket-1"}
+!52 = !{!48, !53}
+!53 = distinct !{!53, !54, !"scope-bucket-2"}
+!54 = distinct !{!54, !"scopes-buckets"}
|
related issue: #132757 |
@@ -8642,6 +8642,8 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI, | |||
HGramOps.push_back(Operands[1]); | |||
// Increment value. | |||
HGramOps.push_back(getVPValueOrAddLiveIn(HI->Update->getOperand(1))); | |||
// Store Instruction. | |||
HGramOps.push_back(getVPValueOrAddLiveIn(HI->Store)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding a live-in for the store as operand, just to get the underlying instruction to retrieve the metadata seems like a bit of a workaround.
I think it would be good to improve the existing metadata handling first and model it explicit in VPlan: #135272
Then it should be easy to preserve the metadata directly without needing any artificial operands
No description provided.