
[VectorCombine] Fold binary op of reductions. #121567


Merged: 2 commits into llvm:main from binary_op_reduce, Feb 22, 2025

Conversation


@mgudim mgudim commented Jan 3, 2025

Replace a binary op of two reductions with one reduction of the binary op applied to vectors. For example:

%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
%res = add i32 %v0_red, %v1_red

gets transformed to:

%1 = add <16 x i32> %v0, %v1
%res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %1)

@mgudim mgudim requested a review from nikic as a code owner January 3, 2025 12:54
@llvmbot llvmbot added the llvm:instcombine (Covers the InstCombine, InstSimplify and AggressiveInstCombine passes) and llvm:transforms labels on Jan 3, 2025

llvmbot commented Jan 3, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Mikhail Gudim (mgudim)

Changes

Replace a binary op of two reductions with one reduction of the binary op applied to vectors. For example:

%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
%res = add i32 %v0_red, %v1_red

gets transformed to:

%1 = add <16 x i32> %v0, %v1
%res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %1)

Full diff: https://github.com/llvm/llvm-project/pull/121567.diff

5 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+5-13)
  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+9)
  • (modified) llvm/lib/Transforms/InstCombine/InstCombineInternal.h (+1)
  • (modified) llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (+3)
  • (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+52)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7a184a19d7c54a..42e816d527fcff 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1516,6 +1516,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *X = foldBinopOfReductions(I))
+    return replaceInstUsesWith(I, X);
+
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
@@ -2376,19 +2379,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     }
   }
 
-  auto m_AddRdx = [](Value *&Vec) {
-    return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
-  };
-  Value *V0, *V1;
-  if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
-      V0->getType() == V1->getType()) {
-    // Difference of sums is sum of differences:
-    // add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
-    Value *Sub = Builder.CreateSub(V0, V1);
-    Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add,
-                                         {Sub->getType()}, {Sub});
-    return replaceInstUsesWith(I, Rdx);
-  }
+  if (Instruction *X = foldBinopOfReductions(I))
+    return replaceInstUsesWith(I, X);
 
   if (Constant *C = dyn_cast<Constant>(Op0)) {
     Value *X;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index e576eea4ca36a1..d9fcaf124d459b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2388,6 +2388,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *X = foldBinopOfReductions(I))
+    return replaceInstUsesWith(I, X);
+
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
@@ -3588,6 +3591,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *X = foldBinopOfReductions(I))
+    return replaceInstUsesWith(I, X);
+
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
@@ -4713,6 +4719,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *X = foldBinopOfReductions(I))
+    return replaceInstUsesWith(I, X);
+
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 3a074ee70dc487..99301d3e991f55 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -594,6 +594,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
 
   /// Canonicalize the position of binops relative to shufflevector.
   Instruction *foldVectorBinop(BinaryOperator &Inst);
+  Instruction *foldBinopOfReductions(BinaryOperator &Inst);
   Instruction *foldVectorSelect(SelectInst &Sel);
   Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index f85a3c93651353..98023c5eb89e42 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -205,6 +205,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *X = foldBinopOfReductions(I))
+    return replaceInstUsesWith(I, X);
+
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 934156f04f7fdd..12c53e8a0869f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2296,6 +2296,58 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   return nullptr;
 }
 
+static Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc) {
+  switch (Opc) {
+  default:
+    break;
+  case Instruction::Add:
+    return Intrinsic::vector_reduce_add;
+  case Instruction::Mul:
+    return Intrinsic::vector_reduce_mul;
+  case Instruction::And:
+    return Intrinsic::vector_reduce_and;
+  case Instruction::Or:
+    return Intrinsic::vector_reduce_or;
+  case Instruction::Xor:
+    return Intrinsic::vector_reduce_xor;
+  }
+  return Intrinsic::num_intrinsics;
+}
+
+Instruction *InstCombinerImpl::foldBinopOfReductions(BinaryOperator &Inst) {
+  IntrinsicInst *II0 = dyn_cast<IntrinsicInst>(Inst.getOperand(0));
+  if (!II0)
+    return nullptr;
+  IntrinsicInst *II1 = dyn_cast<IntrinsicInst>(Inst.getOperand(1));
+  if (!II1)
+    return nullptr;
+
+  Instruction::BinaryOps BinOpOpc = Inst.getOpcode();
+  Intrinsic::ID ReductionIID = getReductionForBinop(BinOpOpc);
+  if (BinOpOpc == Instruction::Sub)
+    ReductionIID = Intrinsic::vector_reduce_add;
+
+  if (ReductionIID == Intrinsic::num_intrinsics)
+    return nullptr;
+  if (II0->getIntrinsicID() != ReductionIID)
+    return nullptr;
+  if (II1->getIntrinsicID() != ReductionIID)
+    return nullptr;
+
+  Value *V0 = II0->getArgOperand(0);
+  Value *V1 = II1->getArgOperand(0);
+  Type *VTy = V0->getType();
+  if (V1->getType() != VTy)
+    return nullptr;
+
+  Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
+  // if (auto *VectorInstBO = dyn_cast<BinaryOperator>(VectorBO))
+  //   VectorInstBO->copyIRFlags(&Inst);
+
+  Instruction *Rdx = Builder.CreateIntrinsic(ReductionIID, {VTy}, {VectorBO});
+  return Rdx;
+}
+
 /// Try to narrow the width of a binop if at least 1 operand is an extend of
 /// of a value. This requires a potentially expensive known bits check to make
 /// sure the narrow op does not overflow.


mgudim commented Jan 3, 2025

Test is precommitted in: #121568

return nullptr;

Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
// if (auto *VectorInstBO = dyn_cast<BinaryOperator>(VectorBO))
Member

I don't think it is possible to propagate these flags.

@goldsteinn (Contributor) commented Jan 3, 2025

You could preserve disjoint, but I don't think any others.

Contributor Author (mgudim)

done, thanks.
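In IR terms (a made-up sketch, not a test from the patch): if the scalar or carries the disjoint flag, the two reduced values share no set bits, so no lane of %v0 can share a bit with any lane of %v1 and the new vector or may keep the flag.

```
; scalar or of two or-reductions, known to have no common bits set
%r0 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v0)
%r1 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v1)
%res = or disjoint i32 %r0, %r1
```

could become:

```
; the lanewise or can keep the disjoint flag
%1 = or disjoint <4 x i32> %v0, %v1
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %1)
```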

}

Instruction *InstCombinerImpl::foldBinopOfReductions(BinaryOperator &Inst) {
IntrinsicInst *II0 = dyn_cast<IntrinsicInst>(Inst.getOperand(0));
Member

Missing one-use check

Contributor Author (mgudim)

done

ReductionIID = Intrinsic::vector_reduce_add;

if (ReductionIID == Intrinsic::num_intrinsics)
return nullptr;
Contributor

You can drop this check, as no intrinsic will be Intrinsic::num_intrinsics.

Contributor Author (mgudim)

I rewrote the code a little bit. From a purely logical point of view, yes, we can drop this check. But it is an early-exit check, and I also think the code looks much clearer with it.


github-actions bot commented Jan 8, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

switch (Opc) {
default:
break;
case Instruction::Add:
Contributor

You could also fold or disjoint with vector_reduce_add, but it's doubtful such a pattern shows up in practice.

Contributor Author (mgudim)

I think or disjoint should get canonicalized first to add.

Contributor

hmm? We do add -> or disjoint, not the other way around.

Contributor Author (mgudim)

right.
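For reference, a made-up example of that canonicalization direction: InstCombine turns an add whose operands provably share no bits into or disjoint, not the reverse.

```
%lo = and i32 %a, 15        ; only the low 4 bits can be set
%hi = shl i32 %b, 4         ; the low 4 bits are always zero
%r  = add i32 %lo, %hi      ; becomes: %r = or disjoint i32 %lo, %hi
```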

Contributor Author (mgudim)

You could also fold or disjoint with vector_reduce_add

what would be the benefit of doing this?

Contributor

None I think, was just a note

@mgudim mgudim force-pushed the binary_op_reduce branch 2 times, most recently from 5a57194 to d683355 Compare January 8, 2025 22:20
@dtcxzyw (Member) left a comment

LGTM. Please wait for additional approval from other reviewers.
IIRC vertical vector ops are always cheaper than horizontal ones. It is suitable to perform this transformation in InstCombine.

IntrinsicInst *II = dyn_cast<IntrinsicInst>(V);
if (!II)
return nullptr;
if ((II->getIntrinsicID() == IID) && II->hasOneUse())
Member

Suggested change:
-if ((II->getIntrinsicID() == IID) && II->hasOneUse())
+if (II->getIntrinsicID() == IID && II->hasOneUse())

Contributor Author (mgudim)

done

@mgudim mgudim force-pushed the binary_op_reduce branch 2 times, most recently from ead55cd to d4de22e Compare January 10, 2025 18:10

mgudim commented Jan 10, 2025

@goldsteinn @nikic does this look ready to merge?

@davemgreen (Collaborator) left a comment

This will likely cause performance issues unless they are mitigated. There is a combine in the backend that does the same thing, but it is guarded by shouldReassociateReduction, and being in the backend allows the target to pattern-recognize before it gets reassociated.

@dtcxzyw dtcxzyw requested a review from RKSimon January 11, 2025 02:48

dtcxzyw commented Jan 11, 2025

This will likely cause performance issues unless they are mitigated.

Can you provide a test to demonstrate the performance issue? As I said before, vertical vector ops are always cheaper than horizontal ones. If not, we can move it into VectorCombine and use InstructionCost to see whether the transformation is a win.


mgudim commented Jan 11, 2025

This will likely cause performance issues unless they are mitigated. There is a combine in the backend that does that same thing, but it gets guarded by shouldReassociateReduction and being in the backend allows the target to pattern recognize before it gets reassociated.

@davemgreen

Before this patch, this combine worked for Sub in InstCombine - was this an oversight?

I also find it surprising that this will cause a degradation on some targets. Can you please give an example?

I also think "reassociate" is not the right name. Reassociate means (a + b) + c -> a + (b + c). Here I think a better term would be "factor out reduction", as in a * b + a * c => a * (b + c).


dtcxzyw commented Jan 11, 2025

Related patch: https://reviews.llvm.org/D141870


dtcxzyw commented Jan 11, 2025

I also think reassociate is not the right name.

reassociate is correct here. For example, this patch converts (v0 + v1 + v2 + v3) + (v4 + v5 + v6 + v7) into (v0 + v4) + (v1 + v5) + (v2 + v6) + (v3 + v7).


mgudim commented Jan 11, 2025

I also think reassociate is not the right name.

reassociate is correct here. For example, this patch converts (v0 + v1 + v2 + v3) + (v4 + v5 + v6 + v7) into (v0 + v4) + (v1 + v5) + (v2 + v6) + (v3 + v7).

got it, thanks


mgudim commented Jan 11, 2025

Perhaps we could have both InstCombine and DAGCombiner transformations guarded by different target hooks?

@davemgreen (Collaborator)

Before this combine worked for Sub in inst combine - was this an oversight?

Yeah maybe. In my experience adds are much more common than the other reductions, and sub(reduce.add, reduce.add) would be less common than plain adds. Maybe it wasn't done for the more obvious add case because it can cause problems.
The reduction thing is more obvious when you look at the flags needed to make the fp version valid.

As I said before, vertical vector ops are always cheaper than horizontal ones

I believe that is mostly true, but some targets can have relatively efficient reductions. It looks like MVE will already convert back if needed: https://godbolt.org/z/6YGqde37s.
I think the larger problem is extending reductions, where the pattern can be broken by the reassociation (https://godbolt.org/z/cohazhd4x), especially for mla variants like the dot-product instructions (https://godbolt.org/z/K1E3WqrcP).
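To make that concern concrete, a hypothetical before/after of the extending-reduction case (invented types and values): before the fold each reduction operand is a plain zext, which is easy to match as an extending (dot-product style) reduction; after the fold the reduction input is an add of two zexts, which is harder to recognize.

```
%e0 = zext <16 x i8> %a to <16 x i32>
%e1 = zext <16 x i8> %b to <16 x i32>
%r0 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %e0)
%r1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %e1)
%r  = add i32 %r0, %r1
```

becomes:

```
; the zexts no longer feed the reduction directly
%s = add <16 x i32> %e0, %e1
%r = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
```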

If not, we can move it into VectorCombine and use InstructionCost to see whether the transformation is a win.

We might not have costs (that don't go via getExtendedReductionCost / getMulAccReductionCost), as multi-instruction patterns can be awkward to cost-model well and it might not have come up before.

Perhaps we could have both InstCombine and DAGCombiner transformations guarded by different target hooks?

InstCombine, for better or worse, is considered a canonicalization pass that isn't controlled by target hooks. VectorCombine is the place where cost-modelled combines for vectors are usually done. We could also consider this to be the canonical pattern and undo the transform in the backend if needed and we could do so reliably.


dtcxzyw commented Jan 12, 2025

We could also consider this to be the canonical pattern and undo the transform in the backend if needed and we could do so reliably.

Agree.


mgudim commented Jan 20, 2025

@davemgreen Sorry, I am still not sure how to proceed.

(1) Should I close this PR?

(2) Treat this as the canonical pattern ("We could also consider this to be the canonical pattern and undo the transform in the backend if needed and we could do so reliably")?

(3) Move this to VectorCombine and guard it by a target hook?

I prefer (2) because this looks more canonical to me.


RKSimon commented Jan 20, 2025

@mgudim - I've had in mind a followup VectorCombine fold to this PR:

define i32 @src(<4 x i32> %a0, <4 x i32> %a1) {
  %r0 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a0)
  %r1 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a1)
  %r = and i32 %r0, %r1
  ret i32 %r
}
define i32 @tgt1(<4 x i32> %a0, <4 x i32> %a1) {
  %a = and <4 x i32> %a0, %a1
  %r = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
  ret i32 %r
}
define i32 @tgt2(<4 x i32> %a0, <4 x i32> %a1) {
  %a01 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %r = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a01)
  ret i32 %r
}

No objections if you want to move this PR to a cost driven fold in VectorCombine as well.


mgudim commented Jan 31, 2025

Is it possible to make the cost model detect and make use of getExtendedReductionCost / getMulAccReductionCost? Otherwise it might not correctly detect that the extending reductions are cheap when they have a vecreduce.add(s/zext()) or vecreduce.add(mul(zext(), zext())) pattern.

I'll do that when I come back (in about a week).


mgudim commented Feb 10, 2025

@davemgreen
I rewrote the cost estimation part. Does that look like something you had in mind?

@mgudim mgudim force-pushed the binary_op_reduce branch 2 times, most recently from ce85315 to 8afa5b8 Compare February 11, 2025 06:27

mgudim commented Feb 11, 2025

@davemgreen

I looked at LoopVectorizer and there are at least three IR patterns to which getMulAccReductionCost applies:
reduce.add(ext(mul(ext(A), ext(B)))),
reduce.add(mul(ext(A), ext(B))), and
reduce.add(mul()).

In all three cases the code looks quite similar. Also, it seems like I would have to repeat most of it in my patch too. Should we first come up with some API that simplifies LoopVectorizer and can be reused in my patch?
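For reference, the second of those patterns written out in IR (invented operands): this is the shape getMulAccReductionCost is meant to price as a single multiply-accumulate reduction.

```
; reduce.add(mul(ext(A), ext(B)))
%a.ext = zext <16 x i8> %a to <16 x i32>
%b.ext = zext <16 x i8> %b to <16 x i32>
%mul   = mul <16 x i32> %a.ext, %b.ext
%red   = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
```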

@mgudim mgudim force-pushed the binary_op_reduce branch 2 times, most recently from 6b25c35 to 9a976f8 Compare February 11, 2025 09:21
@davemgreen (Collaborator) left a comment

Hi - Thanks for adding the cost model. It looks decent, and seems to work quite well for MVE. Unfortunately there are cases where it isn't working as well, at least for one case with udot on AArch64, as we don't always generate the reduction pattern until instruction selection.

I was going to suggest I added a phase ordering test, but it looks like it needs LTO to go wrong as it only sees the vectors after loopvect+unroll+vector-combine, which do not usually happen in that order. I'll see if I can add one, but we might need to end up undoing this in the backend (or add a bailout from the target, or just leave it to the backend to do).


mgudim commented Feb 11, 2025

@davemgreen

I think AArch64/ARM may run into a similar problem with other combines, i.e. those combines may break the dot-product pattern for which there is native support.

Now I think the best solution would be to treat the result of this transformation as the canonical form and undo it in the backend. Or maybe insert intrinsics early enough so that other combines don't break the pattern? Or both?

@RKSimon (Collaborator) left a comment

still need to add cost driven tests

CostOfRedOperand0 + CostOfRedOperand1 +
TTI.getArithmeticInstrCost(BinOpOpc, VTy, CostKind) +
TTI.getArithmeticReductionCost(ReductionOpc, VTy, std::nullopt, CostKind);
// TODO: remove this
Collaborator

cleanup TODO and dbg, we usually just do something like:

  LLVM_DEBUG(dbgs() << "Found mergeable reductions: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

Contributor Author (mgudim)

Yes, I'll remove that. I need it while I am adding more tests.
It looks like @davemgreen is still thinking about what the right approach should be to avoid regressions.
Also, if I should continue with this, I still need advice on this: #121567 (comment)

Contributor Author (mgudim)

done

@davemgreen (Collaborator)

I might be able to add a cost for extending add reductions for AArch64. Whilst not actually matching the correct instruction (it ends up using a udot), it should still be correct in terms of costs. I might be able to look into that tomorrow.

@davemgreen (Collaborator)

I added some quick cost-models for AArch64 extending reductions. I agree this isn't the most reliable thing, and having a more representative intrinsic might help. IMO this is better left for the backend, but not all architectures lower reduction nodes using sdag at the moment.


mgudim commented Feb 17, 2025

@davemgreen I still need your input on this: #121567 (comment). Should I basically repeat all the code from LoopVectorizer here?

In general, I find it weird that opt has to know about some pattern that exists on AArch64 (dot product) and try to preserve it.

Replace a binary op of two reductions with one reduction of the binary op
applied to vectors. For example:

```
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
%res = add i32 %v0_red, %v1_red
```
gets transformed to:

```
%1 = add <16 x i32> %v0, %v1
%res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %1)
```
@mgudim mgudim force-pushed the binary_op_reduce branch 2 times, most recently from 8fad569 to 20b783d Compare February 20, 2025 08:53

mgudim commented Feb 20, 2025

still need to add cost driven tests

Added a test for ARM. @RKSimon, do you think more tests are necessary? You mentioned adding a test for X86, but X86 doesn't implement getMulAccReductionCost. Can you clarify what test you had in mind?


mgudim commented Feb 20, 2025

@davemgreen I added the cost model for the reduce.add(ext(mul(ext(A), ext(B)))) pattern. There are other dot-product patterns. I would be happy to work on them if you told me your preferred solution to #121567 (comment). Can you please let me know? Or maybe you can address this later yourself?

@davemgreen (Collaborator) left a comment

@davemgreen I added the cost model for the reduce.add(ext(mul(ext(A), ext(B))) pattern. There are other dot product patterns. I would be happy to work on them if only you told me what is your preferred solution to #121567 (comment) Can you please let me know? Or maybe you can address this later yourself?

Thanks. I think this might just always be a bit of a bad thing for Arm/AArch64 (for adds at least, the others are fine), and is more likely to make things worse than better. The backend already handles it in the cases we need it, and controlling it with a cost model is a little unreliable. But I ran some tests on this version and didn't see any problems in the tests I tried. The code you have looks decent and all the cases that were a problem before no longer are.

(I'm not sure of the most ideal way to structure it, if I was writing the whole compiler myself I would be tempted to add a smuladd/umuladd reduction intrinsic so that the cost modelling can be more accurate, but that sounds like quite a lot of work and might not even perfectly solve the problem in every case without effectively doing type/operation legalization earlier. Undoing arbitrarily reassociated/combined add trees can be difficult but is maybe better because it might capture more cases. "We can add that in the future if we need it" is probably the best way forward).


mgudim commented Feb 20, 2025

@davemgreen Thanks for testing the patch. So is this LGTM from you? )

@RKSimon (Collaborator) left a comment

LGTM

@mgudim mgudim merged commit f5d153e into llvm:main Feb 22, 2025
8 checks passed

llvm-ci commented Feb 22, 2025

LLVM Buildbot has detected a new failure on builder clang-aarch64-quick running on linaro-clang-aarch64-quick while building llvm at step 5 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/65/builds/12655

Here is the relevant piece of the build log for the reference
Step 5 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'Clangd Unit Tests :: ./ClangdTests/51/161' FAILED ********************
Script(shard):
--
GTEST_OUTPUT=json:/home/tcwg-buildbot/worker/clang-aarch64-quick/stage1/tools/clang/tools/extra/clangd/unittests/./ClangdTests-Clangd Unit Tests-4113409-51-161.json GTEST_SHUFFLE=0 GTEST_TOTAL_SHARDS=161 GTEST_SHARD_INDEX=51 /home/tcwg-buildbot/worker/clang-aarch64-quick/stage1/tools/clang/tools/extra/clangd/unittests/./ClangdTests
--

Script:
--
/home/tcwg-buildbot/worker/clang-aarch64-quick/stage1/tools/clang/tools/extra/clangd/unittests/./ClangdTests --gtest_filter=ClangdServerTest.ReparseOpenedFiles
--
ASTWorker building file /clangd-test/foo.cpp version null with command 
[/clangd-test]
clang -DMACRO=1 /clangd-test/foo.cpp
Driver produced command: cc1 -cc1 -triple aarch64-unknown-linux-gnu -fsyntax-only -disable-free -clear-ast-before-backend -main-file-name foo.cpp -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=non-leaf -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -enable-tlsdesc -target-cpu generic -target-feature +v8a -target-feature +fp-armv8 -target-feature +neon -target-abi aapcs -debugger-tuning=gdb -fdebug-compilation-dir=/clangd-test -fcoverage-compilation-dir=/clangd-test -resource-dir lib/clang/21 -D MACRO=1 -internal-isystem lib/clang/21/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fno-signed-char -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -no-round-trip-args -target-feature -fmv -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -x c++ /clangd-test/foo.cpp
Building first preamble for /clangd-test/foo.cpp version null
ASTWorker building file /clangd-test/baz.cpp version null with command 
[/clangd-test]
clang -DMACRO=1 /clangd-test/baz.cpp
Driver produced command: cc1 -cc1 -triple aarch64-unknown-linux-gnu -fsyntax-only -disable-free -clear-ast-before-backend -main-file-name baz.cpp -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=non-leaf -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -enable-tlsdesc -target-cpu generic -target-feature +v8a -target-feature +fp-armv8 -target-feature +neon -target-abi aapcs -debugger-tuning=gdb -fdebug-compilation-dir=/clangd-test -fcoverage-compilation-dir=/clangd-test -resource-dir lib/clang/21 -D MACRO=1 -internal-isystem lib/clang/21/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fno-signed-char -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -no-round-trip-args -target-feature -fmv -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -x c++ /clangd-test/baz.cpp
Building first preamble for /clangd-test/baz.cpp version null
Built preamble of size 730612 for file /clangd-test/foo.cpp version null in 0.39 seconds
Built preamble of size 730592 for file /clangd-test/baz.cpp version null in 0.78 seconds
ASTWorker building file /clangd-test/bar.cpp version null with command 
[/clangd-test]
clang -DMACRO=1 /clangd-test/bar.cpp
Driver produced command: cc1 -cc1 -triple aarch64-unknown-linux-gnu -fsyntax-only -disable-free -clear-ast-before-backend -main-file-name bar.cpp -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=non-leaf -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -enable-tlsdesc -target-cpu generic -target-feature +v8a -target-feature +fp-armv8 -target-feature +neon -target-abi aapcs -debugger-tuning=gdb -fdebug-compilation-dir=/clangd-test -fcoverage-compilation-dir=/clangd-test -resource-dir lib/clang/21 -D MACRO=1 -internal-isystem lib/clang/21/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fno-signed-char -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -no-round-trip-args -target-feature -fmv -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -x c++ /clangd-test/bar.cpp
Building first preamble for /clangd-test/bar.cpp version null
Built preamble of size 730612 for file /clangd-test/bar.cpp version null in 0.09 seconds
ASTWorker running Definitions on version null of /clangd-test/foo.cpp
File version went from null to null
File version went from null to null
ASTWorker building file /clangd-test/foo.cpp version null with command clangd fallback
[/clangd-test]
clang /clangd-test/foo.cpp
Driver produced command: cc1 -cc1 -triple aarch64-unknown-linux-gnu -fsyntax-only -disable-free -clear-ast-before-backend -main-file-name foo.cpp -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=non-leaf -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -enable-tlsdesc -target-cpu generic -target-feature +v8a -target-feature +fp-armv8 -target-feature +neon -target-abi aapcs -debugger-tuning=gdb -fdebug-compilation-dir=/clangd-test -fcoverage-compilation-dir=/clangd-test -resource-dir lib/clang/21 -internal-isystem lib/clang/21/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fno-signed-char -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -no-round-trip-args -target-feature -fmv -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -x c++ /clangd-test/foo.cpp
ASTWorker building file /clangd-test/bar.cpp version null with command clangd fallback
[/clangd-test]
clang /clangd-test/bar.cpp
Driver produced command: cc1 -cc1 -triple aarch64-unknown-linux-gnu -fsyntax-only -disable-free -clear-ast-before-backend -main-file-name bar.cpp -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=non-leaf -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -enable-tlsdesc -target-cpu generic -target-feature +v8a -target-feature +fp-armv8 -target-feature +neon -target-abi aapcs -debugger-tuning=gdb -fdebug-compilation-dir=/clangd-test -fcoverage-compilation-dir=/clangd-test -resource-dir lib/clang/21 -internal-isystem lib/clang/21/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fno-signed-char -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -no-round-trip-args -target-feature -fmv -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -x c++ /clangd-test/bar.cpp
Rebuilding invalidated preamble for /clangd-test/foo.cpp version null (previous was version null)
Rebuilding invalidated preamble for /clangd-test/bar.cpp version null (previous was version null)
Built preamble of size 730532 for file /clangd-test/bar.cpp version null in 0.37 seconds
../llvm/clang-tools-extra/clangd/unittests/ClangdTests.cpp:562: Failure
Value of: Server.blockUntilIdleForTest()
  Actual: false
Expected: true

Built preamble of size 730532 for file /clangd-test/foo.cpp version null in 12.86 seconds

../llvm/clang-tools-extra/clangd/unittests/ClangdTests.cpp:562
...

Labels
llvm:instcombine (Covers the InstCombine, InstSimplify and AggressiveInstCombine passes), llvm:transforms, vectorizers
9 participants