@@ -2579,6 +2579,97 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2579
2579
return new ShuffleVectorInst (X, Y, NewMask);
2580
2580
}
2581
2581
2582
+ // Extract `(scalar_ops... x)` from `(vector_ops... (insert ?, x, C)`
2583
+ static Value *
2584
+ getScalarizationOfInsertElement (Value *V, int ReqIndexC,
2585
+ InstCombiner::BuilderTy &Builder) {
2586
+ Value *X, *Base;
2587
+ ConstantInt *IndexC;
2588
+ // Found a select.
2589
+ if (match (V, m_InsertElt (m_Value (Base), m_Value (X), m_ConstantInt (IndexC)))) {
2590
+ // See if matches the index we need.
2591
+ if (match (IndexC, m_SpecificInt (ReqIndexC)))
2592
+ return X;
2593
+ // Otherwise continue searching. This is necessary for finding both elements
2594
+ // in the common pattern:
2595
+ // V0 = (insert poison x, 0)
2596
+ // V1 = (insert V0, y, 1)
2597
+ return getScalarizationOfInsertElement (Base, ReqIndexC, Builder);
2598
+ }
2599
+
2600
+ // We can search through a splat of a single element for an insert.
2601
+ int SplatIndex;
2602
+ if (match (V, m_Shuffle (m_Value (Base), m_Value (X),
2603
+ m_SplatOrUndefMask (SplatIndex))) &&
2604
+ SplatIndex >= 0 ) {
2605
+ if (auto *VType = dyn_cast<FixedVectorType>(V->getType ())) {
2606
+ // Chase whichever vector (Base/X) we are splatting from.
2607
+ if (static_cast <unsigned >(SplatIndex) >= VType->getNumElements ())
2608
+ return getScalarizationOfInsertElement (
2609
+ X, SplatIndex - VType->getNumElements (), Builder);
2610
+ // New index we need to find is the index we are splatting from.
2611
+ return getScalarizationOfInsertElement (Base, SplatIndex, Builder);
2612
+ }
2613
+ return nullptr ;
2614
+ }
2615
+
2616
+ // We don't want to duplicate `vector_ops...` if they have multiple uses.
2617
+ if (!V->hasOneUse ())
2618
+ return nullptr ;
2619
+
2620
+ Value *R = nullptr ;
2621
+ // Scalarize any unary op.
2622
+ if (match (V, m_UnOp (m_Value (X)))) {
2623
+ if (auto *Scalar = getScalarizationOfInsertElement (X, ReqIndexC, Builder))
2624
+ R = Builder.CreateUnOp (cast<UnaryOperator>(V)->getOpcode (), Scalar);
2625
+ }
2626
+
2627
+ // Scalarize any cast but bitcast.
2628
+ // TODO: We skip bitcasts, but they would be okay if they are elementwise.
2629
+ if (isa<CastInst>(V) && !match (V, m_BitCast (m_Value ()))) {
2630
+ X = cast<CastInst>(V)->getOperand (0 );
2631
+ if (auto *Scalar = getScalarizationOfInsertElement (X, ReqIndexC, Builder))
2632
+ R = Builder.CreateCast (cast<CastInst>(V)->getOpcode (), Scalar,
2633
+ V->getType ()->getScalarType ());
2634
+ }
2635
+
2636
+ // Binop with a constant.
2637
+ Constant *C;
2638
+ if (match (V, m_c_BinOp (m_Value (X), m_ImmConstant (C)))) {
2639
+ BinaryOperator *BO = cast<BinaryOperator>(V);
2640
+ if (isSafeToSpeculativelyExecute (BO)) {
2641
+ if (auto *Scalar =
2642
+ getScalarizationOfInsertElement (X, ReqIndexC, Builder)) {
2643
+ auto *ScalarC =
2644
+ ConstantExpr::getExtractElement (C, Builder.getInt64 (ReqIndexC));
2645
+
2646
+ BinaryOperator::BinaryOps Opc = BO->getOpcode ();
2647
+ if (match (V, m_c_BinOp (m_Value (X), m_ImmConstant (C))))
2648
+ R = Builder.CreateBinOp (Opc, Scalar, ScalarC);
2649
+ else
2650
+ R = Builder.CreateBinOp (Opc, ScalarC, Scalar);
2651
+ }
2652
+ }
2653
+ }
2654
+
2655
+ // Cmp with a constant.
2656
+ CmpInst::Predicate Pred;
2657
+ if (match (V, m_Cmp (Pred, m_Value (X), m_ImmConstant (C)))) {
2658
+ if (auto *Scalar = getScalarizationOfInsertElement (X, ReqIndexC, Builder)) {
2659
+ auto *ScalarC =
2660
+ ConstantExpr::getExtractElement (C, Builder.getInt64 (ReqIndexC));
2661
+ R = Builder.CreateCmp (Pred, Scalar, ScalarC);
2662
+ }
2663
+ }
2664
+ // TODO: Intrinsics
2665
+
2666
+ // If we created a new scalar instruction, copy flags from the vec version.
2667
+ if (R != nullptr )
2668
+ cast<Instruction>(R)->copyIRFlags (V);
2669
+
2670
+ return R;
2671
+ }
2672
+
2582
2673
// / Try to replace a shuffle with an insertelement or try to replace a shuffle
2583
2674
// / operand with the operand of an insertelement.
2584
2675
static Instruction *foldShuffleWithInsert (ShuffleVectorInst &Shuf,
@@ -2616,13 +2707,11 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2616
2707
if (NumElts != InpNumElts)
2617
2708
return nullptr ;
2618
2709
2619
- // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2620
- auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2621
- // We need an insertelement with a constant index.
2622
- if (!match (V0, m_InsertElt (m_Value (), m_Value (Scalar),
2623
- m_ConstantInt (IndexC))))
2624
- return false ;
2625
2710
2711
+ // (shuffle (vec_ops... (insert ?, Scalar, IndexC)), V1, Mask)
2712
+ // --> insert V1, (scalar_ops... Scalar), IndexC'
2713
+ auto GetScalarizationOfInsertEle =
2714
+ [&Mask, &NumElts, &IC](Value *V) -> std::pair<Value *, int > {
2626
2715
// Test the shuffle mask to see if it splices the inserted scalar into the
2627
2716
// operand 1 vector of the shuffle.
2628
2717
int NewInsIndex = -1 ;
@@ -2631,40 +2720,45 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2631
2720
if (Mask[i] == -1 )
2632
2721
continue ;
2633
2722
2634
- // The shuffle takes elements of operand 1 without lane changes .
2635
- if (Mask[i] == NumElts + i )
2723
+ // The shuffle takes elements of operand 1.
2724
+ if (Mask[i] >= NumElts)
2636
2725
continue ;
2637
2726
2638
2727
// The shuffle must choose the inserted scalar exactly once.
2639
- if (NewInsIndex != -1 || Mask[i] != IndexC-> getSExtValue () )
2640
- return false ;
2728
+ if (NewInsIndex != -1 )
2729
+ return { nullptr , - 1 } ;
2641
2730
2642
- // The shuffle is placing the inserted scalar into element i.
2731
+ // The shuffle is placing the inserted scalar into element i from operand
2732
+ // 0.
2643
2733
NewInsIndex = i;
2644
2734
}
2645
2735
2646
- assert (NewInsIndex != -1 && " Did not fold shuffle with unused operand?" );
2736
+ // Operand is unused.
2737
+ if (NewInsIndex < 0 )
2738
+ return {nullptr , -1 };
2647
2739
2648
- // Index is updated to the potentially translated insertion lane.
2649
- IndexC = ConstantInt::get (IndexC->getIntegerType (), NewInsIndex);
2650
- return true ;
2651
- };
2740
+ Value *Scalar =
2741
+ getScalarizationOfInsertElement (V, Mask[NewInsIndex], IC.Builder );
2652
2742
2653
- // If the shuffle is unnecessary, insert the scalar operand directly into
2654
- // operand 1 of the shuffle. Example:
2655
- // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2656
- Value *Scalar;
2657
- ConstantInt *IndexC;
2658
- if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2659
- return InsertElementInst::Create (V1, Scalar, IndexC);
2743
+ return {Scalar, NewInsIndex};
2744
+ };
2660
2745
2661
- // Try again after commuting shuffle. Example:
2662
- // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2663
- // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2664
- std::swap (V0, V1);
2746
+ auto [V0Scalar, V0NewInsertIdx] = GetScalarizationOfInsertEle (V0);
2665
2747
ShuffleVectorInst::commuteShuffleMask (Mask, NumElts);
2666
- if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2667
- return InsertElementInst::Create (V1, Scalar, IndexC);
2748
+ auto [V1Scalar, V1NewInsertIdx] = GetScalarizationOfInsertEle (V1);
2749
+
2750
+ if (V0Scalar != nullptr && V1Scalar != nullptr ) {
2751
+ Value *R = IC.Builder .CreateInsertElement (Shuf.getType (), V0Scalar,
2752
+ V0NewInsertIdx);
2753
+ return InsertElementInst::Create (R, V1Scalar,
2754
+ IC.Builder .getInt64 (V1NewInsertIdx));
2755
+ } else if (V0Scalar != nullptr ) {
2756
+ return InsertElementInst::Create (V1, V0Scalar,
2757
+ IC.Builder .getInt64 (V0NewInsertIdx));
2758
+ } else if (V1Scalar != nullptr ) {
2759
+ return InsertElementInst::Create (V0, V1Scalar,
2760
+ IC.Builder .getInt64 (V1NewInsertIdx));
2761
+ }
2668
2762
2669
2763
return nullptr ;
2670
2764
}
0 commit comments