|
18 | 18 | #include "llvm/ADT/STLExtras.h"
|
19 | 19 | #include "llvm/ADT/SmallBitVector.h"
|
20 | 20 | #include "llvm/ADT/SmallVector.h"
|
| 21 | +#include "llvm/ADT/Statistic.h" |
21 | 22 | #include "llvm/Analysis/InstructionSimplify.h"
|
22 | 23 | #include "llvm/Analysis/VectorUtils.h"
|
23 | 24 | #include "llvm/IR/BasicBlock.h"
|
@@ -46,6 +47,10 @@ using namespace PatternMatch;
|
46 | 47 |
|
47 | 48 | #define DEBUG_TYPE "instcombine"
|
48 | 49 |
|
| 50 | +STATISTIC(NumAggregateReconstructionsSimplified, |
| 51 | + "Number of aggregate reconstructions turned into reuse of the " |
| 52 | + "original aggregate"); |
| 53 | + |
49 | 54 | /// Return true if the value is cheaper to scalarize than it is to leave as a
|
50 | 55 | /// vector operation. IsConstantExtractIndex indicates whether we are extracting
|
51 | 56 | /// one known element from a vector constant.
|
@@ -694,6 +699,243 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
|
694 | 699 | return std::make_pair(V, nullptr);
|
695 | 700 | }
|
696 | 701 |
|
| 702 | +/// Look for chain of insertvalue's that fully define an aggregate, and trace |
| 703 | +/// back the values inserted, see if they are all were extractvalue'd from |
| 704 | +/// the same source aggregate from the exact same element indexes. |
| 705 | +/// If they were, just reuse the source aggregate. |
| 706 | +/// This potentially deals with PHI indirections. |
| 707 | +Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( |
| 708 | + InsertValueInst &OrigIVI) { |
| 709 | + BasicBlock *UseBB = OrigIVI.getParent(); |
| 710 | + Type *AggTy = OrigIVI.getType(); |
| 711 | + unsigned NumAggElts; |
| 712 | + switch (AggTy->getTypeID()) { |
| 713 | + case Type::StructTyID: |
| 714 | + NumAggElts = AggTy->getStructNumElements(); |
| 715 | + break; |
| 716 | + case Type::ArrayTyID: |
| 717 | + NumAggElts = AggTy->getArrayNumElements(); |
| 718 | + break; |
| 719 | + default: |
| 720 | + llvm_unreachable("Unhandled aggregate type?"); |
| 721 | + } |
| 722 | + |
| 723 | + // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able |
| 724 | + // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}), |
| 725 | + // FIXME: any interesting patterns to be caught with larger limit? |
| 726 | + assert(NumAggElts > 0 && "Aggregate should have elements."); |
| 727 | + if (NumAggElts > 2) |
| 728 | + return nullptr; |
| 729 | + |
| 730 | + static constexpr auto NotFound = None; |
| 731 | + static constexpr auto FoundMismatch = nullptr; |
| 732 | + |
| 733 | + // Try to find a value of each element of an aggregate. |
| 734 | + // FIXME: deal with more complex, not one-dimensional, aggregate types |
| 735 | + SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound); |
| 736 | + |
| 737 | + // Do we know values for each element of the aggregate? |
| 738 | + auto KnowAllElts = [&AggElts]() { |
| 739 | + return all_of(AggElts, |
| 740 | + [](Optional<Value *> Elt) { return Elt != NotFound; }); |
| 741 | + }; |
| 742 | + |
| 743 | + int Depth = 0; |
| 744 | + |
| 745 | + // Arbitrary `insertvalue` visitation depth limit. Let's be okay with |
| 746 | + // every element being overwritten twice, which should never happen. |
| 747 | + static const int DepthLimit = 2 * NumAggElts; |
| 748 | + |
| 749 | + // Recurse up the chain of `insertvalue` aggregate operands until either we've |
| 750 | + // reconstructed full initializer or can't visit any more `insertvalue`'s. |
| 751 | + for (InsertValueInst *CurrIVI = &OrigIVI; |
| 752 | + Depth < DepthLimit && CurrIVI && !KnowAllElts(); |
| 753 | + CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), |
| 754 | + ++Depth) { |
| 755 | + Value *InsertedValue = CurrIVI->getInsertedValueOperand(); |
| 756 | + ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); |
| 757 | + |
| 758 | + // Don't bother with more than single-level aggregates. |
| 759 | + if (Indices.size() != 1) |
| 760 | + return nullptr; // FIXME: deal with more complex aggregates? |
| 761 | + |
| 762 | + // Now, we may have already previously recorded the value for this element |
| 763 | + // of an aggregate. If we did, that means the CurrIVI will later be |
| 764 | + // overwritten with the already-recorded value. But if not, let's record it! |
| 765 | + Optional<Value *> &Elt = AggElts[Indices.front()]; |
| 766 | + Elt = Elt.getValueOr(InsertedValue); |
| 767 | + |
| 768 | + // FIXME: should we handle chain-terminating undef base operand? |
| 769 | + } |
| 770 | + |
| 771 | + // Was that sufficient to deduce the full initializer for the aggregate? |
| 772 | + if (!KnowAllElts()) |
| 773 | + return nullptr; // Give up then. |
| 774 | + |
| 775 | + // We now want to find the source[s] of the aggregate elements we've found. |
| 776 | + // And with "source" we mean the original aggregate[s] from which |
| 777 | + // the inserted elements were extracted. This may require PHI translation. |
| 778 | + |
| 779 | + enum class SourceAggregate { |
| 780 | + /// When analyzing the value that was inserted into an aggregate, we did |
| 781 | + /// not manage to find defining `extractvalue` instruction to analyze. |
| 782 | + NotFound, |
| 783 | + /// When analyzing the value that was inserted into an aggregate, we did |
| 784 | + /// manage to find defining `extractvalue` instruction[s], and everything |
| 785 | + /// matched perfectly - aggregate type, element insertion/extraction index. |
| 786 | + Found, |
| 787 | + /// When analyzing the value that was inserted into an aggregate, we did |
| 788 | + /// manage to find defining `extractvalue` instruction, but there was |
| 789 | + /// a mismatch: either the source type from which the extraction was didn't |
| 790 | + /// match the aggregate type into which the insertion was, |
| 791 | + /// or the extraction/insertion channels mismatched, |
| 792 | + /// or different elements had different source aggregates. |
| 793 | + FoundMismatch |
| 794 | + }; |
| 795 | + auto Describe = [](Optional<Value *> SourceAggregate) { |
| 796 | + if (SourceAggregate == NotFound) |
| 797 | + return SourceAggregate::NotFound; |
| 798 | + if (*SourceAggregate == FoundMismatch) |
| 799 | + return SourceAggregate::FoundMismatch; |
| 800 | + return SourceAggregate::Found; |
| 801 | + }; |
| 802 | + |
| 803 | + // Given the value \p Elt that was being inserted into element \p EltIdx of an |
| 804 | + // aggregate AggTy, see if \p Elt was originally defined by an |
| 805 | + // appropriate extractvalue (same element index, same aggregate type). |
| 806 | + // If found, return the source aggregate from which the extraction was. |
| 807 | + // If \p PredBB is provided, does PHI translation of an \p Elt first. |
| 808 | + auto FindSourceAggregate = |
| 809 | + [&](Value *Elt, unsigned EltIdx, |
| 810 | + Optional<BasicBlock *> PredBB) -> Optional<Value *> { |
| 811 | + // For now(?), only deal with, at most, a single level of PHI indirection. |
| 812 | + if (PredBB) |
| 813 | + Elt = Elt->DoPHITranslation(UseBB, *PredBB); |
| 814 | + // FIXME: deal with multiple levels of PHI indirection? |
| 815 | + |
| 816 | + // Did we find an extraction? |
| 817 | + auto *EVI = dyn_cast<ExtractValueInst>(Elt); |
| 818 | + if (!EVI) |
| 819 | + return NotFound; |
| 820 | + |
| 821 | + Value *SourceAggregate = EVI->getAggregateOperand(); |
| 822 | + |
| 823 | + // Is the extraction from the same type into which the insertion was? |
| 824 | + if (SourceAggregate->getType() != AggTy) |
| 825 | + return FoundMismatch; |
| 826 | + // And the element index doesn't change between extraction and insertion? |
| 827 | + if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front()) |
| 828 | + return FoundMismatch; |
| 829 | + |
| 830 | + return SourceAggregate; // SourceAggregate::Found |
| 831 | + }; |
| 832 | + |
| 833 | + // Given elements AggElts that were constructing an aggregate OrigIVI, |
| 834 | + // see if we can find appropriate source aggregate for each of the elements, |
| 835 | + // and see it's the same aggregate for each element. If so, return it. |
| 836 | + auto FindCommonSourceAggregate = |
| 837 | + [&](Optional<BasicBlock *> PredBB) -> Optional<Value *> { |
| 838 | + Optional<Value *> SourceAggregate; |
| 839 | + |
| 840 | + for (auto I : enumerate(AggElts)) { |
| 841 | + assert(Describe(SourceAggregate) != SourceAggregate::FoundMismatch && |
| 842 | + "We don't store nullptr in SourceAggregate!"); |
| 843 | + assert((Describe(SourceAggregate) == SourceAggregate::Found) == |
| 844 | + (I.index() != 0) && |
| 845 | + "SourceAggregate should be valid after the the first element,"); |
| 846 | + |
| 847 | + // For this element, is there a plausible source aggregate? |
| 848 | + // FIXME: we could special-case undef element, IFF we know that in the |
| 849 | + // source aggregate said element isn't poison. |
| 850 | + Optional<Value *> SourceAggregateForElement = |
| 851 | + FindSourceAggregate(*I.value(), I.index(), PredBB); |
| 852 | + |
| 853 | + // Okay, what have we found? Does that correlate with previous findings? |
| 854 | + |
| 855 | + // Regardless of whether or not we have previously found source |
| 856 | + // aggregate for previous elements (if any), if we didn't find one for |
| 857 | + // this element, passthrough whatever we have just found. |
| 858 | + if (Describe(SourceAggregateForElement) != SourceAggregate::Found) |
| 859 | + return SourceAggregateForElement; |
| 860 | + |
| 861 | + // Okay, we have found source aggregate for this element. |
| 862 | + // Let's see what we already know from previous elements, if any. |
| 863 | + switch (Describe(SourceAggregate)) { |
| 864 | + case SourceAggregate::NotFound: |
| 865 | + // This is apparently the first element that we have examined. |
| 866 | + SourceAggregate = SourceAggregateForElement; // Record the aggregate! |
| 867 | + continue; // Great, now look at next element. |
| 868 | + case SourceAggregate::Found: |
| 869 | + // We have previously already successfully examined other elements. |
| 870 | + // Is this the same source aggregate we've found for other elements? |
| 871 | + if (*SourceAggregateForElement != *SourceAggregate) |
| 872 | + return FoundMismatch; |
| 873 | + continue; // Still the same aggregate, look at next element. |
| 874 | + case SourceAggregate::FoundMismatch: |
| 875 | + llvm_unreachable("Can't happen. We would have early-exited then."); |
| 876 | + }; |
| 877 | + } |
| 878 | + |
| 879 | + assert(Describe(SourceAggregate) == SourceAggregate::Found && |
| 880 | + "Must be a valid Value"); |
| 881 | + return *SourceAggregate; |
| 882 | + }; |
| 883 | + |
| 884 | + Optional<Value *> SourceAggregate; |
| 885 | + |
| 886 | + // Can we find the source aggregate without looking at predecessors? |
| 887 | + SourceAggregate = FindCommonSourceAggregate(/*PredBB=*/None); |
| 888 | + if (Describe(SourceAggregate) != SourceAggregate::NotFound) { |
| 889 | + if (Describe(SourceAggregate) == SourceAggregate::FoundMismatch) |
| 890 | + return nullptr; // Conflicting source aggregates! |
| 891 | + ++NumAggregateReconstructionsSimplified; |
| 892 | + return replaceInstUsesWith(OrigIVI, *SourceAggregate); |
| 893 | + } |
| 894 | + |
| 895 | + // If we didn't manage to find source aggregate without looking at |
| 896 | + // predecessors, and there are no predecessors to look at, then we're done. |
| 897 | + if (pred_empty(UseBB)) |
| 898 | + return nullptr; |
| 899 | + |
| 900 | + // Okay, apparently we need to look at predecessors. |
| 901 | + |
| 902 | + // Arbitrary predecessor count limit. |
| 903 | + static const int PredCountLimit = 64; |
| 904 | + // Don't bother if there are too many predecessors. |
| 905 | + if (UseBB->hasNPredecessorsOrMore(PredCountLimit + 1)) |
| 906 | + return nullptr; |
| 907 | + |
| 908 | + // For each predecessor, what is the source aggregate, |
| 909 | + // from which all the elements were originally extracted from? |
| 910 | + // Note that we want for the map to have stable iteration order! |
| 911 | + SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates; |
| 912 | + for (BasicBlock *Pred : predecessors(UseBB)) { |
| 913 | + std::pair<decltype(SourceAggregates)::iterator, bool> IV = |
| 914 | + SourceAggregates.insert({Pred, nullptr}); |
| 915 | + // Did we already evaluate this predecessor? |
| 916 | + if (!IV.second) |
| 917 | + continue; |
| 918 | + |
| 919 | + // Let's hope that when coming from predecessor Pred, all elements of the |
| 920 | + // aggregate produced by OrigIVI must have been originally extracted from |
| 921 | + // the same aggregate. Is that so? Can we find said original aggregate? |
| 922 | + SourceAggregate = FindCommonSourceAggregate(Pred); |
| 923 | + if (Describe(SourceAggregate) != SourceAggregate::Found) |
| 924 | + return nullptr; // Give up. |
| 925 | + IV.first->second = *SourceAggregate; |
| 926 | + } |
| 927 | + |
| 928 | + // All good! Now we just need to thread the source aggregates here. |
| 929 | + auto *PHI = PHINode::Create(AggTy, SourceAggregates.size(), |
| 930 | + OrigIVI.getName() + ".merged"); |
| 931 | + for (const std::pair<BasicBlock *, Value *> &SourceAggregate : |
| 932 | + SourceAggregates) |
| 933 | + PHI->addIncoming(SourceAggregate.second, SourceAggregate.first); |
| 934 | + |
| 935 | + ++NumAggregateReconstructionsSimplified; |
| 936 | + return PHI; |
| 937 | +}; |
| 938 | + |
697 | 939 | /// Try to find redundant insertvalue instructions, like the following ones:
|
698 | 940 | /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
|
699 | 941 | /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
|
@@ -726,6 +968,10 @@ Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
|
726 | 968 |
|
727 | 969 | if (IsRedundant)
|
728 | 970 | return replaceInstUsesWith(I, I.getOperand(0));
|
| 971 | + |
| 972 | + if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I)) |
| 973 | + return NewI; |
| 974 | + |
729 | 975 | return nullptr;
|
730 | 976 | }
|
731 | 977 |
|
|
0 commit comments