@@ -29774,20 +29774,22 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
29774
29774
// UnpairedInputs contains values yet to be paired, mapping an unpaired value to
29775
29775
// its current neighbor's value and index.
29776
29776
// Do not use llvm::DenseMap, as ~0 is a reserved key.
29777
- template <typename InputTy,
29778
- typename PermutationTy,
29779
- typename MapTy = SmallMapVector<typename InputTy::value_type,
29780
- std::pair<typename InputTy::value_type, typename PermutationTy::value_type>, 8>>
29781
- static bool PermuteAndPairVector(const InputTy& Inputs,
29782
- PermutationTy &Permutation,
29783
- MapTy UnpairedInputs = SmallMapVector<typename InputTy::value_type,
29784
- std::pair<typename InputTy::value_type, typename PermutationTy::value_type>, 8>()) {
29777
+ template <typename InputTy, typename PermutationTy,
29778
+ typename MapTy =
29779
+ SmallMapVector<typename InputTy::value_type,
29780
+ std::pair<typename InputTy::value_type,
29781
+ typename PermutationTy::value_type>,
29782
+ 8>>
29783
+ static bool PermuteAndPairVector(
29784
+ const InputTy &Inputs, PermutationTy &Permutation,
29785
+ MapTy UnpairedInputs = MapTy()) {
29785
29786
const auto Wildcard = ~typename InputTy::value_type();
29786
29787
SmallVector<typename PermutationTy::value_type, 16> WildcardPairs;
29787
29788
29788
29789
size_t OutputOffset = Permutation.size();
29789
29790
typename PermutationTy::value_type I = 0;
29790
- for (auto InputIt = Inputs.begin(), InputEnd = Inputs.end(); InputIt != InputEnd;) {
29791
+ for (auto InputIt = Inputs.begin(), InputEnd = Inputs.end();
29792
+ InputIt != InputEnd;) {
29791
29793
Permutation.push_back(OutputOffset + I);
29792
29794
Permutation.push_back(OutputOffset + I + 1);
29793
29795
@@ -29802,14 +29804,18 @@ static bool PermuteAndPairVector(const InputTy& Inputs,
29802
29804
29803
29805
// If both are equal, they are in good position.
29804
29806
if (Even != Odd) {
29805
- auto DoWork = [&] (auto &This, auto ThisIndex, auto Other, auto OtherIndex) {
29807
+ auto DoWork = [&](auto &This, auto ThisIndex, auto Other,
29808
+ auto OtherIndex) {
29806
29809
if (This != Wildcard) {
29807
29810
// For non-wildcard value, check if it can pair with an existing
29808
29811
// unpaired value from UnpairedInputs, if so, swap with the unpaired
29809
29812
// value's neighbor, otherwise the current value is added to the map.
29810
- if (auto [MapIt, Inserted] = UnpairedInputs.try_emplace(This, std::make_pair(Other, OtherIndex)); !Inserted) {
29813
+ if (auto [MapIt, Inserted] = UnpairedInputs.try_emplace(
29814
+ This, std::make_pair(Other, OtherIndex));
29815
+ !Inserted) {
29811
29816
auto [SwapValue, SwapIndex] = MapIt->second;
29812
- std::swap(Permutation[OutputOffset + SwapIndex], Permutation[OutputOffset + ThisIndex]);
29817
+ std::swap(Permutation[OutputOffset + SwapIndex],
29818
+ Permutation[OutputOffset + ThisIndex]);
29813
29819
This = SwapValue;
29814
29820
UnpairedInputs.erase(MapIt);
29815
29821
@@ -29831,7 +29837,9 @@ static bool PermuteAndPairVector(const InputTy& Inputs,
29831
29837
UnpairedInputs[This] = std::make_pair(Other, OtherIndex);
29832
29838
}
29833
29839
// If its neighbor is also in UnpairedInputs, update its info too.
29834
- if (auto OtherMapIt = UnpairedInputs.find(Other); OtherMapIt != UnpairedInputs.end() && OtherMapIt->second.second == ThisIndex) {
29840
+ if (auto OtherMapIt = UnpairedInputs.find(Other);
29841
+ OtherMapIt != UnpairedInputs.end() &&
29842
+ OtherMapIt->second.second == ThisIndex) {
29835
29843
OtherMapIt->second.first = This;
29836
29844
}
29837
29845
}
@@ -29849,11 +29857,12 @@ static bool PermuteAndPairVector(const InputTy& Inputs,
29849
29857
// Now check if each remaining unpaired neighboring value can be swapped with
29850
29858
// a wildcard pair to form two paired values.
29851
29859
for (auto &[Unpaired, V] : UnpairedInputs) {
29852
- auto [Neighbor, NeighborIndex] = V;
29860
+ auto [Neighbor, NeighborIndex] = V;
29853
29861
if (Neighbor != Wildcard) {
29854
29862
assert(UnpairedInputs.count(Neighbor));
29855
29863
if (WildcardPairs.size()) {
29856
- std::swap(Permutation[OutputOffset + WildcardPairs.back()], Permutation[OutputOffset + NeighborIndex]);
29864
+ std::swap(Permutation[OutputOffset + WildcardPairs.back()],
29865
+ Permutation[OutputOffset + NeighborIndex]);
29857
29866
WildcardPairs.pop_back();
29858
29867
// Mark the neighbor as processed.
29859
29868
UnpairedInputs[Neighbor].first = Wildcard;
@@ -30151,8 +30160,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
30151
30160
// (shift (shuffle X P1) S1) ->
30152
30161
// (shuffle (shift (shuffle X (shuffle P2 P1)) S2) P2^-1) where S2 can be
30153
30162
// widened, and P2^-1 is the inverse shuffle of P2.
30154
- if (ConstantAmt && (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) && R.hasOneUse()
30155
- && Subtarget.hasSSE3() && !Subtarget.hasAVX512()) {
30163
+ if (ConstantAmt &&
30164
+ (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) &&
30165
+ R.hasOneUse() && Subtarget.hasSSE3() && !Subtarget.hasAVX512()) {
30156
30166
constexpr size_t LaneBytes = 16;
30157
30167
const size_t NumLanes = VT.getVectorNumElements() / LaneBytes;
30158
30168
@@ -30169,7 +30179,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
30169
30179
// if so, this transformation may be profitable.
30170
30180
bool Profitable;
30171
30181
for (size_t I = 0; I < NumLanes; ++I) {
30172
- if (!(Profitable = PermuteAndPairVector(ArrayRef(&ShiftAmt[I * LaneBytes], LaneBytes), Permutation)))
30182
+ if (!(Profitable = PermuteAndPairVector(
30183
+ ArrayRef(&ShiftAmt[I * LaneBytes], LaneBytes), Permutation)))
30173
30184
break;
30174
30185
}
30175
30186
@@ -30187,7 +30198,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
30187
30198
}
30188
30199
SmallVector<int, 32> Permutation2;
30189
30200
for (size_t I = 0; I < NumLanes; ++I) {
30190
- if (!(IsAdjacentQuads = PermuteAndPairVector(ArrayRef(&EveryOtherShiftAmt[I * LaneBytes / 2], LaneBytes / 2), Permutation2)))
30201
+ if (!(IsAdjacentQuads = PermuteAndPairVector(
30202
+ ArrayRef(&EveryOtherShiftAmt[I * LaneBytes / 2],
30203
+ LaneBytes / 2),
30204
+ Permutation2)))
30191
30205
break;
30192
30206
}
30193
30207
if (IsAdjacentQuads) {
@@ -30235,7 +30249,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
30235
30249
if (!IsAdjacentQuads || (VT == MVT::v64i8 && Opc == ISD::SHL))
30236
30250
Profitable = false;
30237
30251
} else {
30238
- if (Opc == ISD::SHL || ((VT == MVT::v16i8 || VT == MVT::v32i8) && Opc == ISD::SRL))
30252
+ if (Opc == ISD::SHL ||
30253
+ ((VT == MVT::v16i8 || VT == MVT::v32i8) && Opc == ISD::SRL))
30239
30254
Profitable = false;
30240
30255
}
30241
30256
}
0 commit comments