@@ -818,6 +818,105 @@ struct InstructionsState {
818
818
819
819
} // end anonymous namespace
820
820
821
+ struct InterchangeableInstruction {
822
+ unsigned Opcode;
823
+ SmallVector<Value *> Ops;
824
+ template <class... ArgTypes>
825
+ InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
826
+ : Opcode(Opcode), Ops{std::forward<decltype(Args)>(Args)...} {}
827
+ };
828
+
829
+ bool operator<(const InterchangeableInstruction &LHS,
830
+ const InterchangeableInstruction &RHS) {
831
+ return LHS.Opcode < RHS.Opcode;
832
+ }
833
+
834
+ /// \returns a list of interchangeable instructions which \p I can be converted
835
+ /// to.
836
+ /// e.g.,
837
+ /// x << y -> x * (2^y)
838
+ /// x << 1 -> x * 2
839
+ /// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
840
+ /// x * 0 -> x & 0
841
+ /// x * -1 -> 0 - x
842
+ /// TODO: support more patterns
843
+ static SmallVector<InterchangeableInstruction, 6>
844
+ getInterchangeableInstruction(Instruction *I) {
845
+ // PII = Possible Interchangeable Instruction
846
+ SmallVector<InterchangeableInstruction, 6> PII;
847
+ unsigned Opcode = I->getOpcode();
848
+ PII.emplace_back(Opcode, I->operands());
849
+ if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
850
+ Instruction::Add},
851
+ Opcode))
852
+ return PII;
853
+ Constant *C;
854
+ if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
855
+ ConstantInt *V = nullptr;
856
+ if (auto *CI = dyn_cast<ConstantInt>(C)) {
857
+ V = CI;
858
+ } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
859
+ if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
860
+ V = CI;
861
+ }
862
+ if (!V)
863
+ return PII;
864
+ Value *Op0 = I->getOperand(0);
865
+ Type *Op1Ty = I->getOperand(1)->getType();
866
+ const APInt &Op1Int = V->getValue();
867
+ Constant *Zero =
868
+ ConstantInt::get(Op1Ty, APInt::getZero(Op1Int.getBitWidth()));
869
+ Constant *UnsignedMax =
870
+ ConstantInt::get(Op1Ty, APInt::getMaxValue(Op1Int.getBitWidth()));
871
+ switch (Opcode) {
872
+ case Instruction::Shl: {
873
+ PII.emplace_back(Instruction::Mul, Op0,
874
+ ConstantInt::get(Op1Ty, 1 << Op1Int.getZExtValue()));
875
+ if (Op1Int.isZero()) {
876
+ PII.emplace_back(Instruction::Sub, Op0, Zero);
877
+ PII.emplace_back(Instruction::Add, Op0, Zero);
878
+ PII.emplace_back(Instruction::And, Op0, UnsignedMax);
879
+ PII.emplace_back(Instruction::Or, Op0, Zero);
880
+ }
881
+ break;
882
+ }
883
+ case Instruction::Mul: {
884
+ switch (Op1Int.getSExtValue()) {
885
+ case 1:
886
+ PII.emplace_back(Instruction::Sub, Op0, Zero);
887
+ PII.emplace_back(Instruction::Add, Op0, Zero);
888
+ PII.emplace_back(Instruction::And, Op0, UnsignedMax);
889
+ PII.emplace_back(Instruction::Or, Op0, Zero);
890
+ break;
891
+ case 0:
892
+ PII.emplace_back(Instruction::And, Op0, Zero);
893
+ break;
894
+ case -1:
895
+ PII.emplace_back(Instruction::Sub, Zero, Op0);
896
+ break;
897
+ }
898
+ break;
899
+ }
900
+ case Instruction::Sub:
901
+ if (Op1Int.isZero()) {
902
+ PII.emplace_back(Instruction::Add, Op0, Zero);
903
+ PII.emplace_back(Instruction::And, Op0, UnsignedMax);
904
+ PII.emplace_back(Instruction::Or, Op0, Zero);
905
+ }
906
+ break;
907
+ case Instruction::Add:
908
+ if (Op1Int.isZero()) {
909
+ PII.emplace_back(Instruction::And, Op0, UnsignedMax);
910
+ PII.emplace_back(Instruction::Or, Op0, Zero);
911
+ }
912
+ break;
913
+ }
914
+ }
915
+ // std::set_intersection requires a sorted range.
916
+ sort(PII);
917
+ return PII;
918
+ }
919
+
821
920
/// \returns true if \p Opcode is allowed as part of the main/alternate
822
921
/// instruction for SLP vectorization.
823
922
///
@@ -922,18 +1021,54 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
922
1021
if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
923
1022
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
924
1023
}
1024
+ // Currently, this is only used for binary ops.
1025
+ // TODO: support all instructions
1026
+ SmallVector<InterchangeableInstruction> InterchangeableOpcode =
1027
+ getInterchangeableInstruction(cast<Instruction>(VL[BaseIndex]));
1028
+ SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
1029
+ auto UpdateInterchangeableOpcode =
1030
+ [](SmallVector<InterchangeableInstruction> &LHS,
1031
+ ArrayRef<InterchangeableInstruction> RHS) {
1032
+ SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
1033
+ std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
1034
+ std::back_inserter(NewInterchangeableOpcode));
1035
+ if (NewInterchangeableOpcode.empty())
1036
+ return false;
1037
+ LHS = std::move(NewInterchangeableOpcode);
1038
+ return true;
1039
+ };
925
1040
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
926
1041
auto *I = cast<Instruction>(VL[Cnt]);
927
1042
unsigned InstOpcode = I->getOpcode();
928
1043
if (IsBinOp && isa<BinaryOperator>(I)) {
929
- if (InstOpcode == Opcode || InstOpcode == AltOpcode)
1044
+ SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
1045
+ getInterchangeableInstruction(I));
1046
+ if (UpdateInterchangeableOpcode(InterchangeableOpcode,
1047
+ ThisInterchangeableOpcode))
930
1048
continue;
931
- if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
932
- isValidForAlternation(Opcode)) {
933
- AltOpcode = InstOpcode;
934
- AltIndex = Cnt;
1049
+ if (AlternateInterchangeableOpcode.empty()) {
1050
+ InterchangeableOpcode.erase(
1051
+ std::remove_if(InterchangeableOpcode.begin(),
1052
+ InterchangeableOpcode.end(),
1053
+ [](const InterchangeableInstruction &I) {
1054
+ return !isValidForAlternation(I.Opcode);
1055
+ }),
1056
+ InterchangeableOpcode.end());
1057
+ ThisInterchangeableOpcode.erase(
1058
+ std::remove_if(ThisInterchangeableOpcode.begin(),
1059
+ ThisInterchangeableOpcode.end(),
1060
+ [](const InterchangeableInstruction &I) {
1061
+ return !isValidForAlternation(I.Opcode);
1062
+ }),
1063
+ ThisInterchangeableOpcode.end());
1064
+ if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
1065
+ return InstructionsState(VL[BaseIndex], nullptr, nullptr);
1066
+ AlternateInterchangeableOpcode = std::move(ThisInterchangeableOpcode);
935
1067
continue;
936
1068
}
1069
+ if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
1070
+ ThisInterchangeableOpcode))
1071
+ continue;
937
1072
} else if (IsCastOp && isa<CastInst>(I)) {
938
1073
Value *Op0 = IBase->getOperand(0);
939
1074
Type *Ty0 = Op0->getType();
@@ -1027,6 +1162,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
1027
1162
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
1028
1163
}
1029
1164
1165
+ if (IsBinOp) {
1166
+ auto FindOp =
1167
+ [&](const SmallVector<InterchangeableInstruction> &CandidateOp) {
1168
+ for (Value *V : VL)
1169
+ for (const InterchangeableInstruction &I : CandidateOp)
1170
+ if (cast<Instruction>(V)->getOpcode() == I.Opcode)
1171
+ return cast<Instruction>(V);
1172
+ llvm_unreachable(
1173
+ "Cannot find the candidate instruction for InstructionsState.");
1174
+ };
1175
+ Instruction *MainOp = FindOp(InterchangeableOpcode);
1176
+ Instruction *AltOp = AlternateInterchangeableOpcode.empty()
1177
+ ? MainOp
1178
+ : FindOp(AlternateInterchangeableOpcode);
1179
+ return InstructionsState(VL[BaseIndex], MainOp, AltOp);
1180
+ }
1030
1181
return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
1031
1182
cast<Instruction>(VL[AltIndex]));
1032
1183
}
@@ -2318,24 +2469,41 @@ class BoUpSLP {
2318
2469
: cast<Instruction>(VL[0])->getNumOperands();
2319
2470
OpsVec.resize(NumOperands);
2320
2471
unsigned NumLanes = VL.size();
2321
- for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
2472
+ InstructionsState S = getSameOpcode(VL, TLI);
2473
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
2322
2474
OpsVec[OpIdx].resize(NumLanes);
2323
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
2324
- assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
2325
- // Our tree has just 3 nodes: the root and two operands.
2326
- // It is therefore trivial to get the APO. We only need to check the
2327
- // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
2328
- // RHS operand. The LHS operand of both add and sub is never attached
2329
- // to an inversese operation in the linearized form, therefore its APO
2330
- // is false. The RHS is true only if VL[Lane] is an inverse operation.
2331
-
2332
- // Since operand reordering is performed on groups of commutative
2333
- // operations or alternating sequences (e.g., +, -), we can safely
2334
- // tell the inverse operations by checking commutativity.
2335
- bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
2475
+ for (auto [I, V] : enumerate(VL)) {
2476
+ assert(isa<Instruction>(V) && "Expected instruction");
2477
+ SmallVector<InterchangeableInstruction> IIList =
2478
+ getInterchangeableInstruction(cast<Instruction>(V));
2479
+ Value *SelectedOp;
2480
+ auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
2481
+ return II.Opcode == S.MainOp->getOpcode();
2482
+ });
2483
+ if (Iter == IIList.end()) {
2484
+ Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
2485
+ return II.Opcode == S.AltOp->getOpcode();
2486
+ });
2487
+ SelectedOp = S.AltOp;
2488
+ } else {
2489
+ SelectedOp = S.MainOp;
2490
+ }
2491
+ assert(Iter != IIList.end() &&
2492
+ "Cannot find an interchangeable instruction.");
2493
+ // Our tree has just 3 nodes: the root and two operands.
2494
+ // It is therefore trivial to get the APO. We only need to check the
2495
+ // opcode of V and whether the operand at OpIdx is the LHS or RHS
2496
+ // operand. The LHS operand of both add and sub is never attached to an
2497
+ // inverse operation in the linearized form, therefore its APO is
2498
+ // false. The RHS is true only if V is an inverse operation.
2499
+
2500
+ // Since operand reordering is performed on groups of commutative
2501
+ // operations or alternating sequences (e.g., +, -), we can safely
2502
+ // tell the inverse operations by checking commutativity.
2503
+ bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
2504
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
2336
2505
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
2337
- OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
2338
- APO, false};
2506
+ OpsVec[OpIdx][I] = {Iter->Ops[OpIdx], APO, false};
2339
2507
}
2340
2508
}
2341
2509
}
@@ -3227,15 +3395,25 @@ class BoUpSLP {
3227
3395
auto *I0 = cast<Instruction>(Scalars[0]);
3228
3396
Operands.resize(I0->getNumOperands());
3229
3397
unsigned NumLanes = Scalars.size();
3230
- for ( unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3231
- OpIdx != NumOperands; ++OpIdx) {
3398
+ unsigned NumOperands = I0->getNumOperands();
3399
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
3232
3400
Operands[OpIdx].resize(NumLanes);
3233
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3234
- auto *I = cast<Instruction>(Scalars[Lane]);
3235
- assert(I->getNumOperands() == NumOperands &&
3236
- "Expected same number of operands");
3237
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3238
- }
3401
+ for (auto [I, V] : enumerate(Scalars)) {
3402
+ SmallVector<InterchangeableInstruction> IIList =
3403
+ getInterchangeableInstruction(cast<Instruction>(V));
3404
+ auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
3405
+ return II.Opcode == MainOp->getOpcode();
3406
+ });
3407
+ if (Iter == IIList.end())
3408
+ Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
3409
+ return II.Opcode == AltOp->getOpcode();
3410
+ });
3411
+ assert(Iter != IIList.end() &&
3412
+ "Cannot find an interchangeable instruction.");
3413
+ assert(Iter->Ops.size() == NumOperands &&
3414
+ "Expected same number of operands");
3415
+ for (auto [J, Op] : enumerate(Iter->Ops))
3416
+ Operands[J][I] = Op;
3239
3417
}
3240
3418
}
3241
3419
0 commit comments