@@ -14097,25 +14097,52 @@ bool BoUpSLP::collectValuesToDemote(
14097
14097
}
14098
14098
return false;
14099
14099
};
14100
- bool NeedToExit = false;
// TryProcessInstruction - common tail shared by the opcode cases of the
// switch that follows; each of those cases returns this lambda's result.
//   I        - instruction being examined; its operands are scanned for
//              constants and it is the key of the DemotedConsts entry.
//   ITE      - tree entry whose UserTreeIndices count gates the
//              multiple-use check.
//   BitWidth - forwarded by reference to IsPotentiallyTruncated.
//   Operands - values handed to ProcessOperands; when empty (the default)
//              the no-operand path used by the Trunc/ZExt/SExt cases runs.
//   Checker  - optional predicate forwarded to AttemptCheckBitwidth; the
//              call is skipped when Checker is empty.
// Returns false when one of the checks fails, true when NeedToExit is set
// after AttemptCheckBitwidth or ProcessOperands (V is then not appended to
// ToDemote), and IsProfitableToDemote once V has been recorded.
14100
+ auto TryProcessInstruction =
14101
+ [&](Instruction *I, const TreeEntry &ITE, unsigned &BitWidth,
14102
+ ArrayRef<Value *> Operands = std::nullopt,
14103
+ function_ref<bool(unsigned, unsigned)> Checker = {}) {
14104
+ if (Operands.empty()) {
14105
+ if (!IsTruncRoot)
14106
+ MaxDepthLevel = 1;
// The result is discarded on purpose (void cast). This path queries the
// captured V, whereas the else-branch queries I.
// NOTE(review): presumably I is V cast to Instruction - confirm.
14107
+ (void)IsPotentiallyTruncated(V, BitWidth);
14108
+ } else {
14109
+ // Several vectorized uses? Check if we can truncate it, otherwise -
14110
+ // exit.
14111
+ if (ITE.UserTreeIndices.size() > 1 &&
14112
+ !IsPotentiallyTruncated(I, BitWidth))
14113
+ return false;
// Fresh flag for this call; it is tested after each helper call below.
14114
+ bool NeedToExit = false;
14115
+ if (Checker && !AttemptCheckBitwidth(Checker, NeedToExit))
14116
+ return false;
14117
+ if (NeedToExit)
14118
+ return true;
14119
+ if (!ProcessOperands(Operands, NeedToExit))
14120
+ return false;
14121
+ if (NeedToExit)
14122
+ return true;
14123
+ }
14124
+
// Reached from the no-operand path, or when every check above passed
// without NeedToExit being set.
14125
+ ++MaxDepthLevel;
14126
+ // Gather demoted constant operands.
// Start and End are captured from the enclosing function; the Select case
// sets Start = 1 before calling this lambda.
14127
+ for (unsigned Idx : seq<unsigned>(Start, End))
14128
+ if (isa<Constant>(I->getOperand(Idx)))
14129
+ DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx);
14130
+ // Record the value that we can demote.
14131
+ ToDemote.push_back(V);
14132
+ return IsProfitableToDemote;
14133
+ };
14101
14134
switch (I->getOpcode()) {
14102
14135
14103
14136
// We can always demote truncations and extensions. Since truncations can
14104
14137
// seed additional demotion, we save the truncated value.
14105
14138
case Instruction::Trunc:
14106
- if (!IsTruncRoot)
14107
- MaxDepthLevel = 1;
14108
14139
if (IsProfitableToDemoteRoot)
14109
14140
IsProfitableToDemote = true;
14110
- (void)IsPotentiallyTruncated(V, BitWidth);
14111
- break;
14141
+ return TryProcessInstruction(I, *ITE, BitWidth);
14112
14142
case Instruction::ZExt:
14113
14143
case Instruction::SExt:
14114
- if (!IsTruncRoot)
14115
- MaxDepthLevel = 1;
14116
14144
IsProfitableToDemote = true;
14117
- (void)IsPotentiallyTruncated(V, BitWidth);
14118
- break;
14145
+ return TryProcessInstruction(I, *ITE, BitWidth);
14119
14146
14120
14147
// We can demote certain binary operations if we can demote both of their
14121
14148
// operands.
@@ -14125,140 +14152,83 @@ bool BoUpSLP::collectValuesToDemote(
14125
14152
case Instruction::And:
14126
14153
case Instruction::Or:
14127
14154
case Instruction::Xor: {
14128
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14129
- return false;
14130
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14131
- return false;
14132
- break;
14155
+ return TryProcessInstruction(I, *ITE, BitWidth,
14156
+ {I->getOperand(0), I->getOperand(1)});
14133
14157
}
14134
14158
case Instruction::Shl: {
14135
- // Several vectorized uses? Check if we can truncate it, otherwise - exit.
14136
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14137
- return false;
14138
14159
// If we are truncating the result of this SHL, and if it's a shift of an
14139
14160
// inrange amount, we can always perform a SHL in a smaller type.
14140
- if (!AttemptCheckBitwidth(
14141
- [&](unsigned BitWidth, unsigned) {
14142
- KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
14143
- return AmtKnownBits.getMaxValue().ult(BitWidth);
14144
- },
14145
- NeedToExit))
14146
- return false;
14147
- if (NeedToExit)
14148
- return true;
14149
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14150
- return false;
14151
- break;
14161
+ auto ShlChecker = [&](unsigned BitWidth, unsigned) {
14162
+ KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
14163
+ return AmtKnownBits.getMaxValue().ult(BitWidth);
14164
+ };
14165
+ return TryProcessInstruction(
14166
+ I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, ShlChecker);
14152
14167
}
14153
14168
case Instruction::LShr: {
14154
- // Several vectorized uses? Check if we can truncate it, otherwise - exit.
14155
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14156
- return false;
14157
14169
// If this is a truncate of a logical shr, we can truncate it to a smaller
14158
14170
// lshr iff we know that the bits we would otherwise be shifting in are
14159
14171
// already zeros.
14160
- if (!AttemptCheckBitwidth(
14161
- [&](unsigned BitWidth, unsigned OrigBitWidth) {
14162
- KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
14163
- APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
14164
- return AmtKnownBits.getMaxValue().ult(BitWidth) &&
14165
- MaskedValueIsZero(I->getOperand(0), ShiftedBits,
14166
- SimplifyQuery(*DL));
14167
- },
14168
- NeedToExit))
14169
- return false;
14170
- if (NeedToExit)
14171
- return true;
14172
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14173
- return false;
14174
- break;
14172
+ auto LShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
14173
+ KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
14174
+ APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
14175
+ return AmtKnownBits.getMaxValue().ult(BitWidth) &&
14176
+ MaskedValueIsZero(I->getOperand(0), ShiftedBits,
14177
+ SimplifyQuery(*DL));
14178
+ };
14179
+ return TryProcessInstruction(
14180
+ I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, LShrChecker);
14175
14181
}
14176
14182
case Instruction::AShr: {
14177
- // Several vectorized uses? Check if we can truncate it, otherwise - exit.
14178
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14179
- return false;
14180
14183
// If this is a truncate of an arithmetic shr, we can truncate it to a
14181
14184
// smaller ashr iff we know that all the bits from the sign bit of the
14182
14185
// original type and the sign bit of the truncate type are similar.
14183
- if (!AttemptCheckBitwidth(
14184
- [&](unsigned BitWidth, unsigned OrigBitWidth) {
14185
- KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
14186
- unsigned ShiftedBits = OrigBitWidth - BitWidth;
14187
- return AmtKnownBits.getMaxValue().ult(BitWidth) &&
14188
- ShiftedBits < ComputeNumSignBits(I->getOperand(0), *DL, 0,
14189
- AC, nullptr, DT);
14190
- },
14191
- NeedToExit))
14192
- return false;
14193
- if (NeedToExit)
14194
- return true;
14195
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14196
- return false;
14197
- break;
14186
+ auto AShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
14187
+ KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
14188
+ unsigned ShiftedBits = OrigBitWidth - BitWidth;
14189
+ return AmtKnownBits.getMaxValue().ult(BitWidth) &&
14190
+ ShiftedBits <
14191
+ ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT);
14192
+ };
14193
+ return TryProcessInstruction(
14194
+ I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, AShrChecker);
14198
14195
}
14199
14196
case Instruction::UDiv:
14200
14197
case Instruction::URem: {
14201
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14202
- return false;
14203
14198
// UDiv and URem can be truncated if all the truncated bits are zero.
14204
- if (!AttemptCheckBitwidth(
14205
- [&](unsigned BitWidth, unsigned OrigBitWidth) {
14206
- assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
14207
- APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
14208
- return MaskedValueIsZero(I->getOperand(0), Mask,
14209
- SimplifyQuery(*DL)) &&
14210
- MaskedValueIsZero(I->getOperand(1), Mask,
14211
- SimplifyQuery(*DL));
14212
- },
14213
- NeedToExit))
14214
- return false;
14215
- if (NeedToExit)
14216
- return true;
14217
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14218
- return false;
14219
- break;
14199
+ auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
14200
+ assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
14201
+ APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
14202
+ return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) &&
14203
+ MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL));
14204
+ };
14205
+ return TryProcessInstruction(I, *ITE, BitWidth,
14206
+ {I->getOperand(0), I->getOperand(1)}, Checker);
14220
14207
}
14221
14208
14222
14209
// We can demote selects if we can demote their true and false values.
14223
14210
case Instruction::Select: {
14224
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14225
- return false;
14226
14211
Start = 1;
14227
14212
auto *SI = cast<SelectInst>(I);
14228
- if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14229
- return false;
14230
- break;
14213
+ return TryProcessInstruction(I, *ITE, BitWidth,
14214
+ {SI->getTrueValue(), SI->getFalseValue()});
14231
14215
}
14232
14216
14233
14217
// We can demote phis if we can demote all their incoming operands. Note that
14234
14218
// we don't need to worry about cycles since we ensure single use above.
14235
14219
case Instruction::PHI: {
14236
14220
PHINode *PN = cast<PHINode>(I);
14237
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14238
- return false;
14239
14221
SmallVector<Value *> Ops(PN->incoming_values().begin(),
14240
14222
PN->incoming_values().end());
14241
- if (!ProcessOperands(Ops, NeedToExit))
14242
- return false;
14243
- break;
14223
+ return TryProcessInstruction(I, *ITE, BitWidth, Ops);
14244
14224
}
14245
14225
14246
14226
// Otherwise, conservatively give up.
14247
14227
default:
14248
- MaxDepthLevel = 1;
14249
- return FinalAnalysis();
14228
+ break;
14250
14229
}
14251
- if (NeedToExit)
14252
- return true;
14253
-
14254
- ++MaxDepthLevel;
14255
- // Gather demoted constant operands.
14256
- for (unsigned Idx : seq<unsigned>(Start, End))
14257
- if (isa<Constant>(I->getOperand(Idx)))
14258
- DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx);
14259
- // Record the value that we can demote.
14260
- ToDemote.push_back(V);
14261
- return IsProfitableToDemote;
14230
+ MaxDepthLevel = 1;
14231
+ return FinalAnalysis();
14262
14232
}
14263
14233
14264
14234
void BoUpSLP::computeMinimumValueSizes() {
@@ -14309,7 +14279,8 @@ void BoUpSLP::computeMinimumValueSizes() {
14309
14279
DenseMap<Instruction *, SmallVector<unsigned>> DemotedConsts;
14310
14280
auto ComputeMaxBitWidth = [&](ArrayRef<Value *> TreeRoot, unsigned VF,
14311
14281
bool IsTopRoot, bool IsProfitableToDemoteRoot,
14312
- unsigned Opcode, unsigned Limit, bool IsTruncRoot) {
14282
+ unsigned Opcode, unsigned Limit,
14283
+ bool IsTruncRoot) {
14313
14284
ToDemote.clear();
14314
14285
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
14315
14286
if (!TreeRootIT || !Opcode)
0 commit comments