Skip to content

Commit 8826e81

Browse files
committed
!fixup address latest comments, thanks!
1 parent a4bc0ba commit 8826e81

File tree

3 files changed

+26
-10
lines changed

3 files changed

+26
-10
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ class VPLane {
168168
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
169169

170170
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
171+
assert(Offset <= VF.getKnownMinValue() &&
172+
"trying to extract with invalid offset");
171173
unsigned LaneOffset = VF.getKnownMinValue() - Offset;
172174
Kind LaneKind;
173175
if (VF.isScalable())
@@ -1186,6 +1188,10 @@ class VPInstruction : public VPRecipeWithIRFlags {
11861188
BranchOnCount,
11871189
BranchOnCond,
11881190
ComputeReductionResult,
1191+
// Takes the VPValue to extract from as first operand and the offset from the
1192+
// end to extract at as second operand. The second operand must be a constant
1193+
// and <= VF when extracting from a vector or <= UF when extracting from a
1194+
// scalar.
11891195
ExtractFromEnd,
11901196
LogicalAnd, // Non-poison propagating logical And.
11911197
// Add an offset in bytes (second operand) to a base pointer (first
@@ -1224,6 +1230,10 @@ class VPInstruction : public VPRecipeWithIRFlags {
12241230
/// value for lane \p Lane.
12251231
Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
12261232

1233+
/// Returns true if this VPInstruction converts a vector value to a scalar,
1234+
/// e.g. by performing a reduction or extracting a lane.
1235+
bool isVectorToScalar() const;
1236+
12271237
#if !defined(NDEBUG)
12281238
/// Return true if the VPInstruction is a floating point math operation, i.e.
12291239
/// has fast-math flags.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
4545
CachedTypes[OtherV] = ResTy;
4646
return ResTy;
4747
}
48+
case VPInstruction::ExtractFromEnd: {
49+
Type *BaseTy = inferScalarType(R->getOperand(0));
50+
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
51+
return VecTy->getElementType();
52+
return BaseTy;
53+
}
4854
case VPInstruction::Not: {
4955
Type *ResTy = inferScalarType(R->getOperand(0));
5056
assert(IntegerType::get(Ctx, 1) == ResTy &&

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -294,14 +294,13 @@ bool VPInstruction::doesGeneratePerAllLanes() const {
294294
bool VPInstruction::canGenerateScalarForFirstLane() const {
295295
if (Instruction::isBinaryOp(getOpcode()))
296296
return true;
297-
297+
if (isVectorToScalar())
298+
return true;
298299
switch (Opcode) {
299300
case VPInstruction::BranchOnCond:
300301
case VPInstruction::BranchOnCount:
301302
case VPInstruction::CalculateTripCountMinusVF:
302303
case VPInstruction::CanonicalIVIncrementForPart:
303-
case VPInstruction::ComputeReductionResult:
304-
case VPInstruction::ExtractFromEnd:
305304
case VPInstruction::PtrAdd:
306305
case VPInstruction::ExplicitVectorLength:
307306
return true;
@@ -567,17 +566,15 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
567566
auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
568567
unsigned Offset = CI->getZExtValue();
569568

570-
// Extract lane VF - Offset in the from the operand.
571569
Value *Res;
572570
if (State.VF.isVector()) {
571+
// Extract lane VF - Offset from the operand.
573572
Res = State.get(
574573
getOperand(0),
575574
VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
576575
} else {
577576
assert(State.UF > 1 && "VF and UF cannot both be 1");
578-
// When loop is unrolled without vectorizing, retrieve the value just
579-
// prior to the final unrolled value. This is analogous to the vectorized
580-
// case above: extracting the second last element when VF > 1.
577+
// When the loop is unrolled without vectorizing, retrieve the value of part
// UF - Offset.
581578
Res = State.get(getOperand(0), State.UF - Offset);
582579
}
583580
Res->setName(Name);
@@ -600,6 +597,11 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
600597
}
601598
}
602599

600+
bool VPInstruction::isVectorToScalar() const {
601+
return getOpcode() == VPInstruction::ExtractFromEnd ||
602+
getOpcode() == VPInstruction::ComputeReductionResult;
603+
}
604+
603605
#if !defined(NDEBUG)
604606
bool VPInstruction::isFPMathOp() const {
605607
// Inspired by FPMathOperator::classof. Notable differences are that we don't
@@ -622,9 +624,7 @@ void VPInstruction::execute(VPTransformState &State) {
622624
State.setDebugLocFrom(getDebugLoc());
623625
bool GeneratesPerFirstLaneOnly =
624626
canGenerateScalarForFirstLane() &&
625-
(vputils::onlyFirstLaneUsed(this) ||
626-
getOpcode() == VPInstruction::ExtractFromEnd ||
627-
getOpcode() == VPInstruction::ComputeReductionResult);
627+
(vputils::onlyFirstLaneUsed(this) || isVectorToScalar());
628628
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
629629
for (unsigned Part = 0; Part < State.UF; ++Part) {
630630
if (GeneratesPerAllLanes) {

0 commit comments

Comments
 (0)