Skip to content

Commit dadf6f2

Browse files
authored
[VPlan] Ignore incoming values with constant false mask. (#89384)
Ignore incoming values with constant false masks when trying to simplify VPBlendRecipes. As a follow-on optimization, we should also be able to drop all incoming values with false masks by creating a new VPBlendRecipe with those operands dropped. PR: #89384
1 parent 8ab3caf commit dadf6f2

File tree

3 files changed

+16
-12
lines changed

3 files changed

+16
-12
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ template <typename Class> struct bind_ty {
5151
};
5252

5353
/// Match a specified integer value or vector of all elements of that
54-
/// value.
55-
struct specific_intval {
54+
/// value. \p BitWidth optionally specifies the bitwidth the matched constant
55+
/// must have. If it is 0, the matched constant can have any bitwidth.
56+
template <unsigned BitWidth = 0> struct specific_intval {
5657
APInt Val;
5758

5859
specific_intval(APInt V) : Val(std::move(V)) {}
@@ -66,15 +67,21 @@ struct specific_intval {
6667
if (const auto *C = dyn_cast<Constant>(V))
6768
CI = dyn_cast_or_null<ConstantInt>(
6869
C->getSplatValue(/*AllowPoison=*/false));
70+
if (!CI)
71+
return false;
6972

70-
return CI && APInt::isSameValue(CI->getValue(), Val);
73+
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
74+
"Trying the match constant with unexpected bitwidth.");
75+
return APInt::isSameValue(CI->getValue(), Val);
7176
}
7277
};
7378

74-
inline specific_intval m_SpecificInt(uint64_t V) {
75-
return specific_intval(APInt(64, V));
79+
inline specific_intval<0> m_SpecificInt(uint64_t V) {
80+
return specific_intval<0>(APInt(64, V));
7681
}
7782

83+
/// Match a constant false value, i.e. a 1-bit integer constant 0 (per the
/// specific_intval comment, a vector of all such elements also matches).
/// Instantiating specific_intval with BitWidth = 1 makes the matcher assert
/// that any integer constant it inspects is 1 bit wide. The stored APInt is
/// 64 bits wide, as in m_SpecificInt; the comparison goes through
/// APInt::isSameValue instead of requiring the two widths to be equal.
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
84+
7885
/// Matching combinators
7986
template <typename LTy, typename RTy> struct match_combine_or {
8087
LTy L;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -884,18 +884,19 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
884884

885885
/// Try to simplify recipe \p R.
886886
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
887+
using namespace llvm::VPlanPatternMatch;
887888
// Try to remove redundant blend recipes.
888889
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
889890
VPValue *Inc0 = Blend->getIncomingValue(0);
890891
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
891-
if (Inc0 != Blend->getIncomingValue(I))
892+
if (Inc0 != Blend->getIncomingValue(I) &&
893+
!match(Blend->getMask(I), m_False()))
892894
return;
893895
Blend->replaceAllUsesWith(Inc0);
894896
Blend->eraseFromParent();
895897
return;
896898
}
897899

898-
using namespace llvm::VPlanPatternMatch;
899900
VPValue *A;
900901
if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
901902
VPValue *Trunc = R.getVPSingleValue();

llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,13 @@ define void @test_not_first_lane_only_constant(ptr %A, ptr noalias %B) {
1010
; CHECK-NEXT: entry:
1111
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1212
; CHECK: vector.ph:
13-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0
14-
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT3]], <4 x ptr> poison, <4 x i32> zeroinitializer
1513
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1614
; CHECK: vector.body:
1715
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1816
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
1917
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
2018
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]]
21-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> zeroinitializer, <4 x ptr> poison, <4 x ptr> [[BROADCAST_SPLAT4]]
22-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[PREDPHI]], i32 0
23-
; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2
19+
; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr %B, align 2
2420
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0
2521
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer
2622
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0

0 commit comments

Comments
 (0)