Skip to content

Commit 14df426

Browse files
committed
[VPlan] Simplify select c, x, x -> x, add m_Deferred
As noted in 1a9358c, some simplifications can produce a redundant select whose true and false operands are the same. This patch simplifies such selects (select c, x, x -> x) and adds m_Deferred to VPlanPatternMatch (copied over from IR/PatternMatch) to simplify the pattern matching. Some upcoming patches I plan on posting can also be made cleaner with m_Deferred. The is_fpclass.ll test was changed so that the select's operands differ (0.0 and 1.0 instead of 0.0 and 0.0); otherwise the select would be folded away and its is.fpclass condition would become dead. I didn't notice any legacy-vplan cost model mismatches on llvm-test-suite or SPEC with this patch.
1 parent b82fd71 commit 14df426

File tree

5 files changed

+34
-17
lines changed

5 files changed

+34
-17
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,27 @@ struct specificval_ty {
6666

6767
inline specificval_ty m_Specific(const VPValue *VPV) { return VPV; }
6868

69+
/// Matcher that compares a candidate against a pointer which is read at match
/// time rather than captured at construction time.
///
/// Stores a reference to the VPValue *, not the VPValue * itself, so the
/// comparison observes whatever value was written to that pointer earlier in
/// the same match() expression. Thus it can be used in commutative matchers.
///
/// Because only a reference is held, the referenced pointer variable must
/// outlive this matcher.
template <typename Class> struct deferredval_ty {
  Class *const &Val;

  /// Intentionally implicit: m_Deferred() builds the matcher by returning the
  /// pointer reference directly.
  deferredval_ty(Class *const &V) : Val(V) {}

  /// Returns true if \p V equals the pointer currently stored in the
  /// referenced variable. Const-qualified because matching never modifies the
  /// matcher, which allows it to be invoked on const matcher objects.
  template <typename ITy> bool match(ITy *const V) const { return V == Val; }
};
78+
79+
/// Like m_Specific(), but works if the specific value to match is determined
80+
/// as part of the same match() expression. For example:
81+
/// m_Mul(m_VPValue(X), m_Specific(X)) is incorrect, because m_Specific() will
82+
/// bind X before the pattern match starts.
83+
/// m_Mul(m_VPValue(X), m_Deferred(X)) is correct, and will check against
84+
/// whichever value m_VPValue(X) populated.
85+
inline deferredval_ty<VPValue> m_Deferred(VPValue *const &V) { return V; }
86+
inline deferredval_ty<const VPValue> m_Deferred(const VPValue *const &V) {
87+
return V;
88+
}
89+
6990
/// Match a specified integer value or vector of all elements of that
7091
/// value. \p BitWidth optionally specifies the bitwidth the matched constant
7192
/// must have. If it is 0, the matched constant can have any bitwidth.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10591059
return;
10601060
}
10611061

1062+
if (match(&R, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) {
1063+
R.getVPSingleValue()->replaceAllUsesWith(X);
1064+
R.eraseFromParent();
1065+
return;
1066+
}
1067+
10621068
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
10631069
return R.getVPSingleValue()->replaceAllUsesWith(A);
10641070

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,16 +1046,11 @@ define i64 @live_in_known_1_via_scev() {
10461046
; CHECK: vector.body:
10471047
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
10481048
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
1049-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
1050-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1051-
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
1052-
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 5)
1053-
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_PHI]]
10541049
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
10551050
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
10561051
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
10571052
; CHECK: middle.block:
1058-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[TMP1]])
1053+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
10591054
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
10601055
; CHECK: scalar.ph:
10611056
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[PH]] ]

llvm/test/Transforms/LoopVectorize/is_fpclass.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ define void @d() {
99
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1010
; CHECK: vector.body:
1111
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
12-
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr null, align 4
13-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
12+
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr null, align 4
13+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
1414
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
1515
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr @d, i64 [[INDEX]]
1616
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> [[BROADCAST_SPLAT]], i32 0)
17-
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> zeroinitializer, <2 x float> zeroinitializer
17+
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> zeroinitializer, <2 x float> splat (float 1.000000e+00)
1818
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
1919
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[TMP4]], align 4
2020
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -30,7 +30,7 @@ define void @d() {
3030
; CHECK-NEXT: [[I3:%.*]] = load float, ptr null, align 4
3131
; CHECK-NEXT: [[I4:%.*]] = getelementptr float, ptr @d, i64 [[I]]
3232
; CHECK-NEXT: [[I5:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[I3]], i32 0)
33-
; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 0.000000e+00
33+
; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 1.000000e+00
3434
; CHECK-NEXT: store float [[I6]], ptr [[I4]], align 4
3535
; CHECK-NEXT: [[I7]] = add i64 [[I]], 1
3636
; CHECK-NEXT: [[I8:%.*]] = icmp eq i64 [[I7]], 128
@@ -46,7 +46,7 @@ loop:
4646
%i3 = load float, ptr null, align 4
4747
%i4 = getelementptr float, ptr @d, i64 %i
4848
%i5 = tail call i1 @llvm.is.fpclass.f32(float %i3, i32 0)
49-
%i6 = select i1 %i5, float 0.0, float 0.0
49+
%i6 = select i1 %i5, float 0.0, float 1.0
5050
store float %i6, ptr %i4, align 4
5151
%i7 = add i64 %i, 1
5252
%i8 = icmp eq i64 %i7, 128

llvm/test/Transforms/LoopVectorize/select-reduction.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,16 +88,11 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) {
8888
; CHECK: vector.body:
8989
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
9090
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
91-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
92-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
93-
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
94-
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 12)
95-
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]]
9691
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
9792
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
9893
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
9994
; CHECK: middle.block:
100-
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1]])
95+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_PHI]])
10196
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
10297
; CHECK: scalar.ph:
10398
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ 12, [[ENTRY:%.*]] ]

0 commit comments

Comments
 (0)