Skip to content

Commit 1730329

Browse files
committed
Revert "[VPlan] Mark Select VPInstructions as not having sideeffects."
This reverts commit 19918ac. Fixes #75298. There is still one case where we miss the correct users outside the main vector loop for reductions: tail-folded loops with reductions where the final value is stored after the loop. This should be handled explicitly in #70253.
1 parent 8d893f2 commit 1730329

File tree

3 files changed

+22
-12
lines changed

3 files changed

+22
-12
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPInstructionSC:
     switch (cast<VPInstruction>(this)->getOpcode()) {
     case Instruction::ICmp:
-    case Instruction::Select:
     case VPInstruction::Not:
     case VPInstruction::CalculateTripCountMinusVF:
     case VPInstruction::CanonicalIVIncrementForPart:

llvm/test/Transforms/LoopVectorize/reduction-small-size.ll

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,15 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]]
 ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8>
@@ -37,7 +40,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
 ; CHECK: for.body:
 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
 ; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[R_NEXT:%.*]], [[IF_END]] ]
-; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END]]
+; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; CHECK: if.then:
 ; CHECK-NEXT: [[T0:%.*]] = sdiv i32 undef, undef
 ; CHECK-NEXT: br label [[IF_END]]

llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,24 +20,32 @@ define void @pr75298_store_reduction_value_in_folded_loop(i64 %iv.start) optsize
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]]
+; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @c, align 4
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2]] = xor <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @c, align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3]] = xor <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP2]])
-; CHECK-NEXT: store i32 [[TMP4]], ptr @a, align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: store i32 [[TMP6]], ptr @a, align 4
 ; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[PH]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

0 commit comments

Comments
 (0)