Skip to content

Commit c4c2d77

Browse files
committed
[VPlan] Fix handling of ReductionStartVector for reductions when unrolling.
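Update the handling of ReductionStartVector in VPlanUnroll for partial reductions. The new code makes sure that all unrolled parts after the first are mapped to the same start value: operand 1 of the ReductionStartVector when its operand 2 is the constant 1, and otherwise a clone of the ReductionStartVector whose operand 0 is replaced by its operand 1. Fixes a miscompile reported for #142290.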
1 parent e478a22 commit c4c2d77

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -231,14 +231,19 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
231231
if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
232232
assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
233233
"unexpected start VPInstruction");
234+
if (Part != 1)
235+
continue;
236+
VPValue *StartV;
234237
if (match(VPI->getOperand(2), m_SpecificInt(1))) {
235-
Copy->setOperand(0, VPI->getOperand(1));
236-
} else if (Part == 1) {
238+
StartV = VPI->getOperand(1);
239+
} else {
237240
auto *C = VPI->clone();
238241
C->setOperand(0, C->getOperand(1));
239242
C->insertAfter(VPI);
240-
addUniformForAllParts(C);
243+
StartV = C;
241244
}
245+
for (unsigned Part = 1; Part != UF; ++Part)
246+
VPV2Parts[VPI][Part - 1] = StartV;
242247
}
243248
Copy->addOperand(getConstantVPV(Part));
244249
} else {

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star
1818
; IC2: [[VECTOR_BODY]]:
1919
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
2020
; IC2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
21-
; IC2-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], %[[VECTOR_BODY]] ]
21+
; IC2-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], %[[VECTOR_BODY]] ]
2222
; IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
2323
; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
2424
; IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
@@ -70,9 +70,9 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star
7070
; IC4: [[VECTOR_BODY]]:
7171
; IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
7272
; IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
73-
; IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE7:%.*]], %[[VECTOR_BODY]] ]
74-
; IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], %[[VECTOR_BODY]] ]
75-
; IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], %[[VECTOR_BODY]] ]
73+
; IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE7:%.*]], %[[VECTOR_BODY]] ]
74+
; IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], %[[VECTOR_BODY]] ]
75+
; IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], %[[VECTOR_BODY]] ]
7676
; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
7777
; IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
7878
; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16

0 commit comments

Comments
 (0)