Skip to content

Commit 082c81a

Browse files
committed
[LV] Properly extend versioned constant strides.
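Unknown strides are only ever versioned to the constant 1. If the stride's original type is i1, the kind of extension matters: sign-extending an i1 value of 1 yields -1, whereas zero-extending it yields 1. Extend the stride value according to the cast that uses it (sext or zext) before replacing it, instead of always sign-extending. Fixes #91369.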
1 parent 9a28814 commit 082c81a

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8841,8 +8841,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
88418841
VPValue *StrideVPV = Plan->getLiveIn(U);
88428842
if (!StrideVPV)
88438843
continue;
8844-
VPValue *CI = Plan->getOrAddLiveIn(ConstantInt::get(
8845-
U->getType(), ScevStride->getAPInt().getSExtValue()));
8844+
unsigned BW = U->getType()->getScalarSizeInBits();
8845+
APInt C = isa<SExtInst>(U) ? ScevStride->getAPInt().sext(BW)
8846+
: ScevStride->getAPInt().zext(BW);
8847+
VPValue *CI = Plan->getOrAddLiveIn(ConstantInt::get(U->getType(), C));
88468848
StrideVPV->replaceAllUsesWith(CI);
88478849
}
88488850
}

llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -415,7 +415,6 @@ exit:
415415

416416
; Test case to make sure that uses of versioned strides of type i1 are properly
417417
; extended. From https://github.com/llvm/llvm-project/issues/91369.
418-
; FIXME: Currently miscompiled.
419418
define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
420419
; CHECK-LABEL: define void @zext_of_i1_stride(
421420
; CHECK-SAME: i1 [[G:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -441,7 +440,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
441440
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], [[TMP2]]
442441
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]]
443442
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
444-
; CHECK-NEXT: store <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, ptr [[TMP5]], align 2
443+
; CHECK-NEXT: store <4 x i16> <i16 1, i16 1, i16 1, i16 1>, ptr [[TMP5]], align 2
445444
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
446445
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
447446
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

0 commit comments

Comments
 (0)