Skip to content

Commit f758bb6

Browse files
committed
[SLP] Fix PR89988: do extra analysis of the icmp args to correctly handle signed/unsigned comparison.
If the operands of an icmp have different signedness, we need to consider extending the unsigned operands to correctly handle the comparison with the signed operands.
1 parent bef6687 commit f758bb6

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15072,11 +15072,16 @@ void BoUpSLP::computeMinimumValueSizes() {
1507215072
IsSignedCmp =
1507315073
NodeIdx < VectorizableTree.size() &&
1507415074
any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
15075-
[](const EdgeInfo &EI) {
15075+
[&](const EdgeInfo &EI) {
1507615076
return EI.UserTE->getOpcode() == Instruction::ICmp &&
15077-
any_of(EI.UserTE->Scalars, [](Value *V) {
15077+
any_of(EI.UserTE->Scalars, [&](Value *V) {
1507815078
auto *IC = dyn_cast<ICmpInst>(V);
15079-
return IC && IC->isSigned();
15079+
return IC &&
15080+
(IC->isSigned() ||
15081+
!isKnownNonNegative(IC->getOperand(0),
15082+
SimplifyQuery(*DL)) ||
15083+
!isKnownNonNegative(IC->getOperand(1),
15084+
SimplifyQuery(*DL)));
1508015085
});
1508115086
});
1508215087
}

llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ define i32 @test(ptr %f, i16 %0) {
88
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[F]], align 2
99
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> <i16 0, i16 poison, i16 0, i16 0>, i16 [[TMP0]], i32 1
1010
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> <i16 0, i16 poison, i16 0, i16 0>, i16 [[TMP1]], i32 1
11-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i16> [[TMP3]], [[TMP2]]
11+
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
12+
; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
13+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[TMP6]], [[TMP7]]
1214
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
1315
; CHECK-NEXT: [[ZEXT_4:%.*]] = zext i1 [[TMP5]] to i32
1416
; CHECK-NEXT: ret i32 [[ZEXT_4]]

0 commit comments

Comments
 (0)