Skip to content

Commit b6f5178

Browse files
committed
[SLP]Fix signedness analysis for scalars in graph.
The sign info computed for the roots cannot be used for all scalars in the graph; the analysis must be performed for each particular scalar (tree node).
1 parent ac378ac commit b6f5178

File tree

3 files changed

+23
-8
lines changed

3 files changed

+23
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13182,8 +13182,23 @@ void BoUpSLP::computeMinimumValueSizes() {
1318213182
collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
1318313183

1318413184
// Finally, map the values we can demote to the maximum bit width we computed.
13185-
for (auto *Scalar : ToDemote)
13186-
MinBWs.try_emplace(Scalar, MaxBitWidth, !IsKnownPositive);
13185+
DenseMap<const TreeEntry *, bool> Signendness;
13186+
for (auto *Scalar : ToDemote) {
13187+
bool IsSigned = true;
13188+
if (auto *TE = getTreeEntry(Scalar)) {
13189+
auto It = Signendness.find(TE);
13190+
if (It != Signendness.end()) {
13191+
IsSigned = It->second;
13192+
} else {
13193+
IsSigned = any_of(TE->Scalars, [&](Value *R) {
13194+
KnownBits Known = computeKnownBits(R, *DL);
13195+
return !Known.isNonNegative();
13196+
});
13197+
Signendness.try_emplace(TE, IsSigned);
13198+
}
13199+
}
13200+
MinBWs.try_emplace(Scalar, MaxBitWidth, IsSigned);
13201+
}
1318713202
}
1318813203

1318913204
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {

llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,9 @@ define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture read
295295
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
296296
; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
297297
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
298-
; GENERIC-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[TMP0]] to <8 x i32>
298+
; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
299299
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
300-
; GENERIC-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
300+
; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
301301
; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
302302
; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
303303
; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
@@ -368,9 +368,9 @@ define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture read
368368
; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
369369
; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
370370
; KRYO-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
371-
; KRYO-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[TMP0]] to <8 x i32>
371+
; KRYO-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
372372
; KRYO-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
373-
; KRYO-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
373+
; KRYO-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
374374
; KRYO-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
375375
; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
376376
; KRYO-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64

llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,9 +235,9 @@ define void @test_i16_extend(ptr %p.1, ptr %p.2, i32 %idx.i32) {
235235
; CHECK-NEXT: [[T53:%.*]] = getelementptr inbounds i16, ptr [[P_1:%.*]], i64 [[IDX_0]]
236236
; CHECK-NEXT: [[T56:%.*]] = getelementptr inbounds i16, ptr [[P_2:%.*]], i64 [[IDX_0]]
237237
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[T53]], align 2
238-
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32>
238+
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
239239
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr [[T56]], align 2
240-
; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[TMP3]] to <8 x i32>
240+
; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[TMP3]] to <8 x i32>
241241
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP4]]
242242
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP5]], i64 0
243243
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64

0 commit comments

Comments
 (0)