Skip to content

Commit 63c3c28

Browse files
author
git apple-llvm automerger
committed
Merge commit '373180b440d0' from llvm.org/release/19.x into stable/20240723
2 parents d375285 + 373180b commit 63c3c28

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
1521115211
if (any_of(E.Scalars, [&](Value *V) {
1521215212
return !all_of(V->users(), [=](User *U) {
1521315213
return getTreeEntry(U) ||
15214-
(UserIgnoreList && UserIgnoreList->contains(U)) ||
15214+
(E.Idx == 0 && UserIgnoreList &&
15215+
UserIgnoreList->contains(U)) ||
1521515216
(!isa<CmpInst>(U) && U->getType()->isSized() &&
1521615217
!U->getType()->isScalableTy() &&
1521715218
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s -slp-threshold=-10 | FileCheck %s
3+
4+
; Test input for the SLP vectorizer invocation in the RUN line above.
; Five scalar `sext i32 %a to i64` + `add nsw i64 ..., 4294967297` pairs
; (%1, %3, %8, %13, %18) feed one chain of i64 adds, interleaved with
; `and i64 ..., 1` of four of those sums (%5, %10, %15, %20).
; 4294967297 is 2^32 + 1, so it is not representable in i32; the CHECK lines
; expect the vectorized add to operate on <4 x i64> after `sext <4 x i32>`.
define i64 @src(i32 %a) {
5+
; CHECK-LABEL: define i64 @src(
6+
; CHECK-SAME: i32 [[A:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[A]] to i64
9+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
11+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
12+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], <i64 4294967297, i64 4294967297, i64 4294967297, i64 4294967297>
13+
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], <i64 1, i64 1, i64 1, i64 1>
14+
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
15+
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
16+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0
17+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP18]], i32 1
18+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> <i64 poison, i64 4294967297>, i64 [[TMP17]], i32 0
19+
; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP9]], [[TMP10]]
20+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
21+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
22+
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP12]], [[TMP13]]
23+
; CHECK-NEXT: ret i64 [[TMP21]]
24+
;
25+
entry:
26+
%0 = sext i32 %a to i64
27+
%1 = add nsw i64 %0, 4294967297
28+
%2 = sext i32 %a to i64
29+
%3 = add nsw i64 %2, 4294967297
30+
%4 = add i64 %3, %1
31+
%5 = and i64 %3, 1
32+
%6 = add i64 %4, %5
33+
%7 = sext i32 %a to i64
34+
%8 = add nsw i64 %7, 4294967297
35+
%9 = add i64 %8, %6
36+
%10 = and i64 %8, 1
37+
%11 = add i64 %9, %10
38+
%12 = sext i32 %a to i64
39+
%13 = add nsw i64 %12, 4294967297
40+
%14 = add i64 %13, %11
41+
%15 = and i64 %13, 1
42+
%16 = add i64 %14, %15
43+
%17 = sext i32 %a to i64
44+
%18 = add nsw i64 %17, 4294967297
45+
%19 = add i64 %18, %16
46+
%20 = and i64 %18, 1
47+
%21 = add i64 %19, %20
48+
ret i64 %21
49+
}

0 commit comments

Comments
 (0)