Skip to content

Commit 62b7e35

Browse files
authored
[SystemZ] Don't assert for i128 vectors in getInterleavedMemoryOpCost() (#78009)
This assert is not justified: the LoopVectorizer can form interleave groups containing i128 elements, in which case the number of elements per vector is indeed just one.
1 parent 0ff3d72 commit 62b7e35

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1236,7 +1236,6 @@ InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
12361236
// dst vector for vperm (S.A.).
12371237
unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
12381238
unsigned NumDstVecs = NumVectorMemOps;
1239-
assert (NumSrcVecs > 1 && "Expected at least two source vectors.");
12401239
NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;
12411240
}
12421241

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z16 -passes=loop-vectorize \
3+
; RUN: -debug-only=loop-vectorize -force-vector-width=4 \
4+
; RUN: -disable-output < %s 2>&1 | FileCheck %s
5+
;
6+
; Check cost function for <8 x i128> store interleave group.
7+
8+
; CHECK: LV: Checking a loop in 'fun'
9+
; CHECK: LV: Found an estimated cost of 8 for VF 4 For instruction: store i128 8721036757475490113
10+
11+
define noundef i32 @fun(i32 %argc, ptr nocapture readnone %argv) { ; test input; neither %argc nor %argv is referenced in the body
12+
entry:
13+
%l_4774.i = alloca [4 x [2 x i128]], align 8 ; 4 rows of 2 x i128 = 128 bytes, the size passed to the lifetime markers
14+
call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %l_4774.i)
15+
br label %for.cond4.preheader.i
16+
17+
for.cond4.preheader.i: ; preds = %for.cond4.preheader.i, %entry
18+
%indvars.iv8.i = phi i64 [ 0, %entry ], [ %indvars.iv.next9.i, %for.cond4.preheader.i ] ; row index: starts at 0, advanced by 1 each iteration
19+
%arrayidx10.i = getelementptr inbounds [4 x [2 x i128]], ptr %l_4774.i, i64 0, i64 %indvars.iv8.i, i64 0 ; address of element [row][0]
20+
store i128 8721036757475490113, ptr %arrayidx10.i, align 8 ; this is the store the CHECK line matches on
21+
%arrayidx10.i.c = getelementptr inbounds [4 x [2 x i128]], ptr %l_4774.i, i64 0, i64 %indvars.iv8.i, i64 1 ; address of element [row][1], adjacent to [row][0]
22+
store i128 8721036757475490113, ptr %arrayidx10.i.c, align 8 ; same constant; presumably the second member of the 2-wide store interleave group (2 x VF 4 = 8 x i128) - confirm against -debug output
23+
%indvars.iv.next9.i = add nuw nsw i64 %indvars.iv8.i, 1
24+
%exitcond.not.i = icmp eq i64 %indvars.iv.next9.i, 4 ; loop exits after the 4th row, so the trip count is 4
25+
br i1 %exitcond.not.i, label %func_1.exit, label %for.cond4.preheader.i
26+
27+
func_1.exit: ; preds = %for.cond4.preheader.i
28+
%arrayidx195.i = getelementptr inbounds [4 x [2 x i128]], ptr %l_4774.i, i64 0, i64 1 ; address of row 1, i.e. element [1][0]
29+
%0 = load i128, ptr %arrayidx195.i, align 8 ; reads back one of the values stored by the loop
30+
%cmp200.i = icmp ne i128 %0, 0
31+
%conv202.i = zext i1 %cmp200.i to i64 ; 1 if the loaded value is non-zero, else 0
32+
%call203.i = tail call i64 @safe_sub_func_int64_t_s_s(i64 noundef %conv202.i, i64 noundef 9139899272418802852) ; external call consuming the loaded value; its result is not used
33+
call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %l_4774.i)
34+
35+
br label %for.cond
36+
37+
for.cond: ; preds = %for.cond, %func_1.exit
38+
br label %for.cond ; unconditional self-branch: the function has no ret and never returns
39+
}
39+
40+
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
41+
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
42+
declare dso_local i64 @safe_sub_func_int64_t_s_s(i64, i64)

0 commit comments

Comments
 (0)