Skip to content

Commit 1ad29a5

Browse files
committed
[SLP][NFC] Add a test with truncated loads, but incorrect trunc after
minbitwidth analysis.
1 parent 0c3e24f commit 1ad29a5

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr="+v" < %s | FileCheck %s
3+
4+
; Zero-initialized global array of 12 i64 elements; @test loads elements
; 0, 3, 6 and 9 from it.
@c = global [12 x i64] zeroinitializer
5+
6+
; SLP vectorizer test input: a chain of four llvm.umax.i32 calls, seeded with
; the constant 1, over values loaded from @c at element indices 0, 3, 6 and 9.
; Each loaded i64 is truncated to i32, masked with 65535 and then xor'ed with
; 65535 before it enters the umax chain.
; The CHECK lines expect one strided vector load (stride of 24 bytes), the
; and/xor performed on <4 x i16>, a vector umax reduction, a zext back to i32
; and a final scalar umax against 1.
; NOTE(review): the commit title describes the trunc in the output as incorrect
; after minbitwidth analysis; the CHECK lines appear to record the current
; output rather than the desired one -- confirm before treating them as a spec.
define i32 @test() {
7+
; CHECK-LABEL: define i32 @test(
8+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: entry:
10+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
11+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
12+
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
13+
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP2]], <i16 -1, i16 -1, i16 -1, i16 -1>
14+
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
15+
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
16+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1)
17+
; CHECK-NEXT: ret i32 [[TMP6]]
18+
;
19+
entry:
20+
; Element 0 of @c: truncate to i32, keep the low 16 bits, invert them, then
; start the umax chain with the constant 1.
%0 = load i64, ptr @c, align 8
21+
%conv = trunc i64 %0 to i32
22+
%conv3 = and i32 %conv, 65535
23+
%conv4 = xor i32 %conv3, 65535
24+
%.conv4 = tail call i32 @llvm.umax.i32(i32 1, i32 %conv4)
25+
; Element 3 of @c: same trunc/and/xor sequence, folded into the running umax.
%1 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 3), align 8
26+
%conv.1 = trunc i64 %1 to i32
27+
%conv3.1 = and i32 %conv.1, 65535
28+
%conv4.1 = xor i32 %conv3.1, 65535
29+
%.conv4.1 = tail call i32 @llvm.umax.i32(i32 %.conv4, i32 %conv4.1)
30+
; Element 6 of @c: same trunc/and/xor sequence, folded into the running umax.
%2 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 6), align 8
31+
%conv.2 = trunc i64 %2 to i32
32+
%conv3.2 = and i32 %conv.2, 65535
33+
%conv4.2 = xor i32 %conv3.2, 65535
34+
%.conv4.2 = tail call i32 @llvm.umax.i32(i32 %.conv4.1, i32 %conv4.2)
35+
; Element 9 of @c: same trunc/and/xor sequence, folded into the running umax.
%3 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 9), align 8
36+
%conv.3 = trunc i64 %3 to i32
37+
%conv3.3 = and i32 %conv.3, 65535
38+
%conv4.3 = xor i32 %conv3.3, 65535
39+
%.conv4.3 = tail call i32 @llvm.umax.i32(i32 %.conv4.2, i32 %conv4.3)
40+
; Result is the final value of the umax chain.
ret i32 %.conv4.3
41+
}

0 commit comments

Comments
 (0)