|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
| 2 | +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr="+v" < %s | FileCheck %s |
| 3 | + |
| 4 | +@c = global [12 x i64] zeroinitializer |
| 5 | + |
; NOTE(review): this is an autogenerated SLP-vectorizer lit test; the CHECK
; lines below were produced by utils/update_test_checks.py (see header) and
; must stay byte-identical, so only comments are added here. The surrounding
; "| N | +" prefixes are GitHub-diff scrape residue, left untouched.
;
; What the test pins (per the CHECK block): the four 24-byte-strided i64
; loads from @c (indices 0, 3, 6, 9) together with their per-element
; trunc/and/xor chain and the chained umax reduction are vectorized into a
; single llvm.experimental.vp.strided.load(@c, stride 24) feeding
; llvm.vector.reduce.umax, with the initial `umax(1, ...)` seed folded into
; a final scalar llvm.umax.i32 against 1.
| 6 | +define i32 @test() {
| 7 | +; CHECK-LABEL: define i32 @test(
| 8 | +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
| 9 | +; CHECK-NEXT: entry:
| 10 | +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
| 11 | +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
| 12 | +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
| 13 | +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP2]], <i16 -1, i16 -1, i16 -1, i16 -1>
| 14 | +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
| 15 | +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
| 16 | +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1)
| 17 | +; CHECK-NEXT: ret i32 [[TMP6]]
| 18 | +;
; Scalar input form: four identical lanes, each loading @c[3*k], masking the
; low 16 bits (and 65535), inverting them (xor 65535), and folding into a
; running umax seeded with 1.
| 19 | +entry:
| 20 | + %0 = load i64, ptr @c, align 8
| 21 | + %conv = trunc i64 %0 to i32
| 22 | + %conv3 = and i32 %conv, 65535
| 23 | + %conv4 = xor i32 %conv3, 65535
| 24 | + %.conv4 = tail call i32 @llvm.umax.i32(i32 1, i32 %conv4)
| 25 | + %1 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 3), align 8
| 26 | + %conv.1 = trunc i64 %1 to i32
| 27 | + %conv3.1 = and i32 %conv.1, 65535
| 28 | + %conv4.1 = xor i32 %conv3.1, 65535
| 29 | + %.conv4.1 = tail call i32 @llvm.umax.i32(i32 %.conv4, i32 %conv4.1)
| 30 | + %2 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 6), align 8
| 31 | + %conv.2 = trunc i64 %2 to i32
| 32 | + %conv3.2 = and i32 %conv.2, 65535
| 33 | + %conv4.2 = xor i32 %conv3.2, 65535
| 34 | + %.conv4.2 = tail call i32 @llvm.umax.i32(i32 %.conv4.1, i32 %conv4.2)
| 35 | + %3 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 9), align 8
| 36 | + %conv.3 = trunc i64 %3 to i32
| 37 | + %conv3.3 = and i32 %conv.3, 65535
| 38 | + %conv4.3 = xor i32 %conv3.3, 65535
| 39 | + %.conv4.3 = tail call i32 @llvm.umax.i32(i32 %.conv4.2, i32 %conv4.3)
| 40 | + ret i32 %.conv4.3
| 41 | +}
0 commit comments