; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE2
+; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE

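; Partial reductions of a sign- or zero-extended input into a narrower
; accumulator. With +sve2 these lower to the widening add-bottom/add-top
; instructions (saddwb/saddwt, uaddwb/uaddwt); with only +sve they fall back
; to unpacking (sunpklo/sunpkhi, uunpklo/uunpkhi) followed by plain adds.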
define <vscale x 2 x i64> @signed_wide_add_nxv4i32(<vscale x 2 x i64> %acc, <vscale x 4 x i32> %input){
-; CHECK-LABEL: signed_wide_add_nxv4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddwb z0.d, z0.d, z1.s
-; CHECK-NEXT: saddwt z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv4i32:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: saddwb z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: saddwt z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE-LABEL: signed_wide_add_nxv4i32:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: sunpklo z2.d, z1.s
+; CHECK-SVE-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
+; CHECK-SVE-NEXT: add z0.d, z1.d, z0.d
+; CHECK-SVE-NEXT: ret
entry:
  %input.wide = sext <vscale x 4 x i32> %input to <vscale x 4 x i64>
  %partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64(<vscale x 2 x i64> %acc, <vscale x 4 x i64> %input.wide)
  ret <vscale x 2 x i64> %partial.reduce
}

define <vscale x 2 x i64> @unsigned_wide_add_nxv4i32(<vscale x 2 x i64> %acc, <vscale x 4 x i32> %input){
-; CHECK-LABEL: unsigned_wide_add_nxv4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddwb z0.d, z0.d, z1.s
-; CHECK-NEXT: uaddwt z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv4i32:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uaddwb z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: uaddwt z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE-LABEL: unsigned_wide_add_nxv4i32:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: uunpklo z2.d, z1.s
+; CHECK-SVE-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
+; CHECK-SVE-NEXT: add z0.d, z1.d, z0.d
+; CHECK-SVE-NEXT: ret
entry:
  %input.wide = zext <vscale x 4 x i32> %input to <vscale x 4 x i64>
  %partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64(<vscale x 2 x i64> %acc, <vscale x 4 x i64> %input.wide)
  ret <vscale x 2 x i64> %partial.reduce
}

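; The same lowering applies to i16 inputs accumulated into i32 elements.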
define <vscale x 4 x i32> @signed_wide_add_nxv8i16(<vscale x 4 x i32> %acc, <vscale x 8 x i16> %input){
-; CHECK-LABEL: signed_wide_add_nxv8i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddwb z0.s, z0.s, z1.h
-; CHECK-NEXT: saddwt z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv8i16:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: saddwb z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: saddwt z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE-LABEL: signed_wide_add_nxv8i16:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: sunpklo z2.s, z1.h
+; CHECK-SVE-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SVE-NEXT: add z0.s, z1.s, z0.s
+; CHECK-SVE-NEXT: ret
entry:
  %input.wide = sext <vscale x 8 x i16> %input to <vscale x 8 x i32>
  %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %input.wide)
  ret <vscale x 4 x i32> %partial.reduce
}

define <vscale x 4 x i32> @unsigned_wide_add_nxv8i16(<vscale x 4 x i32> %acc, <vscale x 8 x i16> %input){
-; CHECK-LABEL: unsigned_wide_add_nxv8i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddwb z0.s, z0.s, z1.h
-; CHECK-NEXT: uaddwt z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv8i16:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uaddwb z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: uaddwt z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE-LABEL: unsigned_wide_add_nxv8i16:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: uunpklo z2.s, z1.h
+; CHECK-SVE-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SVE-NEXT: add z0.s, z1.s, z0.s
+; CHECK-SVE-NEXT: ret
entry:
  %input.wide = zext <vscale x 8 x i16> %input to <vscale x 8 x i32>
  %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %input.wide)
  ret <vscale x 4 x i32> %partial.reduce
}

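; And likewise for i8 inputs accumulated into i16 elements.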
define <vscale x 8 x i16> @signed_wide_add_nxv16i8(<vscale x 8 x i16> %acc, <vscale x 16 x i8> %input){
-; CHECK-LABEL: signed_wide_add_nxv16i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddwb z0.h, z0.h, z1.b
-; CHECK-NEXT: saddwt z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv16i8:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: saddwb z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: saddwt z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE-LABEL: signed_wide_add_nxv16i8:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: sunpklo z2.h, z1.b
+; CHECK-SVE-NEXT: sunpkhi z1.h, z1.b
+; CHECK-SVE-NEXT: add z0.h, z0.h, z2.h
+; CHECK-SVE-NEXT: add z0.h, z1.h, z0.h
+; CHECK-SVE-NEXT: ret
entry:
  %input.wide = sext <vscale x 16 x i8> %input to <vscale x 16 x i16>
  %partial.reduce = tail call <vscale x 8 x i16> @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i16(<vscale x 8 x i16> %acc, <vscale x 16 x i16> %input.wide)
  ret <vscale x 8 x i16> %partial.reduce
}

define <vscale x 8 x i16> @unsigned_wide_add_nxv16i8(<vscale x 8 x i16> %acc, <vscale x 16 x i8> %input){
-; CHECK-LABEL: unsigned_wide_add_nxv16i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddwb z0.h, z0.h, z1.b
-; CHECK-NEXT: uaddwt z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv16i8:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uaddwb z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: uaddwt z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE-LABEL: unsigned_wide_add_nxv16i8:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: uunpklo z2.h, z1.b
+; CHECK-SVE-NEXT: uunpkhi z1.h, z1.b
+; CHECK-SVE-NEXT: add z0.h, z0.h, z2.h
+; CHECK-SVE-NEXT: add z0.h, z1.h, z0.h
+; CHECK-SVE-NEXT: ret
entry:
  %input.wide = zext <vscale x 16 x i8> %input to <vscale x 16 x i16>
  %partial.reduce = tail call <vscale x 8 x i16> @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i16(<vscale x 8 x i16> %acc, <vscale x 16 x i16> %input.wide)
  ret <vscale x 8 x i16> %partial.reduce
}