|
1 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF |
2 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF |
3 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF |
4 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF |
5 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED |
6 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC |
7 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV |
8 |
| -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED |
| 1 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF |
| 2 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF |
| 3 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S | FileCheck %s -check-prefix=CHECK-NOTF |
| 4 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF |
| 5 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF |
| 6 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 -sve-tail-folding=default+reductions+recurrences+reverse | FileCheck %s -check-prefix=CHECK-TF |
| 7 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED |
| 8 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC |
| 9 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV |
| 10 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED |
| 11 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -sve-tail-folding=default -mcpu=neoverse-v1 | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1 |
| 12 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 -sve-tail-folding=default | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1 |
| 13 | +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1 |
9 | 14 |
|
10 | 15 | target triple = "aarch64-unknown-linux-gnu"
|
11 | 16 |
|
@@ -58,6 +63,14 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
|
58 | 63 | ; CHECK-TF-ONLYRED-NOT: %{{.*}} = phi <vscale x 4 x i1>
|
59 | 64 | ; CHECK-TF-ONLYRED: store <vscale x 4 x i32> %[[SPLAT]], ptr
|
60 | 65 |
|
| 66 | +; CHECK-NEOVERSE-V1-LABEL: @simple_memset( |
| 67 | +; CHECK-NEOVERSE-V1: vector.ph: |
| 68 | +; CHECK-NEOVERSE-V1: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0 |
| 69 | +; CHECK-NEOVERSE-V1: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| 70 | +; CHECK-NEOVERSE-V1: vector.body: |
| 71 | +; CHECK-NEOVERSE-V1: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1> |
| 72 | +; CHECK-NEOVERSE-V1: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]] |
| 73 | + |
61 | 74 | entry:
|
62 | 75 | br label %while.body
|
63 | 76 |
|
@@ -129,6 +142,15 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
|
129 | 142 | ; CHECK-TF-ONLYRED: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
|
130 | 143 | ; CHECK-TF-ONLYRED: middle.block:
|
131 | 144 | ; CHECK-TF-ONLYRED-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])
|
| 145 | + |
| 146 | +; CHECK-NEOVERSE-V1-LABEL: @fadd_red_fast |
| 147 | +; CHECK-NEOVERSE-V1: vector.body: |
| 148 | +; CHECK-NEOVERSE-V1-NOT: %{{.*}} = phi <vscale x 4 x i1> |
| 149 | +; CHECK-NEOVERSE-V1: %[[LOAD:.*]] = load <vscale x 4 x float> |
| 150 | +; CHECK-NEOVERSE-V1: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]] |
| 151 | +; CHECK-NEOVERSE-V1: middle.block: |
| 152 | +; CHECK-NEOVERSE-V1-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[ADD]]) |
| 153 | + |
132 | 154 | entry:
|
133 | 155 | br label %for.body
|
134 | 156 |
|
@@ -225,6 +247,19 @@ define void @add_recur(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
|
225 | 247 | ; CHECK-TF-ONLYRED: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
|
226 | 248 | ; CHECK-TF-ONLYRED: store <vscale x 4 x i32> %[[ADD]]
|
227 | 249 |
|
| 250 | +; CHECK-NEOVERSE-V1-LABEL: @add_recur |
| 251 | +; CHECK-NEOVERSE-V1: entry: |
| 252 | +; CHECK-NEOVERSE-V1: %[[PRE:.*]] = load i32, ptr %src, align 4 |
| 253 | +; CHECK-NEOVERSE-V1: vector.ph: |
| 254 | +; CHECK-NEOVERSE-V1: %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]] |
| 255 | +; CHECK-NEOVERSE-V1: vector.body: |
| 256 | +; CHECK-NEOVERSE-V1-NOT: %{{.*}} = phi <vscale x 4 x i1> |
| 257 | +; CHECK-NEOVERSE-V1: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ] |
| 258 | +; CHECK-NEOVERSE-V1: %[[LOAD]] = load <vscale x 4 x i32> |
| 259 | +; CHECK-NEOVERSE-V1: %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1) |
| 260 | +; CHECK-NEOVERSE-V1: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]] |
| 261 | +; CHECK-NEOVERSE-V1: store <vscale x 4 x i32> %[[ADD]] |
| 262 | + |
228 | 263 | entry:
|
229 | 264 | %.pre = load i32, ptr %src, align 4
|
230 | 265 | br label %for.body
|
@@ -276,6 +311,12 @@ define void @interleave(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
|
276 | 311 | ; CHECK-TF-NOREV: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
277 | 312 | ; CHECK-TF-NOREV: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
|
278 | 313 |
|
| 314 | +; CHECK-NEOVERSE-V1-LABEL: @interleave( |
| 315 | +; CHECK-NEOVERSE-V1: vector.body: |
| 316 | +; CHECK-NEOVERSE-V1: %[[LOAD:.*]] = load <8 x float>, ptr |
| 317 | +; CHECK-NEOVERSE-V1: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| 318 | +; CHECK-NEOVERSE-V1: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| 319 | + |
279 | 320 | entry:
|
280 | 321 | br label %for.body
|
281 | 322 |
|
@@ -335,6 +376,12 @@ define void @reverse(ptr noalias %dst, ptr noalias %src) #0 {
|
335 | 376 | ; CHECK-TF-NOREC: %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
|
336 | 377 | ; CHECK-TF-NOREC: %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0({{.*}} <vscale x 2 x i1> %reverse
|
337 | 378 |
|
| 379 | +; CHECK-NEOVERSE-V1-LABEL: @reverse( |
| 380 | +; CHECK-NEOVERSE-V1: vector.body: |
| 381 | +; CHECK-NEOVERSE-V1-NOT: %{{.*}} = phi <vscale x 2 x i1> |
| 382 | +; CHECK-NEOVERSE-V1: %[[LOAD:.*]] = load <vscale x 2 x double>, ptr |
| 383 | +; CHECK-NEOVERSE-V1: %{{.*}} = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %[[LOAD]]) |
| 384 | + |
338 | 385 | entry:
|
339 | 386 | br label %for.body
|
340 | 387 |
|
|
0 commit comments