|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of" |
| 2 | +; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s |
| 3 | + |
| 4 | +; REQUIRES: asserts |
| 5 | + |
| 6 | +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" |
| 7 | + |
| 8 | +define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) { ; loop with a conditional store whose index is computed via 'or disjoint' |
| 9 | +; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction' |
| 10 | +; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1 |
| 11 | +; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| 12 | +; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 |
| 13 | +; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| 14 | +; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> |
| 15 | +; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> |
| 16 | +; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> |
| 17 | +; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src> |
| 18 | +; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5> |
| 19 | +; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4> |
| 20 | +; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128> |
| 21 | +; CHECK: Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1> |
| 22 | +; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or> |
| 23 | +; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst> |
| 24 | +; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c> |
| 25 | +; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> |
| 26 | +; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2> |
| 27 | +; CHECK: Cost of 0 for VF 2: vector loop backedge |
| 28 | +; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1 |
| 29 | +; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| 30 | +; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 |
| 31 | +; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| 32 | +; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> |
| 33 | +; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> |
| 34 | +; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> |
| 35 | +; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src> |
| 36 | +; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5> |
| 37 | +; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4> |
| 38 | +; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128> |
| 39 | +; CHECK: Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1> |
| 40 | +; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or> |
| 41 | +; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst> |
| 42 | +; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c> |
| 43 | +; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> |
| 44 | +; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2> |
| 45 | +; CHECK: Cost of 0 for VF 4: vector loop backedge |
| 46 | +; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1 |
| 47 | +; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| 48 | +; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 |
| 49 | +; |
| 50 | +entry: |
| 51 | + br label %loop.header |
| 52 | + |
| 53 | +loop.header: |
| 54 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| 55 | + %g.src = getelementptr inbounds i64, ptr %src, i64 %iv |
| 56 | + %l = load i64, ptr %g.src |
| 57 | + %iv.4 = add nuw nsw i64 %iv, 4 |
| 58 | + %c = icmp ule i64 %l, 128 |
| 59 | + br i1 %c, label %loop.then, label %loop.latch ; the store in %loop.then only runs when %c is true |
| 60 | + |
| 61 | +loop.then: |
| 62 | + %or = or disjoint i64 %iv.4, 1 ; the instruction under test: the expected output above lists %or as an EMIT add |
| 63 | + %g.dst = getelementptr inbounds i64, ptr %dst, i64 %or |
| 64 | + store i64 %iv.4, ptr %g.dst, align 4 |
| 65 | + br label %loop.latch |
| 66 | + |
| 67 | +loop.latch: |
| 68 | + %iv.next = add nuw nsw i64 %iv, 1 |
| 69 | + %exitcond = icmp eq i64 %iv.next, 32 ; %iv starts at 0, so the loop body runs 32 times |
| 70 | + br i1 %exitcond, label %exit, label %loop.header |
| 71 | + |
| 72 | +exit: |
| 73 | + ret void |
| 74 | +} |
0 commit comments