1
+ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of"
2
+ ; RUN: opt -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output %s 2>&1 | FileCheck %s
3
+
4
+ ; REQUIRES: asserts
5
+
6
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
7
+ ; Cost-model test: the 'or disjoint' that indexes %dst is expected to be costed as a VPInstruction 'add' ("EMIT ir<%or> = add ir<%iv.4>, ir<1>" in the assertions below).
8
+ define void @wide_or_replaced_with_add_vpinstruction (ptr %src , ptr noalias %dst ) {
9
+ ; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction'
10
+ ; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
11
+ ; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
12
+ ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
13
+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
14
+ ; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
15
+ ; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
16
+ ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
17
+ ; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src>
18
+ ; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
19
+ ; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
20
+ ; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
21
+ ; CHECK: Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
22
+ ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
23
+ ; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
24
+ ; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
25
+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
26
+ ; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
27
+ ; CHECK: Cost of 0 for VF 2: vector loop backedge
28
+ ; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
29
+ ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
30
+ ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
31
+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
32
+ ; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
33
+ ; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
34
+ ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
35
+ ; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src>
36
+ ; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
37
+ ; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
38
+ ; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
39
+ ; CHECK: Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
40
+ ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
41
+ ; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
42
+ ; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
43
+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
44
+ ; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
45
+ ; CHECK: Cost of 0 for VF 4: vector loop backedge
46
+ ; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
47
+ ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
48
+ ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
49
+ ;
50
+ entry:
51
+ br label %loop.header
52
+ ; Header: load src[iv], compute %iv.4 = iv + 4, and enter %loop.then only when the loaded value is unsigned <= 128.
53
+ loop.header:
54
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop.latch ]
55
+ %g.src = getelementptr inbounds i64 , ptr %src , i64 %iv
56
+ %l = load i64 , ptr %g.src
57
+ %iv.4 = add nuw nsw i64 %iv , 4
58
+ %c = icmp ule i64 %l , 128
59
+ br i1 %c , label %loop.then , label %loop.latch
60
+ ; Conditional block: store %iv.4 to dst[%or], where %or is %iv.4 'or disjoint' 1 (the instruction under test).
61
+ loop.then:
62
+ %or = or disjoint i64 %iv.4 , 1
63
+ %g.dst = getelementptr inbounds i64 , ptr %dst , i64 %or
64
+ store i64 %iv.4 , ptr %g.dst , align 4
65
+ br label %loop.latch
66
+ ; Latch: advance iv by 1 and leave the loop once %iv.next equals 32.
67
+ loop.latch:
68
+ %iv.next = add nuw nsw i64 %iv , 1
69
+ %exitcond = icmp eq i64 %iv.next , 32
70
+ br i1 %exitcond , label %exit , label %loop.header
71
+
72
+ exit:
73
+ ret void
74
+ }
0 commit comments