|
1 | 1 | ; REQUIRES: asserts
|
2 |
| -; RUN: opt < %s -force-vector-width=2 -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s |
| 2 | +; RUN: opt < %s -force-vector-width=2 -enable-cond-stores-vec -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s |
3 | 3 |
|
4 | 4 | target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
5 | 5 | target triple = "aarch64--linux-gnu"
|
@@ -51,3 +51,41 @@ for.end:
|
51 | 51 | %tmp7 = phi i32 [ %tmp6, %for.inc ]
|
52 | 52 | ret i32 %tmp7
|
53 | 53 | }
|
| 54 | + |
| 55 | +; CHECK-LABEL: predicated_store |
| 56 | +; |
| 57 | +; This test checks that we correctly compute the cost of the predicated store |
| 58 | +; instruction. If we assume the block probability is 50%, we compute the cost |
| 59 | +; as: |
| 60 | +; |
| 61 | +; Cost for vector lane zero: |
| 62 | +; (store(2) + 2 * extractelement(0)) / 2 = 1 |
| 63 | +; Cost for vector lane one: |
| 64 | +; (store(2) + 2 * extractelement(3)) / 2 = 4 |
| 65 | +; Total for VF 2: 1 + 4 = 5 (the cost expected by the CHECK line below). |
| 66 | +; CHECK: Found an estimated cost of 5 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 |
| 67 | +; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 |
| 68 | +; |
; predicated_store(a, c, x, n): loop over i = 0, 1, ... that loads a[i] and,
; only when %c is true, stores a[i] + x back to a[i].
; %c is a function argument, so the branch condition is the same on every
; iteration. The exit test is at the bottom of the loop, so the body runs at
; least once.
; The store lives in the conditionally executed block if.then. Its exact text
; (including the value names %tmp0 and %tmp2) is matched by the CHECK lines
; above, so those names must not be changed.
| 69 | +define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) { |
| 70 | +entry: |
| 71 | + br label %for.body |
| 72 | + |
| 73 | +for.body: |
| 74 | + %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] |
; %tmp0 = &a[i]; the same address feeds both the load and the store.
| 75 | + %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i |
| 76 | + %tmp1 = load i32, i32* %tmp0, align 4 |
; Skip the store entirely when %c is false.
| 77 | + br i1 %c, label %if.then, label %for.inc |
| 78 | + |
| 79 | +if.then: |
; a[i] = a[i] + x, reached only on the %c-true path.
| 80 | + %tmp2 = add nsw i32 %tmp1, %x |
| 81 | + store i32 %tmp2, i32* %tmp0, align 4 |
| 82 | + br label %for.inc |
| 83 | + |
| 84 | +for.inc: |
; Advance i and take the back edge while i + 1 < n (signed compare).
| 85 | + %i.next = add nuw nsw i64 %i, 1 |
| 86 | + %cond = icmp slt i64 %i.next, %n |
| 87 | + br i1 %cond, label %for.body, label %for.end |
| 88 | + |
| 89 | +for.end: |
| 90 | + ret void |
| 91 | +} |
0 commit comments