-; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-apple-ios | FileCheck %s
-; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-apple-ios -mattr=slow-misaligned-128store | FileCheck %s --check-prefix=SLOW_MISALIGNED_128_STORE
+; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-unknown | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-unknown -mattr=slow-misaligned-128store | FileCheck %s --check-prefix=SLOW_MISALIGNED_128_STORE
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 ; CHECK-LABEL: getMemoryOpCost
 ; SLOW_MISALIGNED_128_STORE-LABEL: getMemoryOpCost
 define void @getMemoryOpCost() {
-  ; If FeatureSlowMisaligned128Store is set, we penalize <2 x i64> stores. On
-  ; Cyclone, for example, such stores should be expensive because we don't
-  ; split them and misaligned 16b stores have bad performance.
-  ;
-  ; CHECK: cost of 1 {{.*}} store
-  ; SLOW_MISALIGNED_128_STORE: cost of 12 {{.*}} store
+  ; If FeatureSlowMisaligned128Store is set, we penalize 128-bit stores.
+  ; The unlegalized 256-bit stores are further penalized when legalized down
+  ; to 128-bit stores.
+
+  ; CHECK: cost of 2 for {{.*}} store <4 x i64>
+  ; SLOW_MISALIGNED_128_STORE: cost of 24 for {{.*}} store <4 x i64>
+  store <4 x i64> undef, <4 x i64> * undef
+  ; CHECK-NEXT: cost of 2 for {{.*}} store <8 x i32>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <8 x i32>
+  store <8 x i32> undef, <8 x i32> * undef
+  ; CHECK-NEXT: cost of 2 for {{.*}} store <16 x i16>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <16 x i16>
+  store <16 x i16> undef, <16 x i16> * undef
+  ; CHECK-NEXT: cost of 2 for {{.*}} store <32 x i8>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <32 x i8>
+  store <32 x i8> undef, <32 x i8> * undef
+
+  ; CHECK-NEXT: cost of 2 for {{.*}} store <4 x double>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <4 x double>
+  store <4 x double> undef, <4 x double> * undef
+  ; CHECK-NEXT: cost of 2 for {{.*}} store <8 x float>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <8 x float>
+  store <8 x float> undef, <8 x float> * undef
+  ; CHECK-NEXT: cost of 2 for {{.*}} store <16 x half>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <16 x half>
+  store <16 x half> undef, <16 x half> * undef
+
+  ; CHECK-NEXT: cost of 1 for {{.*}} store <2 x i64>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <2 x i64>
   store <2 x i64> undef, <2 x i64> * undef
+  ; CHECK-NEXT: cost of 1 for {{.*}} store <4 x i32>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <4 x i32>
+  store <4 x i32> undef, <4 x i32> * undef
+  ; CHECK-NEXT: cost of 1 for {{.*}} store <8 x i16>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <8 x i16>
+  store <8 x i16> undef, <8 x i16> * undef
+  ; CHECK-NEXT: cost of 1 for {{.*}} store <16 x i8>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <16 x i8>
+  store <16 x i8> undef, <16 x i8> * undef
+
+  ; CHECK-NEXT: cost of 1 for {{.*}} store <2 x double>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <2 x double>
+  store <2 x double> undef, <2 x double> * undef
+  ; CHECK-NEXT: cost of 1 for {{.*}} store <4 x float>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <4 x float>
+  store <4 x float> undef, <4 x float> * undef
+  ; CHECK-NEXT: cost of 1 for {{.*}} store <8 x half>
+  ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <8 x half>
+  store <8 x half> undef, <8 x half> * undef
 
   ; We scalarize the loads/stores because there is no vector register name for
   ; these types (they get extended to v.4h/v.2s).
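
For context on where the checked numbers come from: the AArch64 cost model legalizes a 256-bit vector store into two 128-bit stores, then applies a per-part penalty when FeatureSlowMisaligned128Store is set. Below is a minimal standalone sketch of that arithmetic, assuming a `parts * 2 * 6` amortized penalty consistent with the costs the test checks; `storeCost` and its parameters are hypothetical names for illustration, not the upstream TTI API.

```cpp
#include <cstdio>

// Hypothetical standalone model of the cost heuristic this test exercises
// (an assumed shape, not the verbatim upstream code). A store of
// `vectorBits` legalizes into vectorBits/128 parts; with the
// slow-misaligned-128store feature and insufficient alignment, each
// legalized 128-bit store costs 2 * 6 = 12 instead of 1.
static int storeCost(int vectorBits, bool slowMisaligned128Store,
                     int alignment) {
  const int numParts = vectorBits / 128; // number of legalized stores
  if (slowMisaligned128Store && alignment < 16) {
    // Penalize so vectorization only pays off when enough other work is
    // vectorized alongside the store (assumed amortization factor of 6).
    const int amortizationCost = 6;
    return numParts * 2 * amortizationCost;
  }
  return numParts; // cost 1 per legalized 128-bit store otherwise
}

int main() {
  // Matches the CHECK lines: <2 x i64> is 128-bit, <4 x i64> is 256-bit.
  std::printf("128-bit fast: %d\n", storeCost(128, false, 1)); // cost 1
  std::printf("128-bit slow: %d\n", storeCost(128, true, 1));  // cost 12
  std::printf("256-bit fast: %d\n", storeCost(256, false, 1)); // cost 2
  std::printf("256-bit slow: %d\n", storeCost(256, true, 1));  // cost 24
}
```

Under this model a 256-bit store is two legalized 128-bit stores, so every cost in the test simply doubles: 1 becomes 2 on the default run line and 12 becomes 24 under SLOW_MISALIGNED_128_STORE.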