|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py |
| 2 | +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s |
| 3 | +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s |
| 4 | +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s |
| 5 | +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s |
| 6 | + |
| 7 | +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s |
| 8 | +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s |
| 9 | +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s |
| 10 | +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s |
| 11 | +; END. |
| 12 | + |
; Cost-model test for the llvm.copysign intrinsic. The body calls the float
; overloads (scalar, <4 x>, <8 x>, <16 x>) and the double overloads (scalar,
; <2 x>, <4 x>, <8 x>) on undef operands; %arg is unused.
; ALL lines are checked by the RUN lines without -cost-kind, ALL-SIZE lines by
; the RUN lines passing -cost-kind=code-size. In both sets the recorded cost is
; 1 for each scalar call and equals the element count for each vector call;
; only the cost of the final ret differs (10 under ALL, 1 under ALL-SIZE).
| 13 | +define i32 @fcopysign(i32 %arg) { |
| 14 | +; ALL-LABEL: 'fcopysign' |
| 15 | +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) |
| 16 | +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) |
| 17 | +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) |
| 18 | +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) |
| 19 | +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) |
| 20 | +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) |
| 21 | +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) |
| 22 | +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) |
| 23 | +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef |
| 24 | +; |
| 25 | +; ALL-SIZE-LABEL: 'fcopysign' |
| 26 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) |
| 27 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) |
| 28 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) |
| 29 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) |
| 30 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) |
| 31 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) |
| 32 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) |
| 33 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) |
| 34 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef |
| 35 | +; |
| 36 | + %F32 = call float @llvm.copysign.f32(float undef, float undef) |
| 37 | + %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) |
| 38 | + %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) |
| 39 | + %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) |
| 40 | + |
| 41 | + %F64 = call double @llvm.copysign.f64(double undef, double undef) |
| 42 | + %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) |
| 43 | + %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) |
| 44 | + %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) |
| 45 | + |
| 46 | + ret i32 undef |
| 47 | +} |
| 48 | + |
; Cost-model test for the llvm.sqrt intrinsic. The body calls the float
; overloads (scalar, <4 x>, <8 x>, <16 x>) and the double overloads (scalar,
; <2 x>, <4 x>, <8 x>) on undef operands; %arg is unused.
; ALL lines are checked by the RUN lines without -cost-kind, ALL-SIZE lines by
; the RUN lines passing -cost-kind=code-size. In both sets the recorded cost is
; 1 for each scalar call and equals the element count for each vector call;
; only the cost of the final ret differs (10 under ALL, 1 under ALL-SIZE).
| 49 | +define i32 @fsqrt(i32 %arg) { |
| 50 | +; ALL-LABEL: 'fsqrt' |
| 51 | +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) |
| 52 | +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) |
| 53 | +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) |
| 54 | +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) |
| 55 | +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) |
| 56 | +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) |
| 57 | +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) |
| 58 | +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) |
| 59 | +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef |
| 60 | +; |
| 61 | +; ALL-SIZE-LABEL: 'fsqrt' |
| 62 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) |
| 63 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) |
| 64 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) |
| 65 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) |
| 66 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) |
| 67 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) |
| 68 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) |
| 69 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) |
| 70 | +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef |
| 71 | +; |
| 72 | + %F32 = call float @llvm.sqrt.f32(float undef) |
| 73 | + %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) |
| 74 | + %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) |
| 75 | + %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) |
| 76 | + |
| 77 | + %F64 = call double @llvm.sqrt.f64(double undef) |
| 78 | + %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) |
| 79 | + %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) |
| 80 | + %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) |
| 81 | + |
| 82 | + ret i32 undef |
| 83 | +} |
| 84 | + |
; Declarations of every llvm.copysign and llvm.sqrt overload called by the
; @fcopysign and @fsqrt test functions: float scalar and <4/8/16 x float>,
; double scalar and <2/4/8 x double>.
| 85 | +declare float @llvm.copysign.f32(float, float) |
| 86 | +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) |
| 87 | +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) |
| 88 | +declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>) |
| 89 | + |
| 90 | +declare double @llvm.copysign.f64(double, double) |
| 91 | +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) |
| 92 | +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) |
| 93 | +declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>) |
| 94 | + |
| 95 | +declare float @llvm.sqrt.f32(float) |
| 96 | +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) |
| 97 | +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) |
| 98 | +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) |
| 99 | + |
| 100 | +declare double @llvm.sqrt.f64(double) |
| 101 | +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) |
| 102 | +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) |
| 103 | +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) |
0 commit comments