-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Add some baseline cost model tests #100797
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Add some baseline cost model tests #100797
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesPatch is 534.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100797.diff 25 Files Affected:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll
deleted file mode 100644
index 72a3392891592..0000000000000
--- a/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll
+++ /dev/null
@@ -1,103 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s
-
-; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s
-; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s
-; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s
-; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s
-; END.
-
-define i32 @fcopysign(i32 %arg) {
-; ALL-LABEL: 'fcopysign'
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
-;
-; ALL-SIZE-LABEL: 'fcopysign'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %F32 = call float @llvm.copysign.f32(float undef, float undef)
- %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
- %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
- %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
-
- %F64 = call double @llvm.copysign.f64(double undef, double undef)
- %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
- %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
- %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-
- ret i32 undef
-}
-
-define i32 @fsqrt(i32 %arg) {
-; ALL-LABEL: 'fsqrt'
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
-;
-; ALL-SIZE-LABEL: 'fsqrt'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %F32 = call float @llvm.sqrt.f32(float undef)
- %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
- %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
- %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-
- %F64 = call double @llvm.sqrt.f64(double undef)
- %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
- %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
- %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-
- ret i32 undef
-}
-
-declare float @llvm.copysign.f32(float, float)
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
-declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
-declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
-
-declare double @llvm.copysign.f64(double, double)
-declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
-declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
-
-declare float @llvm.sqrt.f32(float)
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
-declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
-declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
-
-declare double @llvm.sqrt.f64(double)
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
-declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
-declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arithmetic_fence.ll b/llvm/test/Analysis/CostModel/AMDGPU/arithmetic_fence.ll
new file mode 100644
index 0000000000000..2cee15193a503
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/arithmetic_fence.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s
+
+define void @arithmetic_fence_f16() {
+; ALL-LABEL: 'arithmetic_fence_f16'
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.arithmetic.fence.f16(half undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.arithmetic.fence.v2f16(<2 x half> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.arithmetic.fence.v3f16(<3 x half> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.arithmetic.fence.v4f16(<4 x half> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.arithmetic.fence.v5f16(<5 x half> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f16 = call <8 x half> @llvm.arithmetic.fence.v8f16(<8 x half> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.arithmetic.fence.v16f16(<16 x half> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.arithmetic.fence.v17f16(<17 x half> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'arithmetic_fence_f16'
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.arithmetic.fence.f16(half undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.arithmetic.fence.v2f16(<2 x half> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.arithmetic.fence.v3f16(<3 x half> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.arithmetic.fence.v4f16(<4 x half> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.arithmetic.fence.v5f16(<5 x half> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f16 = call <8 x half> @llvm.arithmetic.fence.v8f16(<8 x half> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.arithmetic.fence.v16f16(<16 x half> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.arithmetic.fence.v17f16(<17 x half> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+ %f16 = call half @llvm.arithmetic.fence.f16(half undef)
+ %v2f16 = call <2 x half> @llvm.arithmetic.fence.v2f16(<2 x half> undef)
+ %v3f16 = call <3 x half> @llvm.arithmetic.fence.v3f16(<3 x half> undef)
+ %v4f16 = call <4 x half> @llvm.arithmetic.fence.v4f16(<4 x half> undef)
+ %v5f16 = call <5 x half> @llvm.arithmetic.fence.v5f16(<5 x half> undef)
+ %v8f16 = call <8 x half> @llvm.arithmetic.fence.v8f16(<8 x half> undef)
+ %v16f16 = call <16 x half> @llvm.arithmetic.fence.v16f16(<16 x half> undef)
+ %v17f16 = call <17 x half> @llvm.arithmetic.fence.v17f16(<17 x half> undef)
+ ret void
+}
+
+define void @arithmetic_fence_bf16() {
+; ALL-LABEL: 'arithmetic_fence_bf16'
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bf16 = call bfloat @llvm.arithmetic.fence.bf16(bfloat undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2bf16 = call <2 x bfloat> @llvm.arithmetic.fence.v2bf16(<2 x bfloat> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3bf16 = call <3 x bfloat> @llvm.arithmetic.fence.v3bf16(<3 x bfloat> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4bf16 = call <4 x bfloat> @llvm.arithmetic.fence.v4bf16(<4 x bfloat> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5bf16 = call <5 x bfloat> @llvm.arithmetic.fence.v5bf16(<5 x bfloat> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8bf16 = call <8 x bfloat> @llvm.arithmetic.fence.v8bf16(<8 x bfloat> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16bf16 = call <16 x bfloat> @llvm.arithmetic.fence.v16bf16(<16 x bfloat> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17bf16 = call <17 x bfloat> @llvm.arithmetic.fence.v17bf16(<17 x bfloat> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'arithmetic_fence_bf16'
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bf16 = call bfloat @llvm.arithmetic.fence.bf16(bfloat undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2bf16 = call <2 x bfloat> @llvm.arithmetic.fence.v2bf16(<2 x bfloat> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3bf16 = call <3 x bfloat> @llvm.arithmetic.fence.v3bf16(<3 x bfloat> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4bf16 = call <4 x bfloat> @llvm.arithmetic.fence.v4bf16(<4 x bfloat> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5bf16 = call <5 x bfloat> @llvm.arithmetic.fence.v5bf16(<5 x bfloat> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8bf16 = call <8 x bfloat> @llvm.arithmetic.fence.v8bf16(<8 x bfloat> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16bf16 = call <16 x bfloat> @llvm.arithmetic.fence.v16bf16(<16 x bfloat> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17bf16 = call <17 x bfloat> @llvm.arithmetic.fence.v17bf16(<17 x bfloat> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+ %bf16 = call bfloat @llvm.arithmetic.fence.bf16(bfloat undef)
+ %v2bf16 = call <2 x bfloat> @llvm.arithmetic.fence.v2bf16(<2 x bfloat> undef)
+ %v3bf16 = call <3 x bfloat> @llvm.arithmetic.fence.v3bf16(<3 x bfloat> undef)
+ %v4bf16 = call <4 x bfloat> @llvm.arithmetic.fence.v4bf16(<4 x bfloat> undef)
+ %v5bf16 = call <5 x bfloat> @llvm.arithmetic.fence.v5bf16(<5 x bfloat> undef)
+ %v8bf16 = call <8 x bfloat> @llvm.arithmetic.fence.v8bf16(<8 x bfloat> undef)
+ %v16bf16 = call <16 x bfloat> @llvm.arithmetic.fence.v16bf16(<16 x bfloat> undef)
+ %v17bf16 = call <17 x bfloat> @llvm.arithmetic.fence.v17bf16(<17 x bfloat> undef)
+ ret void
+}
+
+define void @arithmetic_fence_f32() {
+; ALL-LABEL: 'arithmetic_fence_f32'
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.arithmetic.fence.f32(float undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.arithmetic.fence.v3f32(<3 x float> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.arithmetic.fence.v4f32(<4 x float> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.arithmetic.fence.v5f32(<5 x float> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32 = call <16 x float> @llvm.arithmetic.fence.v16f32(<16 x float> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f32 = call <17 x float> @llvm.arithmetic.fence.v17f32(<17 x float> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'arithmetic_fence_f32'
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.arithmetic.fence.f32(float undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.arithmetic.fence.v3f32(<3 x float> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.arithmetic.fence.v4f32(<4 x float> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.arithmetic.fence.v5f32(<5 x float> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32 = call <16 x float> @llvm.arithmetic.fence.v16f32(<16 x float> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f32 = call <17 x float> @llvm.arithmetic.fence.v17f32(<17 x float> undef)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+ %f32 = call float @llvm.arithmetic.fence.f32(float undef)
+ %v2f32 = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> undef)
+ %v3f32 = call <3 x float> @llvm.arithmetic.fence.v3f32(<3 x float> undef)
+ %v4f32 = call <4 x float> @llvm.arithmetic.fence.v4f32(<4 x float> undef)
+ %v5f32 = call <5 x float> @llvm.arithmetic.fence.v5f32(<5 x float> undef)
+ %v8f32 = call <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float> undef)
+ %v16f32 = call <16 x float> @llvm.arithmetic.fence.v16f32(<16 x float> undef)
+ %v17f32 = call <17 x float> @llvm.arithmetic.fence.v17f32(<17 x float> undef)
+ ret void
+}
+
+define void @arithmetic_fence_f64() {
+; ALL-LABEL: 'arithmetic_fence_f64'
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64 = call double @llvm.arithmetic.fence.f64(double undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.arithmetic.fence.v2f64(<2 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.arithmetic.fence.v3f64(<3 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.arithmetic.fence.v4f64(<4 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = call <5 x double> @llvm.arithmetic.fence.v5f64(<5 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64 = call <8 x double> @llvm.arithmetic.fence.v8f64(<8 x double> undef)
+; ALL-NEXT: Cost Model: Found an estimated cos...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any particular reason why we don't include GFX11 or GFX12 in any of these?
No description provided.