Skip to content

Commit a2120f6

Browse files
committed
[NFC][AMDGPU][CostModel] Add tests for AMDGPU cost model, part 2.
1 parent deaedab commit a2120f6

File tree

11 files changed

+4333
-0
lines changed

11 files changed

+4333
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s
3+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s
4+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s
5+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s
6+
7+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s
8+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s
9+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s
10+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s
11+
; END.
12+
13+
; Named struct type; used below as the nested aggregate member in the %r7
; extractvalue of @extract_1.
%T1 = type { i32, float, <4 x i1> }
14+
15+
; Cost-model test for extractvalue on undef struct aggregates. The extracted
; members cover i32, i1, float, an array ([42 x i42]), a vector (<42 x i42>)
; and the named struct %T1. The checks expect every extractvalue to cost 0
; both for the runs without a -cost-kind flag and for the runs with
; -cost-kind=code-size; only the expected cost of 'ret void' differs between
; the two sets of checks (10 vs 1).
define void @extract_1() {
16+
; ALL-LABEL: 'extract_1'
17+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = extractvalue { i32, i32 } undef, 0
18+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = extractvalue { i32, i32 } undef, 1
19+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = extractvalue { i32, i1 } undef, 0
20+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = extractvalue { i32, i1 } undef, 1
21+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r4 = extractvalue { i32, float } undef, 1
22+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = extractvalue { i32, [42 x i42] } undef, 1
23+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = extractvalue { i32, <42 x i42> } undef, 1
24+
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = extractvalue { i32, %T1 } undef, 1
25+
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
26+
;
27+
; ALL-SIZE-LABEL: 'extract_1'
28+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = extractvalue { i32, i32 } undef, 0
29+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = extractvalue { i32, i32 } undef, 1
30+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = extractvalue { i32, i1 } undef, 0
31+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = extractvalue { i32, i1 } undef, 1
32+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r4 = extractvalue { i32, float } undef, 1
33+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = extractvalue { i32, [42 x i42] } undef, 1
34+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = extractvalue { i32, <42 x i42> } undef, 1
35+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = extractvalue { i32, %T1 } undef, 1
36+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
37+
;
38+
%r0 = extractvalue {i32, i32} undef, 0
39+
%r1 = extractvalue {i32, i32} undef, 1
40+
%r2 = extractvalue {i32, i1} undef, 0
41+
%r3 = extractvalue {i32, i1} undef, 1
42+
%r4 = extractvalue {i32, float} undef, 1
43+
%r5 = extractvalue {i32, [42 x i42]} undef, 1
44+
%r6 = extractvalue {i32, <42 x i42>} undef, 1
45+
%r7 = extractvalue {i32, %T1} undef, 1
46+
ret void
47+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s
3+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s
4+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s
5+
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s
6+
7+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s
8+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s
9+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s
10+
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s
11+
; END.
12+
13+
; Cost-model test for the llvm.copysign intrinsic on scalar and vector float
; (<4/8/16 x float>) and double (<2/4/8 x double>) operands. The checks expect
; a cost equal to the element count (1 for scalars), identical for float and
; double and identical in both sets of checks; only 'ret i32 undef' differs
; (10 vs 1). The %arg parameter is never referenced in the body.
define i32 @fcopysign(i32 %arg) {
14+
; ALL-LABEL: 'fcopysign'
15+
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
16+
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
17+
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
18+
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
19+
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
20+
; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
21+
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
22+
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
23+
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
24+
;
25+
; ALL-SIZE-LABEL: 'fcopysign'
26+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
27+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
28+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
29+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
30+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
31+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
32+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
33+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
34+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
35+
;
36+
%F32 = call float @llvm.copysign.f32(float undef, float undef)
37+
%V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
38+
%V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
39+
%V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
40+
41+
%F64 = call double @llvm.copysign.f64(double undef, double undef)
42+
%V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
43+
%V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
44+
%V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
45+
46+
ret i32 undef
47+
}
48+
49+
; Cost-model test for the llvm.sqrt intrinsic on scalar and vector float
; (<4/8/16 x float>) and double (<2/4/8 x double>) operands. The checks expect
; a cost equal to the element count (1 for scalars), identical for float and
; double and identical in both sets of checks; only 'ret i32 undef' differs
; (10 vs 1). The %arg parameter is never referenced in the body.
define i32 @fsqrt(i32 %arg) {
50+
; ALL-LABEL: 'fsqrt'
51+
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
52+
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
53+
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
54+
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
55+
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
56+
; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
57+
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
58+
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
59+
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
60+
;
61+
; ALL-SIZE-LABEL: 'fsqrt'
62+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
63+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
64+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
65+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
66+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
67+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
68+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
69+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
70+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
71+
;
72+
%F32 = call float @llvm.sqrt.f32(float undef)
73+
%V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
74+
%V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
75+
%V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
76+
77+
%F64 = call double @llvm.sqrt.f64(double undef)
78+
%V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
79+
%V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
80+
%V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
81+
82+
ret i32 undef
83+
}
84+
85+
; Declarations of the llvm.copysign and llvm.sqrt intrinsic overloads (float
; and double, scalar and vector) that are called by @fcopysign and @fsqrt.
declare float @llvm.copysign.f32(float, float)
86+
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
87+
declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
88+
declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
89+
90+
declare double @llvm.copysign.f64(double, double)
91+
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
92+
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
93+
declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
94+
95+
declare float @llvm.sqrt.f32(float)
96+
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
97+
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
98+
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
99+
100+
declare double @llvm.sqrt.f64(double)
101+
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
102+
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
103+
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)

0 commit comments

Comments
 (0)