@@ -12,6 +12,10 @@ define amdgpu_kernel void @add_i32() #0 {
12
12
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
13
13
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
14
14
; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
15
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
16
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
17
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
18
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
15
19
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
16
20
;
17
21
; ALL-SIZE-LABEL: 'add_i32'
@@ -20,13 +24,21 @@ define amdgpu_kernel void @add_i32() #0 {
20
24
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
21
25
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
22
26
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
27
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
28
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
29
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
30
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
23
31
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
24
32
;
25
33
%i32 = add i32 undef , undef
26
34
%v2i32 = add <2 x i32 > undef , undef
27
35
%v3i32 = add <3 x i32 > undef , undef
28
36
%v4i32 = add <4 x i32 > undef , undef
29
37
%v5i32 = add <5 x i32 > undef , undef
38
+ %v6i32 = add <6 x i32 > undef , undef
39
+ %v7i32 = add <7 x i32 > undef , undef
40
+ %v8i32 = add <8 x i32 > undef , undef
41
+ %v32i32 = add <32 x i32 > undef , undef
30
42
ret void
31
43
}
32
44
@@ -36,6 +48,10 @@ define amdgpu_kernel void @add_i64() #0 {
36
48
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
37
49
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
38
50
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
51
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
52
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
53
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
54
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
39
55
; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
40
56
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
41
57
;
@@ -44,13 +60,21 @@ define amdgpu_kernel void @add_i64() #0 {
44
60
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
45
61
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
46
62
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
63
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
64
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
65
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
66
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
47
67
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
48
68
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
49
69
;
50
70
%i64 = add i64 undef , undef
51
71
%v2i64 = add <2 x i64 > undef , undef
52
72
%v3i64 = add <3 x i64 > undef , undef
53
73
%v4i64 = add <4 x i64 > undef , undef
74
+ %v5i64 = add <5 x i64 > undef , undef
75
+ %v6i64 = add <6 x i64 > undef , undef
76
+ %v7i64 = add <7 x i64 > undef , undef
77
+ %v8i64 = add <8 x i64 > undef , undef
54
78
%v16i64 = add <16 x i64 > undef , undef
55
79
ret void
56
80
}
@@ -59,61 +83,124 @@ define amdgpu_kernel void @add_i16() #0 {
59
83
; FAST16-LABEL: 'add_i16'
60
84
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
61
85
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
86
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef
87
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
88
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
89
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
62
90
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
63
91
;
64
92
; SLOW16-LABEL: 'add_i16'
65
93
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
66
94
; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
95
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef
96
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
97
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
98
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
67
99
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
68
100
;
69
101
; FAST16-SIZE-LABEL: 'add_i16'
70
102
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
71
103
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
104
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef
105
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
106
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
107
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
72
108
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
73
109
;
74
110
; SLOW16-SIZE-LABEL: 'add_i16'
75
111
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
76
112
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
113
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef
114
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
115
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
116
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
77
117
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
78
118
;
79
119
%i16 = add i16 undef , undef
80
120
%v2i16 = add <2 x i16 > undef , undef
121
+ %v3i16 = add <3 x i16 > undef , undef
122
+ %v4i16 = add <4 x i16 > undef , undef
123
+ %v5i16 = add <5 x i16 > undef , undef
124
+ %v6i16 = add <6 x i16 > undef , undef
125
+ ret void
126
+ }
127
+
128
; Cost-model expectations for integer `add` on i8 and on <2..6 x i8> vectors.
; Every add of undef operands in the body is matched, in order, by one
; expectation under the `ALL` prefix and one under the `ALL-SIZE` prefix.
; The expected add costs are the same under both prefixes (1, 2, 4, 4, 8, 8);
; only the `ret void` expectation differs (10 vs. 1).
; NOTE(review): the RUN lines that define these prefixes are outside this
; view - confirm which cost kinds they select.
+ define amdgpu_kernel void @add_i8 () #0 {
129
+ ; ALL-LABEL: 'add_i8'
130
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef
131
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef
132
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef
133
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
134
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
135
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
136
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
137
+ ;
138
+ ; ALL-SIZE-LABEL: 'add_i8'
139
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef
140
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef
141
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef
142
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
143
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
144
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
145
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
146
+ ;
147
+ %i8 = add i8 undef , undef
148
+ %v2i8 = add <2 x i8 > undef , undef
149
+ %v3i8 = add <3 x i8 > undef , undef
150
+ %v4i8 = add <4 x i8 > undef , undef
151
+ %v5i8 = add <5 x i8 > undef , undef
152
+ %v6i8 = add <6 x i8 > undef , undef
81
153
ret void
82
154
}
83
155
84
156
; Cost-model expectations for integer `sub` on scalar i8/i16/i32/i64 and on
; <2..4 x i16> vectors, under four check prefixes (FAST16, SLOW16 and their
; -SIZE variants). In the expectations below the scalar costs are the same
; under every prefix; the vector i16 costs are 1/2/2 under the FAST16
; prefixes and 2/4/4 under the SLOW16 prefixes.
; %i8 is computed with the i8 type so that the value name matches the
; operation under test and does not duplicate the %i16 case. Its expected
; cost of 1 mirrors the scalar i8 add expectation in @add_i8.
; NOTE(review): regenerate the assertions with the update script after
; applying this change to confirm the i8 cost under every RUN line.
define amdgpu_kernel void @sub () #0 {
85
157
; FAST16-LABEL: 'sub'
158
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
159
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
86
160
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
87
161
; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
88
- ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
89
162
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
163
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x i16> undef, undef
164
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef
90
165
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
91
166
;
92
167
; SLOW16-LABEL: 'sub'
168
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
169
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
93
170
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
94
171
; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
95
- ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
96
172
; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
173
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef
174
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
97
175
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
98
176
;
99
177
; FAST16-SIZE-LABEL: 'sub'
178
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
179
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
100
180
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
101
181
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
102
- ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
103
182
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
183
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x i16> undef, undef
184
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef
104
185
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
105
186
;
106
187
; SLOW16-SIZE-LABEL: 'sub'
188
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
189
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
107
190
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
108
191
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
109
- ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
110
192
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
193
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef
194
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
111
195
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
112
196
;
197
+ %i8 = sub i8 undef , undef
198
+ %i16 = sub i16 undef , undef
113
199
%i32 = sub i32 undef , undef
114
200
%i64 = sub i64 undef , undef
115
- %i16 = sub i16 undef , undef
116
201
%v2i16 = sub <2 x i16 > undef , undef
202
+ %v3i16 = sub <3 x i16 > undef , undef
203
+ %v4i16 = sub <4 x i16 > undef , undef
117
204
ret void
118
205
}
119
206
0 commit comments