@@ -15,7 +15,7 @@ define amdgpu_kernel void @add_i32() #0 {
15
15
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
16
16
; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
17
17
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
18
- ; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
18
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
19
19
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
20
20
;
21
21
; ALL-SIZE-LABEL: 'add_i32'
@@ -27,7 +27,7 @@ define amdgpu_kernel void @add_i32() #0 {
27
27
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
28
28
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
29
29
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
30
- ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
30
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
31
31
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
32
32
;
33
33
%i32 = add i32 undef , undef
@@ -38,7 +38,7 @@ define amdgpu_kernel void @add_i32() #0 {
38
38
%v6i32 = add <6 x i32 > undef , undef
39
39
%v7i32 = add <7 x i32 > undef , undef
40
40
%v8i32 = add <8 x i32 > undef , undef
41
- %v32i32 = add <32 x i32 > undef , undef
41
+ %v9i32 = add <9 x i32 > undef , undef
42
42
ret void
43
43
}
44
44
@@ -48,34 +48,22 @@ define amdgpu_kernel void @add_i64() #0 {
48
48
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
49
49
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
50
50
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
51
- ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
52
- ; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
53
- ; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
54
- ; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
55
- ; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
51
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
56
52
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
57
53
;
58
54
; ALL-SIZE-LABEL: 'add_i64'
59
55
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = add i64 undef, undef
60
56
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
61
57
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
62
58
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
63
- ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
64
- ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
65
- ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
66
- ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
67
- ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
59
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
68
60
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
69
61
;
70
62
%i64 = add i64 undef , undef
71
63
%v2i64 = add <2 x i64 > undef , undef
72
64
%v3i64 = add <3 x i64 > undef , undef
73
65
%v4i64 = add <4 x i64 > undef , undef
74
66
%v5i64 = add <5 x i64 > undef , undef
75
- %v6i64 = add <6 x i64 > undef , undef
76
- %v7i64 = add <7 x i64 > undef , undef
77
- %v8i64 = add <8 x i64 > undef , undef
78
- %v16i64 = add <16 x i64 > undef , undef
79
67
ret void
80
68
}
81
69
@@ -87,6 +75,8 @@ define amdgpu_kernel void @add_i16() #0 {
87
75
; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
88
76
; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
89
77
; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
78
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
79
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
90
80
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
91
81
;
92
82
; SLOW16-LABEL: 'add_i16'
@@ -96,6 +86,8 @@ define amdgpu_kernel void @add_i16() #0 {
96
86
; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
97
87
; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
98
88
; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
89
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
90
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
99
91
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
100
92
;
101
93
; FAST16-SIZE-LABEL: 'add_i16'
@@ -105,6 +97,8 @@ define amdgpu_kernel void @add_i16() #0 {
105
97
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
106
98
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
107
99
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
100
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
101
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
108
102
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
109
103
;
110
104
; SLOW16-SIZE-LABEL: 'add_i16'
@@ -114,6 +108,8 @@ define amdgpu_kernel void @add_i16() #0 {
114
108
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
115
109
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
116
110
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
111
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
112
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
117
113
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
118
114
;
119
115
%i16 = add i16 undef , undef
@@ -122,6 +118,8 @@ define amdgpu_kernel void @add_i16() #0 {
122
118
%v4i16 = add <4 x i16 > undef , undef
123
119
%v5i16 = add <5 x i16 > undef , undef
124
120
%v6i16 = add <6 x i16 > undef , undef
121
+ %v16i16 = add <16 x i16 > undef , undef
122
+ %v17i16 = add <17 x i16 > undef , undef
125
123
ret void
126
124
}
127
125
@@ -133,6 +131,8 @@ define amdgpu_kernel void @add_i8() #0 {
133
131
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
134
132
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
135
133
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
134
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
135
+ ; ALL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
136
136
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
137
137
;
138
138
; ALL-SIZE-LABEL: 'add_i8'
@@ -142,6 +142,8 @@ define amdgpu_kernel void @add_i8() #0 {
142
142
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
143
143
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
144
144
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
145
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
146
+ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
145
147
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
146
148
;
147
149
%i8 = add i8 undef , undef
@@ -150,12 +152,14 @@ define amdgpu_kernel void @add_i8() #0 {
150
152
%v4i8 = add <4 x i8 > undef , undef
151
153
%v5i8 = add <5 x i8 > undef , undef
152
154
%v6i8 = add <6 x i8 > undef , undef
155
+ %v32i8 = add <32 x i8 > undef , undef
156
+ %v33i8 = add <33 x i8 > undef , undef
153
157
ret void
154
158
}
155
159
156
160
define amdgpu_kernel void @sub () #0 {
157
161
; FAST16-LABEL: 'sub'
158
- ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
162
+ ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
159
163
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
160
164
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
161
165
; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -165,7 +169,7 @@ define amdgpu_kernel void @sub() #0 {
165
169
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
166
170
;
167
171
; SLOW16-LABEL: 'sub'
168
- ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
172
+ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
169
173
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
170
174
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
171
175
; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -175,7 +179,7 @@ define amdgpu_kernel void @sub() #0 {
175
179
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
176
180
;
177
181
; FAST16-SIZE-LABEL: 'sub'
178
- ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
182
+ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
179
183
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
180
184
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
181
185
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -185,7 +189,7 @@ define amdgpu_kernel void @sub() #0 {
185
189
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
186
190
;
187
191
; SLOW16-SIZE-LABEL: 'sub'
188
- ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
192
+ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
189
193
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
190
194
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
191
195
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -194,7 +198,7 @@ define amdgpu_kernel void @sub() #0 {
194
198
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
195
199
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
196
200
;
197
- %i8 = sub i16 undef , undef
201
+ %i8 = sub i8 undef , undef
198
202
%i16 = sub i16 undef , undef
199
203
%i32 = sub i32 undef , undef
200
204
%i64 = sub i64 undef , undef
0 commit comments