@@ -101,4 +101,162 @@ define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
101
101
ret <8 x half > %v
102
102
}
103
103
104
+ ; Signed int-to-fp conversion of the lane-0 element of a v2i32 should apply
105
+ ; scvtf directly to the vector subregister, avoiding an fpr->gpr round trip.
106
+ define float @l0_extract_f_v2s (<2 x i32 > %u ) {
107
+ ; CHECK-LABEL: l0_extract_f_v2s:
108
+ ; CHECK: // %bb.0:
109
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
110
+ ; CHECK-NEXT: scvtf s0, s0
111
+ ; CHECK-NEXT: ret
112
+ %i = extractelement <2 x i32 > %u , i64 0
113
+ %f = sitofp i32 %i to float
114
+ ret float %f
115
+ }
116
+
117
+ ; uitofp of lane 0 of v2i32: ucvtf should use ssub (the bottom 32 bits) directly.
118
+ define float @l0_extract_f_v2u (<2 x i32 > %u ) {
119
+ ; CHECK-LABEL: l0_extract_f_v2u:
120
+ ; CHECK: // %bb.0:
121
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
122
+ ; CHECK-NEXT: ucvtf s0, s0
123
+ ; CHECK-NEXT: ret
124
+ %i = extractelement <2 x i32 > %u , i64 0
125
+ %f = uitofp i32 %i to float
126
+ ret float %f
127
+ }
128
+
129
+ ; Negative test: the lane-0 pattern should only apply when the lane is known to be 0.
130
+ define float @ln_extract_f_v2s (<2 x i32 > %u , i64 %n ) {
131
+ ; CHECK-LABEL: ln_extract_f_v2s:
132
+ ; CHECK: // %bb.0:
133
+ ; CHECK-NEXT: sub sp, sp, #16
134
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
135
+ ; CHECK-NEXT: add x8, sp, #8
136
+ ; CHECK-NEXT: str d0, [sp, #8]
137
+ ; CHECK-NEXT: bfi x8, x0, #2, #1
138
+ ; CHECK-NEXT: ldr s0, [x8]
139
+ ; CHECK-NEXT: scvtf s0, s0
140
+ ; CHECK-NEXT: add sp, sp, #16
141
+ ; CHECK-NEXT: ret
142
+ %i = extractelement <2 x i32 > %u , i64 %n ; lane index is the runtime argument %n
143
+ %f = sitofp i32 %i to float
144
+ ret float %f
145
+ }
146
+
147
+ ; sitofp of lane 0 of v4i32: scvtf should use ssub (the bottom 32 bits) directly.
148
+ define float @l0_extract_f_v4s (<4 x i32 > %u ) {
149
+ ; CHECK-LABEL: l0_extract_f_v4s:
150
+ ; CHECK: // %bb.0:
151
+ ; CHECK-NEXT: scvtf s0, s0
152
+ ; CHECK-NEXT: ret
153
+ %i = extractelement <4 x i32 > %u , i64 0
154
+ %f = sitofp i32 %i to float
155
+ ret float %f
156
+ }
157
+ ; uitofp of lane 0 of v4i32: expect ucvtf applied directly to s0.
158
+ define float @l0_extract_f_v4u (<4 x i32 > %u ) {
159
+ ; CHECK-LABEL: l0_extract_f_v4u:
160
+ ; CHECK: // %bb.0:
161
+ ; CHECK-NEXT: ucvtf s0, s0
162
+ ; CHECK-NEXT: ret
163
+ %i = extractelement <4 x i32 > %u , i64 0
164
+ %f = uitofp i32 %i to float
165
+ ret float %f
166
+ }
167
+ ; Variable lane of v4i32: expect the element to be reloaded from a stack copy before scvtf.
168
+ define float @ln_extract_f_v4s (<4 x i32 > %u , i64 %n ) {
169
+ ; CHECK-LABEL: ln_extract_f_v4s:
170
+ ; CHECK: // %bb.0:
171
+ ; CHECK-NEXT: sub sp, sp, #16
172
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
173
+ ; CHECK-NEXT: mov x8, sp
174
+ ; CHECK-NEXT: str q0, [sp]
175
+ ; CHECK-NEXT: bfi x8, x0, #2, #2
176
+ ; CHECK-NEXT: ldr s0, [x8]
177
+ ; CHECK-NEXT: scvtf s0, s0
178
+ ; CHECK-NEXT: add sp, sp, #16
179
+ ; CHECK-NEXT: ret
180
+ %i = extractelement <4 x i32 > %u , i64 %n ; lane index is the runtime argument %n
181
+ %f = sitofp i32 %i to float
182
+ ret float %f
183
+ }
184
+
185
+ ; sitofp of lane 0 of v2i64: scvtf should use dsub (the bottom 64 bits) directly.
186
+ define double @l0_extract_d_v2s (<2 x i64 > %u ) {
187
+ ; CHECK-LABEL: l0_extract_d_v2s:
188
+ ; CHECK: // %bb.0:
189
+ ; CHECK-NEXT: scvtf d0, d0
190
+ ; CHECK-NEXT: ret
191
+ %i = extractelement <2 x i64 > %u , i64 0
192
+ %f = sitofp i64 %i to double
193
+ ret double %f
194
+ }
195
+ ; uitofp of lane 0 of v2i64: expect ucvtf applied directly to d0.
196
+ define double @l0_extract_d_v2u (<2 x i64 > %u ) {
197
+ ; CHECK-LABEL: l0_extract_d_v2u:
198
+ ; CHECK: // %bb.0:
199
+ ; CHECK-NEXT: ucvtf d0, d0
200
+ ; CHECK-NEXT: ret
201
+ %i = extractelement <2 x i64 > %u , i64 0
202
+ %f = uitofp i64 %i to double
203
+ ret double %f
204
+ }
205
+ ; Variable lane of v2i64: expect the element to be reloaded from a stack copy before scvtf.
206
+ define double @ln_extract_d_v2s (<2 x i64 > %u , i64 %n ) {
207
+ ; CHECK-LABEL: ln_extract_d_v2s:
208
+ ; CHECK: // %bb.0:
209
+ ; CHECK-NEXT: sub sp, sp, #16
210
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
211
+ ; CHECK-NEXT: mov x8, sp
212
+ ; CHECK-NEXT: str q0, [sp]
213
+ ; CHECK-NEXT: bfi x8, x0, #3, #1
214
+ ; CHECK-NEXT: ldr d0, [x8]
215
+ ; CHECK-NEXT: scvtf d0, d0
216
+ ; CHECK-NEXT: add sp, sp, #16
217
+ ; CHECK-NEXT: ret
218
+ %i = extractelement <2 x i64 > %u , i64 %n ; lane index is the runtime argument %n
219
+ %f = sitofp i64 %i to double
220
+ ret double %f
221
+ }
222
+
223
+ ; (fullfp16) sitofp of lane 0 of v8i16: scvtf should use hsub (the bottom 16 bits) directly.
224
+ define half @l0_extract_h_v8s (<8 x i16 > %u ) #0 {
225
+ ; CHECK-LABEL: l0_extract_h_v8s:
226
+ ; CHECK: // %bb.0:
227
+ ; CHECK-NEXT: scvtf h0, h0
228
+ ; CHECK-NEXT: ret
229
+ %i = extractelement <8 x i16 > %u , i32 0
230
+ %f = sitofp i16 %i to half
231
+ ret half %f
232
+ }
233
+ ; (fullfp16) uitofp of lane 0 of v8i16: expect ucvtf applied directly to h0.
234
+ define half @l0_extract_h_v8u (<8 x i16 > %u ) #0 {
235
+ ; CHECK-LABEL: l0_extract_h_v8u:
236
+ ; CHECK: // %bb.0:
237
+ ; CHECK-NEXT: ucvtf h0, h0
238
+ ; CHECK-NEXT: ret
239
+ %i = extractelement <8 x i16 > %u , i32 0
240
+ %f = uitofp i16 %i to half
241
+ ret half %f
242
+ }
243
+ ; (fullfp16) Variable lane of v8i16: expect a stack copy, an ldrh into w8, then ucvtf from w8.
244
+ define half @ln_extract_h_v8u (<8 x i16 > %u , i32 %n ) #0 {
245
+ ; CHECK-LABEL: ln_extract_h_v8u:
246
+ ; CHECK: // %bb.0:
247
+ ; CHECK-NEXT: sub sp, sp, #16
248
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
249
+ ; CHECK-NEXT: mov x8, sp
250
+ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
251
+ ; CHECK-NEXT: str q0, [sp]
252
+ ; CHECK-NEXT: bfi x8, x0, #1, #3
253
+ ; CHECK-NEXT: ldrh w8, [x8]
254
+ ; CHECK-NEXT: ucvtf h0, w8
255
+ ; CHECK-NEXT: add sp, sp, #16
256
+ ; CHECK-NEXT: ret
257
+ %i = extractelement <8 x i16 > %u , i32 %n ; lane index is the runtime argument %n
258
+ %f = uitofp i16 %i to half
259
+ ret half %f
260
+ }
261
+
104
262
attributes #0 = { "target-features" ="+fullfp16" } ; attribute group referenced as #0 by the half-precision test functions
0 commit comments