@@ -17,6 +17,14 @@ declare <5 x float> @llvm.sin.v5f32(<5 x float>)
17
17
declare <6 x float > @llvm.sin.v6f32 (<6 x float >)
18
18
declare <3 x double > @llvm.sin.v3f64 (<3 x double >)
19
19
20
; Declarations of the vector llvm.tan intrinsics called by the tan_* test
; functions in this file: float vectors of 1 to 6 elements and <3 x double>.
+ declare <1 x float > @llvm.tan.v1f32 (<1 x float >)
21
+ declare <2 x float > @llvm.tan.v2f32 (<2 x float >)
22
+ declare <3 x float > @llvm.tan.v3f32 (<3 x float >)
23
+ declare <4 x float > @llvm.tan.v4f32 (<4 x float >)
24
+ declare <5 x float > @llvm.tan.v5f32 (<5 x float >)
25
+ declare <6 x float > @llvm.tan.v6f32 (<6 x float >)
26
+ declare <3 x double > @llvm.tan.v3f64 (<3 x double >)
27
+
20
28
; Verify that all of the potential libcall candidates are handled.
21
29
; Some of these have custom lowering, so those cases won't have
22
30
; libcalls.
@@ -230,6 +238,200 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
230
238
ret <3 x double > %r
231
239
}
232
240
241
; tan on <1 x float>: the expected output is a single call to tanf through
; the PLT. The pushq/popq %rax pair around the call presumably only keeps the
; stack 16-byte aligned at the call site -- TODO confirm.
; NOTE(review): the check lines look machine-generated; confirm how they are
; regenerated before editing them by hand.
+ define <1 x float > @tan_v1f32 (<1 x float > %x ) nounwind {
242
+ ; CHECK-LABEL: tan_v1f32:
243
+ ; CHECK: # %bb.0:
244
+ ; CHECK-NEXT: pushq %rax
245
+ ; CHECK-NEXT: callq tanf@PLT
246
+ ; CHECK-NEXT: popq %rax
247
+ ; CHECK-NEXT: retq
248
+ %r = call <1 x float > @llvm.tan.v1f32 (<1 x float > %x )
249
+ ret <1 x float > %r
250
+ }
251
+
252
; tan on <2 x float>: the expected output makes two tanf calls. The input
; vector is spilled to the stack before the first call; the second call's
; argument is produced by vmovshdup from that spill slot, and the two results
; are merged with vinsertps (second result placed at element 1).
+ define <2 x float > @tan_v2f32 (<2 x float > %x ) nounwind {
253
+ ; CHECK-LABEL: tan_v2f32:
254
+ ; CHECK: # %bb.0:
255
+ ; CHECK-NEXT: subq $40, %rsp
256
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
257
+ ; CHECK-NEXT: callq tanf@PLT
258
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
259
+ ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
260
+ ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
261
+ ; CHECK-NEXT: callq tanf@PLT
262
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
263
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
264
+ ; CHECK-NEXT: addq $40, %rsp
265
+ ; CHECK-NEXT: retq
266
+ %r = call <2 x float > @llvm.tan.v2f32 (<2 x float > %x )
267
+ ret <2 x float > %r
268
+ }
269
+
270
; tan on <3 x float>: the expected output makes three tanf calls. The input
; is spilled once and each later argument is shuffled out of that spill slot
; (vmovshdup, then vpermilpd $1). After each call the partial result vector
; is reloaded and the new scalar is inserted with vinsertps at element 1 and
; then element 2.
+ define <3 x float > @tan_v3f32 (<3 x float > %x ) nounwind {
271
+ ; CHECK-LABEL: tan_v3f32:
272
+ ; CHECK: # %bb.0:
273
+ ; CHECK-NEXT: subq $40, %rsp
274
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
275
+ ; CHECK-NEXT: callq tanf@PLT
276
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
277
+ ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
278
+ ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
279
+ ; CHECK-NEXT: callq tanf@PLT
280
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
281
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
282
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
283
+ ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
284
+ ; CHECK-NEXT: # xmm0 = mem[1,0]
285
+ ; CHECK-NEXT: callq tanf@PLT
286
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
287
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
288
+ ; CHECK-NEXT: addq $40, %rsp
289
+ ; CHECK-NEXT: retq
290
+ %r = call <3 x float > @llvm.tan.v3f32 (<3 x float > %x )
291
+ ret <3 x float > %r
292
+ }
293
+
294
; tan on <4 x float>: the expected output makes four tanf calls, one per
; element. The input is spilled once; the arguments for calls two to four are
; shuffled out of that spill slot (vmovshdup, vpermilpd $1, vpermilps $255).
; Each result is inserted into the reloaded partial result with vinsertps at
; elements 1, 2 and 3 in turn.
+ define <4 x float > @tan_v4f32 (<4 x float > %x ) nounwind {
295
+ ; CHECK-LABEL: tan_v4f32:
296
+ ; CHECK: # %bb.0:
297
+ ; CHECK-NEXT: subq $40, %rsp
298
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
299
+ ; CHECK-NEXT: callq tanf@PLT
300
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
301
+ ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
302
+ ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
303
+ ; CHECK-NEXT: callq tanf@PLT
304
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
305
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
306
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
307
+ ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
308
+ ; CHECK-NEXT: # xmm0 = mem[1,0]
309
+ ; CHECK-NEXT: callq tanf@PLT
310
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
311
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
312
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
313
+ ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
314
+ ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
315
+ ; CHECK-NEXT: callq tanf@PLT
316
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
317
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
318
+ ; CHECK-NEXT: addq $40, %rsp
319
+ ; CHECK-NEXT: retq
320
+ %r = call <4 x float > @llvm.tan.v4f32 (<4 x float > %x )
321
+ ret <4 x float > %r
322
+ }
323
+
324
; tan on <5 x float>: the argument arrives in %ymm0 and the expected output
; makes five tanf calls. The whole ymm register is spilled first. Four calls
; take their arguments from the low part of that spill slot and build an xmm
; result with vinsertps. The fifth argument comes from the upper 128 bits
; (vextractf128 $1) and its result is merged back with vinsertf128 $1.
; vzeroupper is expected before each call that directly follows ymm use.
+ define <5 x float > @tan_v5f32 (<5 x float > %x ) nounwind {
325
+ ; CHECK-LABEL: tan_v5f32:
326
+ ; CHECK: # %bb.0:
327
+ ; CHECK-NEXT: subq $72, %rsp
328
+ ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
329
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
330
+ ; CHECK-NEXT: vzeroupper
331
+ ; CHECK-NEXT: callq tanf@PLT
332
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
333
+ ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
334
+ ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
335
+ ; CHECK-NEXT: callq tanf@PLT
336
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
337
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
338
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
339
+ ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
340
+ ; CHECK-NEXT: # xmm0 = mem[1,0]
341
+ ; CHECK-NEXT: callq tanf@PLT
342
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
343
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
344
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
345
+ ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
346
+ ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
347
+ ; CHECK-NEXT: callq tanf@PLT
348
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
349
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
350
+ ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
351
+ ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
352
+ ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
353
+ ; CHECK-NEXT: vzeroupper
354
+ ; CHECK-NEXT: callq tanf@PLT
355
+ ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
356
+ ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
357
+ ; CHECK-NEXT: addq $72, %rsp
358
+ ; CHECK-NEXT: retq
359
+ %r = call <5 x float > @llvm.tan.v5f32 (<5 x float > %x )
360
+ ret <5 x float > %r
361
+ }
362
+
363
; tan on <6 x float>: the argument arrives in %ymm0 and the expected output
; makes six tanf calls. The upper 128 bits are extracted (vextractf128 $1)
; and handled first with two calls whose results are merged by vinsertps and
; spilled. The low 128 bits are then handled with four calls, as in the
; four-element case. The final vinsertf128 $1 folds the spilled upper-half
; result back in directly from the stack.
+ define <6 x float > @tan_v6f32 (<6 x float > %x ) nounwind {
364
+ ; CHECK-LABEL: tan_v6f32:
365
+ ; CHECK: # %bb.0:
366
+ ; CHECK-NEXT: subq $72, %rsp
367
+ ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
368
+ ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
369
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
370
+ ; CHECK-NEXT: vzeroupper
371
+ ; CHECK-NEXT: callq tanf@PLT
372
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
373
+ ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
374
+ ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
375
+ ; CHECK-NEXT: callq tanf@PLT
376
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
377
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
378
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
379
+ ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
380
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
381
+ ; CHECK-NEXT: vzeroupper
382
+ ; CHECK-NEXT: callq tanf@PLT
383
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
384
+ ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
385
+ ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
386
+ ; CHECK-NEXT: callq tanf@PLT
387
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
388
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
389
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
390
+ ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
391
+ ; CHECK-NEXT: # xmm0 = mem[1,0]
392
+ ; CHECK-NEXT: callq tanf@PLT
393
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
394
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
395
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
396
+ ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
397
+ ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
398
+ ; CHECK-NEXT: callq tanf@PLT
399
+ ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
400
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
401
+ ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
402
+ ; CHECK-NEXT: addq $72, %rsp
403
+ ; CHECK-NEXT: retq
404
+ %r = call <6 x float > @llvm.tan.v6f32 (<6 x float > %x )
405
+ ret <6 x float > %r
406
+ }
407
+
408
; tan on <3 x double>: the argument arrives in %ymm0 and the expected output
; makes three calls to tan (the double-precision symbol, not tanf). The first
; two results are combined with vunpcklpd. The third argument comes from the
; upper 128 bits (vextractf128 $1) and its result is merged back with
; vinsertf128 $1.
+ define <3 x double > @tan_v3f64 (<3 x double > %x ) nounwind {
409
+ ; CHECK-LABEL: tan_v3f64:
410
+ ; CHECK: # %bb.0:
411
+ ; CHECK-NEXT: subq $72, %rsp
412
+ ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
413
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
414
+ ; CHECK-NEXT: vzeroupper
415
+ ; CHECK-NEXT: callq tan@PLT
416
+ ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
417
+ ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
418
+ ; CHECK-NEXT: # xmm0 = mem[1,0]
419
+ ; CHECK-NEXT: callq tan@PLT
420
+ ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
421
+ ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
422
+ ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
423
+ ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
424
+ ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
425
+ ; CHECK-NEXT: vzeroupper
426
+ ; CHECK-NEXT: callq tan@PLT
427
+ ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
428
+ ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
429
+ ; CHECK-NEXT: addq $72, %rsp
430
+ ; CHECK-NEXT: retq
431
+ %r = call <3 x double > @llvm.tan.v3f64 (<3 x double > %x )
432
+ ret <3 x double > %r
433
+ }
434
+
233
435
define <2 x float > @fabs_v2f32 (<2 x float > %x ) nounwind {
234
436
; CHECK-LABEL: fabs_v2f32:
235
437
; CHECK: # %bb.0:
0 commit comments