@@ -328,6 +328,72 @@ define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
ret <8 x float> %r
}

+ define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
+ ; AVX2-LABEL: fadd_v8f64_cast_cond:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: movl %edi, %ecx
+ ; AVX2-NEXT: andb $1, %cl
+ ; AVX2-NEXT: movzbl %cl, %ecx
+ ; AVX2-NEXT: vmovd %ecx, %xmm4
+ ; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $2, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $3, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $4, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $5, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $6, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
+ ; AVX2-NEXT: shrb $7, %dil
+ ; AVX2-NEXT: movzbl %dil, %eax
+ ; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
+ ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
+ ; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
+ ; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+ ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+ ; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
+ ; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
+ ; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
+ ; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+ ; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
+ ; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+ ; AVX2-NEXT: vaddpd %ymm3, %ymm1, %ymm1
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: fadd_v8f64_cast_cond:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: kmovw %edi, %k1
+ ; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+ ; AVX512-NEXT: vmovapd %zmm1, %zmm2 {%k1}
+ ; AVX512-NEXT: vaddpd %zmm2, %zmm0, %zmm0
+ ; AVX512-NEXT: retq
+ %b = bitcast i8 %pb to <8 x i1>
+ %s = select <8 x i1> %b, <8 x double> %y, <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>
+ %r = fadd <8 x double> %x, %s
+ ret <8 x double> %r
+ }
+
define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fsub_v8f32_cast_cond:
; AVX2: # %bb.0:
@@ -399,3 +465,69 @@ define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
%r = fsub <8 x float> %x, %s
ret <8 x float> %r
}
+
+ define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
+ ; AVX2-LABEL: fsub_v8f64_cast_cond:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: movl %edi, %ecx
+ ; AVX2-NEXT: andb $1, %cl
+ ; AVX2-NEXT: movzbl %cl, %ecx
+ ; AVX2-NEXT: vmovd %ecx, %xmm4
+ ; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $2, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $3, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $4, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $5, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
+ ; AVX2-NEXT: movl %edi, %eax
+ ; AVX2-NEXT: shrb $6, %al
+ ; AVX2-NEXT: andb $1, %al
+ ; AVX2-NEXT: movzbl %al, %eax
+ ; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
+ ; AVX2-NEXT: shrb $7, %dil
+ ; AVX2-NEXT: movzbl %dil, %eax
+ ; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
+ ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
+ ; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
+ ; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
+ ; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+ ; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
+ ; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
+ ; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
+ ; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
+ ; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+ ; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
+ ; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0
+ ; AVX2-NEXT: vsubpd %ymm3, %ymm1, %ymm1
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: fsub_v8f64_cast_cond:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: kmovw %edi, %k1
+ ; AVX512-NEXT: vmovapd %zmm1, %zmm1 {%k1} {z}
+ ; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0
+ ; AVX512-NEXT: retq
+ %b = bitcast i8 %pb to <8 x i1>
+ %s = select <8 x i1> %b, <8 x double> %y, <8 x double> zeroinitializer
+ %r = fsub <8 x double> %x, %s
+ ret <8 x double> %r
+ }