@@ -543,6 +543,104 @@ define void @build_v4i16_01zz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3)
543
543
ret void
544
544
}
545
545
546
+ define void @build_v4i16_0uuz (x86_mmx *%p0 , i16 %a0 , i16 %a1 , i16 %a2 , i16 %a3 ) nounwind {
547
+ ; X86-MMX-LABEL: build_v4i16_0uuz:
548
+ ; X86-MMX: # %bb.0:
549
+ ; X86-MMX-NEXT: pushl %ebp
550
+ ; X86-MMX-NEXT: movl %esp, %ebp
551
+ ; X86-MMX-NEXT: andl $-8, %esp
552
+ ; X86-MMX-NEXT: subl $8, %esp
553
+ ; X86-MMX-NEXT: movl 8(%ebp), %eax
554
+ ; X86-MMX-NEXT: movzwl %ax, %ecx
555
+ ; X86-MMX-NEXT: movl %ecx, {{[0-9]+}}(%esp)
556
+ ; X86-MMX-NEXT: shll $16, %ecx
557
+ ; X86-MMX-NEXT: movzwl 12(%ebp), %edx
558
+ ; X86-MMX-NEXT: orl %ecx, %edx
559
+ ; X86-MMX-NEXT: movl %edx, (%esp)
560
+ ; X86-MMX-NEXT: movq (%esp), %mm0
561
+ ; X86-MMX-NEXT: paddd %mm0, %mm0
562
+ ; X86-MMX-NEXT: movq %mm0, (%eax)
563
+ ; X86-MMX-NEXT: movl %ebp, %esp
564
+ ; X86-MMX-NEXT: popl %ebp
565
+ ; X86-MMX-NEXT: retl
566
+ ;
567
+ ; X86-SSE2-LABEL: build_v4i16_0uuz:
568
+ ; X86-SSE2: # %bb.0:
569
+ ; X86-SSE2-NEXT: pushl %ebp
570
+ ; X86-SSE2-NEXT: movl %esp, %ebp
571
+ ; X86-SSE2-NEXT: andl $-8, %esp
572
+ ; X86-SSE2-NEXT: subl $8, %esp
573
+ ; X86-SSE2-NEXT: movl 8(%ebp), %eax
574
+ ; X86-SSE2-NEXT: pxor %xmm0, %xmm0
575
+ ; X86-SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
576
+ ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
577
+ ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
578
+ ; X86-SSE2-NEXT: movq %xmm1, (%esp)
579
+ ; X86-SSE2-NEXT: movq (%esp), %mm0
580
+ ; X86-SSE2-NEXT: paddd %mm0, %mm0
581
+ ; X86-SSE2-NEXT: movq %mm0, (%eax)
582
+ ; X86-SSE2-NEXT: movl %ebp, %esp
583
+ ; X86-SSE2-NEXT: popl %ebp
584
+ ; X86-SSE2-NEXT: retl
585
+ ;
586
+ ; X86-SSSE3-LABEL: build_v4i16_0uuz:
587
+ ; X86-SSSE3: # %bb.0:
588
+ ; X86-SSSE3-NEXT: pushl %ebp
589
+ ; X86-SSSE3-NEXT: movl %esp, %ebp
590
+ ; X86-SSSE3-NEXT: andl $-8, %esp
591
+ ; X86-SSSE3-NEXT: subl $8, %esp
592
+ ; X86-SSSE3-NEXT: movl 8(%ebp), %eax
593
+ ; X86-SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
594
+ ; X86-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
595
+ ; X86-SSSE3-NEXT: movq %xmm0, (%esp)
596
+ ; X86-SSSE3-NEXT: movq (%esp), %mm0
597
+ ; X86-SSSE3-NEXT: paddd %mm0, %mm0
598
+ ; X86-SSSE3-NEXT: movq %mm0, (%eax)
599
+ ; X86-SSSE3-NEXT: movl %ebp, %esp
600
+ ; X86-SSSE3-NEXT: popl %ebp
601
+ ; X86-SSSE3-NEXT: retl
602
+ ;
603
+ ; X64-SSE2-LABEL: build_v4i16_0uuz:
604
+ ; X64-SSE2: # %bb.0:
605
+ ; X64-SSE2-NEXT: movd %esi, %xmm0
606
+ ; X64-SSE2-NEXT: pxor %xmm1, %xmm1
607
+ ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
608
+ ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
609
+ ; X64-SSE2-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
610
+ ; X64-SSE2-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
611
+ ; X64-SSE2-NEXT: paddd %mm0, %mm0
612
+ ; X64-SSE2-NEXT: movq %mm0, (%rdi)
613
+ ; X64-SSE2-NEXT: retq
614
+ ;
615
+ ; X64-SSSE3-LABEL: build_v4i16_0uuz:
616
+ ; X64-SSSE3: # %bb.0:
617
+ ; X64-SSSE3-NEXT: movd %esi, %xmm0
618
+ ; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
619
+ ; X64-SSSE3-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
620
+ ; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
621
+ ; X64-SSSE3-NEXT: paddd %mm0, %mm0
622
+ ; X64-SSSE3-NEXT: movq %mm0, (%rdi)
623
+ ; X64-SSSE3-NEXT: retq
624
+ ;
625
+ ; X64-AVX-LABEL: build_v4i16_0uuz:
626
+ ; X64-AVX: # %bb.0:
627
+ ; X64-AVX-NEXT: vmovd %esi, %xmm0
628
+ ; X64-AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
629
+ ; X64-AVX-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
630
+ ; X64-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
631
+ ; X64-AVX-NEXT: paddd %mm0, %mm0
632
+ ; X64-AVX-NEXT: movq %mm0, (%rdi)
633
+ ; X64-AVX-NEXT: retq
634
+ %1 = insertelement <4 x i16 > undef , i16 %a0 , i32 0 ; lane 0 = %a0 (the only i16 argument this body references)
635
+ %2 = insertelement <4 x i16 > %1 , i16 undef , i32 1 ; lane 1 is explicitly undef
636
+ %3 = insertelement <4 x i16 > %2 , i16 undef , i32 2 ; lane 2 is explicitly undef
637
+ %4 = insertelement <4 x i16 > %3 , i16 0 , i32 3 ; lane 3 = constant 0
638
+ %5 = bitcast <4 x i16 > %4 to x86_mmx ; reinterpret the <4 x i16> value as x86_mmx
639
+ %6 = tail call x86_mmx @llvm.x86.mmx.padd.d (x86_mmx %5 , x86_mmx %5 ) ; pass it as both operands of the MMX padd.d intrinsic
640
+ store x86_mmx %6 , x86_mmx *%p0 ; write the intrinsic's result through %p0
641
+ ret void
642
+ }
643
+
546
644
define void @build_v4i16_0zuz (x86_mmx *%p0 , i16 %a0 , i16 %a1 , i16 %a2 , i16 %a3 ) nounwind {
547
645
; X86-MMX-LABEL: build_v4i16_0zuz:
548
646
; X86-MMX: # %bb.0:
@@ -1295,6 +1393,112 @@ define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i
1295
1393
ret void
1296
1394
}
1297
1395
1396
+ define void @build_v8i8_0uuuuzzz (x86_mmx *%p0 , i8 %a0 , i8 %a1 , i8 %a2 , i8 %a3 , i8 %a4 , i8 %a5 , i8 %a6 , i8 %a7 ) nounwind {
1397
+ ; X86-MMX-LABEL: build_v8i8_0uuuuzzz:
1398
+ ; X86-MMX: # %bb.0:
1399
+ ; X86-MMX-NEXT: pushl %ebp
1400
+ ; X86-MMX-NEXT: movl %esp, %ebp
1401
+ ; X86-MMX-NEXT: andl $-8, %esp
1402
+ ; X86-MMX-NEXT: subl $8, %esp
1403
+ ; X86-MMX-NEXT: movl 8(%ebp), %eax
1404
+ ; X86-MMX-NEXT: movzbl 12(%ebp), %ecx
1405
+ ; X86-MMX-NEXT: movl %ecx, (%esp)
1406
+ ; X86-MMX-NEXT: movl $0, {{[0-9]+}}(%esp)
1407
+ ; X86-MMX-NEXT: movq (%esp), %mm0
1408
+ ; X86-MMX-NEXT: paddd %mm0, %mm0
1409
+ ; X86-MMX-NEXT: movq %mm0, (%eax)
1410
+ ; X86-MMX-NEXT: movl %ebp, %esp
1411
+ ; X86-MMX-NEXT: popl %ebp
1412
+ ; X86-MMX-NEXT: retl
1413
+ ;
1414
+ ; X86-SSE2-LABEL: build_v8i8_0uuuuzzz:
1415
+ ; X86-SSE2: # %bb.0:
1416
+ ; X86-SSE2-NEXT: pushl %ebp
1417
+ ; X86-SSE2-NEXT: movl %esp, %ebp
1418
+ ; X86-SSE2-NEXT: andl $-8, %esp
1419
+ ; X86-SSE2-NEXT: subl $8, %esp
1420
+ ; X86-SSE2-NEXT: movl 8(%ebp), %eax
1421
+ ; X86-SSE2-NEXT: movzbl 12(%ebp), %ecx
1422
+ ; X86-SSE2-NEXT: movd %ecx, %xmm0
1423
+ ; X86-SSE2-NEXT: pxor %xmm1, %xmm1
1424
+ ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1425
+ ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1426
+ ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1427
+ ; X86-SSE2-NEXT: movq %xmm0, (%esp)
1428
+ ; X86-SSE2-NEXT: movq (%esp), %mm0
1429
+ ; X86-SSE2-NEXT: paddd %mm0, %mm0
1430
+ ; X86-SSE2-NEXT: movq %mm0, (%eax)
1431
+ ; X86-SSE2-NEXT: movl %ebp, %esp
1432
+ ; X86-SSE2-NEXT: popl %ebp
1433
+ ; X86-SSE2-NEXT: retl
1434
+ ;
1435
+ ; X86-SSSE3-LABEL: build_v8i8_0uuuuzzz:
1436
+ ; X86-SSSE3: # %bb.0:
1437
+ ; X86-SSSE3-NEXT: pushl %ebp
1438
+ ; X86-SSSE3-NEXT: movl %esp, %ebp
1439
+ ; X86-SSSE3-NEXT: andl $-8, %esp
1440
+ ; X86-SSSE3-NEXT: subl $8, %esp
1441
+ ; X86-SSSE3-NEXT: movl 8(%ebp), %eax
1442
+ ; X86-SSSE3-NEXT: movzbl 12(%ebp), %ecx
1443
+ ; X86-SSSE3-NEXT: movd %ecx, %xmm0
1444
+ ; X86-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1445
+ ; X86-SSSE3-NEXT: movq %xmm0, (%esp)
1446
+ ; X86-SSSE3-NEXT: movq (%esp), %mm0
1447
+ ; X86-SSSE3-NEXT: paddd %mm0, %mm0
1448
+ ; X86-SSSE3-NEXT: movq %mm0, (%eax)
1449
+ ; X86-SSSE3-NEXT: movl %ebp, %esp
1450
+ ; X86-SSSE3-NEXT: popl %ebp
1451
+ ; X86-SSSE3-NEXT: retl
1452
+ ;
1453
+ ; X64-SSE2-LABEL: build_v8i8_0uuuuzzz:
1454
+ ; X64-SSE2: # %bb.0:
1455
+ ; X64-SSE2-NEXT: movzwl %si, %eax
1456
+ ; X64-SSE2-NEXT: movd %eax, %xmm0
1457
+ ; X64-SSE2-NEXT: pxor %xmm1, %xmm1
1458
+ ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1459
+ ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1460
+ ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1461
+ ; X64-SSE2-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
1462
+ ; X64-SSE2-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
1463
+ ; X64-SSE2-NEXT: paddd %mm0, %mm0
1464
+ ; X64-SSE2-NEXT: movq %mm0, (%rdi)
1465
+ ; X64-SSE2-NEXT: retq
1466
+ ;
1467
+ ; X64-SSSE3-LABEL: build_v8i8_0uuuuzzz:
1468
+ ; X64-SSSE3: # %bb.0:
1469
+ ; X64-SSSE3-NEXT: movzwl %si, %eax
1470
+ ; X64-SSSE3-NEXT: movd %eax, %xmm0
1471
+ ; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1472
+ ; X64-SSSE3-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
1473
+ ; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
1474
+ ; X64-SSSE3-NEXT: paddd %mm0, %mm0
1475
+ ; X64-SSSE3-NEXT: movq %mm0, (%rdi)
1476
+ ; X64-SSSE3-NEXT: retq
1477
+ ;
1478
+ ; X64-AVX-LABEL: build_v8i8_0uuuuzzz:
1479
+ ; X64-AVX: # %bb.0:
1480
+ ; X64-AVX-NEXT: movzwl %si, %eax
1481
+ ; X64-AVX-NEXT: vmovd %eax, %xmm0
1482
+ ; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1483
+ ; X64-AVX-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
1484
+ ; X64-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
1485
+ ; X64-AVX-NEXT: paddd %mm0, %mm0
1486
+ ; X64-AVX-NEXT: movq %mm0, (%rdi)
1487
+ ; X64-AVX-NEXT: retq
1488
+ %1 = insertelement <8 x i8 > undef , i8 %a0 , i32 0 ; lane 0 = %a0 (the only i8 argument this body references)
1489
+ %2 = insertelement <8 x i8 > %1 , i8 undef , i32 1 ; lane 1 is explicitly undef
1490
+ %3 = insertelement <8 x i8 > %2 , i8 undef , i32 2 ; lane 2 is explicitly undef
1491
+ %4 = insertelement <8 x i8 > %3 , i8 undef , i32 3 ; lane 3 is explicitly undef
1492
+ %5 = insertelement <8 x i8 > %4 , i8 undef , i32 4 ; lane 4 is explicitly undef
1493
+ %6 = insertelement <8 x i8 > %5 , i8 0 , i32 5 ; lane 5 = constant 0
1494
+ %7 = insertelement <8 x i8 > %6 , i8 0 , i32 6 ; lane 6 = constant 0
1495
+ %8 = insertelement <8 x i8 > %7 , i8 0 , i32 7 ; lane 7 = constant 0
1496
+ %9 = bitcast <8 x i8 > %8 to x86_mmx ; reinterpret the <8 x i8> value as x86_mmx
1497
+ %10 = tail call x86_mmx @llvm.x86.mmx.padd.d (x86_mmx %9 , x86_mmx %9 ) ; pass it as both operands of the MMX padd.d intrinsic
1498
+ store x86_mmx %10 , x86_mmx *%p0 ; write the intrinsic's result through %p0
1499
+ ret void
1500
+ }
1501
+
1298
1502
define void @build_v8i8_0zzzzzzu (x86_mmx *%p0 , i8 %a0 , i8 %a1 , i8 %a2 , i8 %a3 , i8 %a4 , i8 %a5 , i8 %a6 , i8 %a7 ) nounwind {
1299
1503
; X86-MMX-LABEL: build_v8i8_0zzzzzzu:
1300
1504
; X86-MMX: # %bb.0:
0 commit comments