@@ -397,6 +397,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
397
397
{X86::VMOVSDZrm, 1 , 64 , rebuildZeroUpperCst},
398
398
{X86::VMOVDDUPZ128rm, 1 , 64 , rebuildSplatCst}},
399
399
1 );
400
+ case X86::VMOVAPDZ128rmk:
401
+ case X86::VMOVUPDZ128rmk:
402
+ return FixupConstant ({{X86::VMOVSDZrmk, 1 , 64 , rebuildZeroUpperCst},
403
+ {X86::VMOVDDUPZ128rmk, 1 , 64 , rebuildSplatCst}},
404
+ 3 );
405
+ case X86::VMOVAPDZ128rmkz:
406
+ case X86::VMOVUPDZ128rmkz:
407
+ return FixupConstant ({{X86::VMOVSDZrmkz, 1 , 64 , rebuildZeroUpperCst},
408
+ {X86::VMOVDDUPZ128rmkz, 1 , 64 , rebuildSplatCst}},
409
+ 2 );
410
+ case X86::VMOVAPSZ128rmk:
411
+ case X86::VMOVUPSZ128rmk:
412
+ return FixupConstant ({{X86::VMOVSSZrmk, 1 , 32 , rebuildZeroUpperCst},
413
+ {X86::VBROADCASTSSZ128rmk, 1 , 32 , rebuildSplatCst}},
414
+ 3 );
415
+ case X86::VMOVAPSZ128rmkz:
416
+ case X86::VMOVUPSZ128rmkz:
417
+ return FixupConstant ({{X86::VMOVSSZrmkz, 1 , 32 , rebuildZeroUpperCst},
418
+ {X86::VBROADCASTSSZ128rmkz, 1 , 32 , rebuildSplatCst}},
419
+ 2 );
400
420
case X86::VMOVAPDZ256rm:
401
421
case X86::VMOVAPSZ256rm:
402
422
case X86::VMOVUPDZ256rm:
@@ -406,6 +426,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
406
426
{X86::VBROADCASTSDZ256rm, 1 , 64 , rebuildSplatCst},
407
427
{X86::VBROADCASTF32X4Z256rm, 1 , 128 , rebuildSplatCst}},
408
428
1 );
429
+ case X86::VMOVAPDZ256rmk:
430
+ case X86::VMOVUPDZ256rmk:
431
+ return FixupConstant ({{X86::VBROADCASTSDZ256rmk, 1 , 64 , rebuildSplatCst}},
432
+ 3 );
433
+ case X86::VMOVAPDZ256rmkz:
434
+ case X86::VMOVUPDZ256rmkz:
435
+ return FixupConstant ({{X86::VBROADCASTSDZ256rmkz, 1 , 64 , rebuildSplatCst}},
436
+ 2 );
437
+ case X86::VMOVAPSZ256rmk:
438
+ case X86::VMOVUPSZ256rmk:
439
+ return FixupConstant (
440
+ {{X86::VBROADCASTSSZ256rmk, 1 , 32 , rebuildSplatCst},
441
+ {X86::VBROADCASTF32X4Z256rmk, 1 , 128 , rebuildSplatCst}},
442
+ 3 );
443
+ case X86::VMOVAPSZ256rmkz:
444
+ case X86::VMOVUPSZ256rmkz:
445
+ return FixupConstant (
446
+ {{X86::VBROADCASTSSZ256rmkz, 1 , 32 , rebuildSplatCst},
447
+ {X86::VBROADCASTF32X4Z256rmkz, 1 , 128 , rebuildSplatCst}},
448
+ 2 );
409
449
case X86::VMOVAPDZrm:
410
450
case X86::VMOVAPSZrm:
411
451
case X86::VMOVUPDZrm:
@@ -415,6 +455,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
415
455
{X86::VBROADCASTF32X4rm, 1 , 128 , rebuildSplatCst},
416
456
{X86::VBROADCASTF64X4rm, 1 , 256 , rebuildSplatCst}},
417
457
1 );
458
+ case X86::VMOVAPDZrmk:
459
+ case X86::VMOVUPDZrmk:
460
+ return FixupConstant ({{X86::VBROADCASTSDZrmk, 1 , 64 , rebuildSplatCst},
461
+ {X86::VBROADCASTF64X4rmk, 1 , 256 , rebuildSplatCst}},
462
+ 3 );
463
+ case X86::VMOVAPDZrmkz:
464
+ case X86::VMOVUPDZrmkz:
465
+ return FixupConstant ({{X86::VBROADCASTSDZrmkz, 1 , 64 , rebuildSplatCst},
466
+ {X86::VBROADCASTF64X4rmkz, 1 , 256 , rebuildSplatCst}},
467
+ 2 );
468
+ case X86::VMOVAPSZrmk:
469
+ case X86::VMOVUPSZrmk:
470
+ return FixupConstant ({{X86::VBROADCASTSSZrmk, 1 , 32 , rebuildSplatCst},
471
+ {X86::VBROADCASTF32X4rmk, 1 , 128 , rebuildSplatCst}},
472
+ 3 );
473
+ case X86::VMOVAPSZrmkz:
474
+ case X86::VMOVUPSZrmkz:
475
+ return FixupConstant ({{X86::VBROADCASTSSZrmkz, 1 , 32 , rebuildSplatCst},
476
+ {X86::VBROADCASTF32X4rmkz, 1 , 128 , rebuildSplatCst}},
477
+ 2 );
418
478
/* Integer Loads */
419
479
case X86::MOVDQArm:
420
480
case X86::MOVDQUrm: {
@@ -510,6 +570,42 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
510
570
{X86::VPMOVZXDQZ128rm, 2 , 32 , rebuildZExtCst}};
511
571
return FixupConstant (Fixups, 1 );
512
572
}
573
+ case X86::VMOVDQA32Z128rmk:
574
+ case X86::VMOVDQU32Z128rmk:
575
+ return FixupConstant ({{X86::VPBROADCASTDZ128rmk, 1 , 32 , rebuildSplatCst},
576
+ {X86::VPMOVSXBDZ128rmk, 4 , 8 , rebuildSExtCst},
577
+ {X86::VPMOVZXBDZ128rmk, 4 , 8 , rebuildZExtCst},
578
+ {X86::VPMOVSXWDZ128rmk, 4 , 16 , rebuildSExtCst},
579
+ {X86::VPMOVZXWDZ128rmk, 4 , 16 , rebuildZExtCst}},
580
+ 3 );
581
+ case X86::VMOVDQA32Z128rmkz:
582
+ case X86::VMOVDQU32Z128rmkz:
583
+ return FixupConstant ({{X86::VPBROADCASTDZ128rmkz, 1 , 32 , rebuildSplatCst},
584
+ {X86::VPMOVSXBDZ128rmkz, 4 , 8 , rebuildSExtCst},
585
+ {X86::VPMOVZXBDZ128rmkz, 4 , 8 , rebuildZExtCst},
586
+ {X86::VPMOVSXWDZ128rmkz, 4 , 16 , rebuildSExtCst},
587
+ {X86::VPMOVZXWDZ128rmkz, 4 , 16 , rebuildZExtCst}},
588
+ 2 );
589
+ case X86::VMOVDQA64Z128rmk:
590
+ case X86::VMOVDQU64Z128rmk:
591
+ return FixupConstant ({{X86::VPMOVSXBQZ128rmk, 2 , 8 , rebuildSExtCst},
592
+ {X86::VPMOVZXBQZ128rmk, 2 , 8 , rebuildZExtCst},
593
+ {X86::VPMOVSXWQZ128rmk, 2 , 16 , rebuildSExtCst},
594
+ {X86::VPMOVZXWQZ128rmk, 2 , 16 , rebuildZExtCst},
595
+ {X86::VPBROADCASTQZ128rmk, 1 , 64 , rebuildSplatCst},
596
+ {X86::VPMOVSXDQZ128rmk, 2 , 32 , rebuildSExtCst},
597
+ {X86::VPMOVZXDQZ128rmk, 2 , 32 , rebuildZExtCst}},
598
+ 3 );
599
+ case X86::VMOVDQA64Z128rmkz:
600
+ case X86::VMOVDQU64Z128rmkz:
601
+ return FixupConstant ({{X86::VPMOVSXBQZ128rmkz, 2 , 8 , rebuildSExtCst},
602
+ {X86::VPMOVZXBQZ128rmkz, 2 , 8 , rebuildZExtCst},
603
+ {X86::VPMOVSXWQZ128rmkz, 2 , 16 , rebuildSExtCst},
604
+ {X86::VPMOVZXWQZ128rmkz, 2 , 16 , rebuildZExtCst},
605
+ {X86::VPBROADCASTQZ128rmkz, 1 , 64 , rebuildSplatCst},
606
+ {X86::VPMOVSXDQZ128rmkz, 2 , 32 , rebuildSExtCst},
607
+ {X86::VPMOVZXDQZ128rmkz, 2 , 32 , rebuildZExtCst}},
608
+ 2 );
513
609
case X86::VMOVDQA32Z256rm:
514
610
case X86::VMOVDQA64Z256rm:
515
611
case X86::VMOVDQU32Z256rm:
@@ -534,6 +630,46 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
534
630
{X86::VPMOVZXDQZ256rm, 4 , 32 , rebuildZExtCst}};
535
631
return FixupConstant (Fixups, 1 );
536
632
}
633
+ case X86::VMOVDQA32Z256rmk:
634
+ case X86::VMOVDQU32Z256rmk:
635
+ return FixupConstant (
636
+ {{X86::VPBROADCASTDZ256rmk, 1 , 32 , rebuildSplatCst},
637
+ {X86::VPMOVSXBDZ256rmk, 8 , 8 , rebuildSExtCst},
638
+ {X86::VPMOVZXBDZ256rmk, 8 , 8 , rebuildZExtCst},
639
+ {X86::VBROADCASTI32X4Z256rmk, 1 , 128 , rebuildSplatCst},
640
+ {X86::VPMOVSXWDZ256rmk, 8 , 16 , rebuildSExtCst},
641
+ {X86::VPMOVZXWDZ256rmk, 8 , 16 , rebuildZExtCst}},
642
+ 3 );
643
+ case X86::VMOVDQA32Z256rmkz:
644
+ case X86::VMOVDQU32Z256rmkz:
645
+ return FixupConstant (
646
+ {{X86::VPBROADCASTDZ256rmkz, 1 , 32 , rebuildSplatCst},
647
+ {X86::VPMOVSXBDZ256rmkz, 8 , 8 , rebuildSExtCst},
648
+ {X86::VPMOVZXBDZ256rmkz, 8 , 8 , rebuildZExtCst},
649
+ {X86::VBROADCASTI32X4Z256rmkz, 1 , 128 , rebuildSplatCst},
650
+ {X86::VPMOVSXWDZ256rmkz, 8 , 16 , rebuildSExtCst},
651
+ {X86::VPMOVZXWDZ256rmkz, 8 , 16 , rebuildZExtCst}},
652
+ 2 );
653
+ case X86::VMOVDQA64Z256rmk:
654
+ case X86::VMOVDQU64Z256rmk:
655
+ return FixupConstant ({{X86::VPMOVSXBQZ256rmk, 4 , 8 , rebuildSExtCst},
656
+ {X86::VPMOVZXBQZ256rmk, 4 , 8 , rebuildZExtCst},
657
+ {X86::VPBROADCASTQZ256rmk, 1 , 64 , rebuildSplatCst},
658
+ {X86::VPMOVSXWQZ256rmk, 4 , 16 , rebuildSExtCst},
659
+ {X86::VPMOVZXWQZ256rmk, 4 , 16 , rebuildZExtCst},
660
+ {X86::VPMOVSXDQZ256rmk, 4 , 32 , rebuildSExtCst},
661
+ {X86::VPMOVZXDQZ256rmk, 4 , 32 , rebuildZExtCst}},
662
+ 3 );
663
+ case X86::VMOVDQA64Z256rmkz:
664
+ case X86::VMOVDQU64Z256rmkz:
665
+ return FixupConstant ({{X86::VPMOVSXBQZ256rmkz, 4 , 8 , rebuildSExtCst},
666
+ {X86::VPMOVZXBQZ256rmkz, 4 , 8 , rebuildZExtCst},
667
+ {X86::VPBROADCASTQZ256rmkz, 1 , 64 , rebuildSplatCst},
668
+ {X86::VPMOVSXWQZ256rmkz, 4 , 16 , rebuildSExtCst},
669
+ {X86::VPMOVZXWQZ256rmkz, 4 , 16 , rebuildZExtCst},
670
+ {X86::VPMOVSXDQZ256rmkz, 4 , 32 , rebuildSExtCst},
671
+ {X86::VPMOVZXDQZ256rmkz, 4 , 32 , rebuildZExtCst}},
672
+ 2 );
537
673
case X86::VMOVDQA32Zrm:
538
674
case X86::VMOVDQA64Zrm:
539
675
case X86::VMOVDQU32Zrm:
@@ -559,41 +695,87 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
559
695
{X86::VPMOVZXDQZrm, 8 , 32 , rebuildZExtCst}};
560
696
return FixupConstant (Fixups, 1 );
561
697
}
698
+ case X86::VMOVDQA32Zrmk:
699
+ case X86::VMOVDQU32Zrmk:
700
+ return FixupConstant ({{X86::VPBROADCASTDZrmk, 1 , 32 , rebuildSplatCst},
701
+ {X86::VBROADCASTI32X4rmk, 1 , 128 , rebuildSplatCst},
702
+ {X86::VPMOVSXBDZrmk, 16 , 8 , rebuildSExtCst},
703
+ {X86::VPMOVZXBDZrmk, 16 , 8 , rebuildZExtCst},
704
+ {X86::VPMOVSXWDZrmk, 16 , 16 , rebuildSExtCst},
705
+ {X86::VPMOVZXWDZrmk, 16 , 16 , rebuildZExtCst}},
706
+ 3 );
707
+ case X86::VMOVDQA32Zrmkz:
708
+ case X86::VMOVDQU32Zrmkz:
709
+ return FixupConstant ({{X86::VPBROADCASTDZrmkz, 1 , 32 , rebuildSplatCst},
710
+ {X86::VBROADCASTI32X4rmkz, 1 , 128 , rebuildSplatCst},
711
+ {X86::VPMOVSXBDZrmkz, 16 , 8 , rebuildSExtCst},
712
+ {X86::VPMOVZXBDZrmkz, 16 , 8 , rebuildZExtCst},
713
+ {X86::VPMOVSXWDZrmkz, 16 , 16 , rebuildSExtCst},
714
+ {X86::VPMOVZXWDZrmkz, 16 , 16 , rebuildZExtCst}},
715
+ 2 );
716
+ case X86::VMOVDQA64Zrmk:
717
+ case X86::VMOVDQU64Zrmk:
718
+ return FixupConstant ({{X86::VPBROADCASTQZrmk, 1 , 64 , rebuildSplatCst},
719
+ {X86::VPMOVSXBQZrmk, 8 , 8 , rebuildSExtCst},
720
+ {X86::VPMOVZXBQZrmk, 8 , 8 , rebuildZExtCst},
721
+ {X86::VPMOVSXWQZrmk, 8 , 16 , rebuildSExtCst},
722
+ {X86::VPMOVZXWQZrmk, 8 , 16 , rebuildZExtCst},
723
+ {X86::VBROADCASTI64X4rmk, 1 , 256 , rebuildSplatCst},
724
+ {X86::VPMOVSXDQZrmk, 8 , 32 , rebuildSExtCst},
725
+ {X86::VPMOVZXDQZrmk, 8 , 32 , rebuildZExtCst}},
726
+ 3 );
727
+ case X86::VMOVDQA64Zrmkz:
728
+ case X86::VMOVDQU64Zrmkz:
729
+ return FixupConstant ({{X86::VPBROADCASTQZrmkz, 1 , 64 , rebuildSplatCst},
730
+ {X86::VPMOVSXBQZrmkz, 8 , 8 , rebuildSExtCst},
731
+ {X86::VPMOVZXBQZrmkz, 8 , 8 , rebuildZExtCst},
732
+ {X86::VPMOVSXWQZrmkz, 8 , 16 , rebuildSExtCst},
733
+ {X86::VPMOVZXWQZrmkz, 8 , 16 , rebuildZExtCst},
734
+ {X86::VBROADCASTI64X4rmkz, 1 , 256 , rebuildSplatCst},
735
+ {X86::VPMOVSXDQZrmkz, 8 , 32 , rebuildSExtCst},
736
+ {X86::VPMOVZXDQZrmkz, 8 , 32 , rebuildZExtCst}},
737
+ 2 );
562
738
}
563
739
564
- auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
565
- unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
566
- unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
740
+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
741
+ unsigned OpSrc64) {
742
+ if (OpSrc16) {
743
+ if (const X86FoldTableEntry *Mem2Bcst =
744
+ llvm::lookupBroadcastFoldTableBySize (OpSrc16, 16 )) {
745
+ unsigned OpBcst16 = Mem2Bcst->DstOp ;
746
+ unsigned OpNoBcst16 = Mem2Bcst->Flags & TB_INDEX_MASK;
747
+ FixupEntry Fixups[] = {{(int )OpBcst16, 1 , 16 , rebuildSplatCst}};
748
+ if (FixupConstant (Fixups, OpNoBcst16))
749
+ return true ;
750
+ }
751
+ }
567
752
if (OpSrc32) {
568
753
if (const X86FoldTableEntry *Mem2Bcst =
569
754
llvm::lookupBroadcastFoldTableBySize (OpSrc32, 32 )) {
570
- OpBcst32 = Mem2Bcst->DstOp ;
571
- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
755
+ unsigned OpBcst32 = Mem2Bcst->DstOp ;
756
+ unsigned OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
757
+ FixupEntry Fixups[] = {{(int )OpBcst32, 1 , 32 , rebuildSplatCst}};
758
+ if (FixupConstant (Fixups, OpNoBcst32))
759
+ return true ;
572
760
}
573
761
}
574
762
if (OpSrc64) {
575
763
if (const X86FoldTableEntry *Mem2Bcst =
576
764
llvm::lookupBroadcastFoldTableBySize (OpSrc64, 64 )) {
577
- OpBcst64 = Mem2Bcst->DstOp ;
578
- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
765
+ unsigned OpBcst64 = Mem2Bcst->DstOp ;
766
+ unsigned OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
767
+ FixupEntry Fixups[] = {{(int )OpBcst64, 1 , 64 , rebuildSplatCst}};
768
+ if (FixupConstant (Fixups, OpNoBcst64))
769
+ return true ;
579
770
}
580
771
}
581
- assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
582
- " OperandNo mismatch" );
583
-
584
- if (OpBcst32 || OpBcst64) {
585
- unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
586
- FixupEntry Fixups[] = {{(int )OpBcst32, 32 , 32 , rebuildSplatCst},
587
- {(int )OpBcst64, 64 , 64 , rebuildSplatCst}};
588
- return FixupConstant (Fixups, OpNo);
589
- }
590
772
return false ;
591
773
};
592
774
593
775
// Attempt to find an AVX512 mapping from a full-width memory-fold instruction
594
776
// to a broadcast-fold instruction variant.
595
777
if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
596
- return ConvertToBroadcastAVX512 (Opc, Opc);
778
+ return ConvertToBroadcastAVX512 (Opc, Opc, Opc );
597
779
598
780
// Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
599
781
// conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -650,7 +832,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
650
832
break ;
651
833
}
652
834
if (OpSrc32 || OpSrc64)
653
- return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
835
+ return ConvertToBroadcastAVX512 (0 , OpSrc32, OpSrc64);
654
836
}
655
837
656
838
return false ;
0 commit comments