@@ -424,6 +424,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
424
424
{X86::VMOVSDZrm, 1 , 64 , rebuildZeroUpperCst},
425
425
{X86::VMOVDDUPZ128rm, 1 , 64 , rebuildSplatCst}},
426
426
128 , 1 );
427
+ case X86::VMOVAPDZ128rmk:
428
+ case X86::VMOVUPDZ128rmk:
429
+ return FixupConstant ({{X86::VMOVSDZrmk, 1 , 64 , rebuildZeroUpperCst},
430
+ {X86::VMOVDDUPZ128rmk, 1 , 64 , rebuildSplatCst}},
431
+ 128 , 3 );
432
+ case X86::VMOVAPDZ128rmkz:
433
+ case X86::VMOVUPDZ128rmkz:
434
+ return FixupConstant ({{X86::VMOVSDZrmkz, 1 , 64 , rebuildZeroUpperCst},
435
+ {X86::VMOVDDUPZ128rmkz, 1 , 64 , rebuildSplatCst}},
436
+ 128 , 2 );
437
+ case X86::VMOVAPSZ128rmk:
438
+ case X86::VMOVUPSZ128rmk:
439
+ return FixupConstant ({{X86::VMOVSSZrmk, 1 , 32 , rebuildZeroUpperCst},
440
+ {X86::VBROADCASTSSZ128rmk, 1 , 32 , rebuildSplatCst}},
441
+ 128 , 3 );
442
+ case X86::VMOVAPSZ128rmkz:
443
+ case X86::VMOVUPSZ128rmkz:
444
+ return FixupConstant ({{X86::VMOVSSZrmkz, 1 , 32 , rebuildZeroUpperCst},
445
+ {X86::VBROADCASTSSZ128rmkz, 1 , 32 , rebuildSplatCst}},
446
+ 128 , 2 );
427
447
case X86::VMOVAPDZ256rm:
428
448
case X86::VMOVAPSZ256rm:
429
449
case X86::VMOVUPDZ256rm:
@@ -433,6 +453,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
433
453
{X86::VBROADCASTSDZ256rm, 1 , 64 , rebuildSplatCst},
434
454
{X86::VBROADCASTF32X4Z256rm, 1 , 128 , rebuildSplatCst}},
435
455
256 , 1 );
456
+ case X86::VMOVAPDZ256rmk:
457
+ case X86::VMOVUPDZ256rmk:
458
+ return FixupConstant ({{X86::VBROADCASTSDZ256rmk, 1 , 64 , rebuildSplatCst}},
459
+ 256 , 3 );
460
+ case X86::VMOVAPDZ256rmkz:
461
+ case X86::VMOVUPDZ256rmkz:
462
+ return FixupConstant ({{X86::VBROADCASTSDZ256rmkz, 1 , 64 , rebuildSplatCst}},
463
+ 256 , 2 );
464
+ case X86::VMOVAPSZ256rmk:
465
+ case X86::VMOVUPSZ256rmk:
466
+ return FixupConstant (
467
+ {{X86::VBROADCASTSSZ256rmk, 1 , 32 , rebuildSplatCst},
468
+ {X86::VBROADCASTF32X4Z256rmk, 1 , 128 , rebuildSplatCst}},
469
+ 256 , 3 );
470
+ case X86::VMOVAPSZ256rmkz:
471
+ case X86::VMOVUPSZ256rmkz:
472
+ return FixupConstant (
473
+ {{X86::VBROADCASTSSZ256rmkz, 1 , 32 , rebuildSplatCst},
474
+ {X86::VBROADCASTF32X4Z256rmkz, 1 , 128 , rebuildSplatCst}},
475
+ 256 , 2 );
436
476
case X86::VMOVAPDZrm:
437
477
case X86::VMOVAPSZrm:
438
478
case X86::VMOVUPDZrm:
@@ -442,6 +482,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
442
482
{X86::VBROADCASTF32X4Zrm, 1 , 128 , rebuildSplatCst},
443
483
{X86::VBROADCASTF64X4Zrm, 1 , 256 , rebuildSplatCst}},
444
484
512 , 1 );
485
+ case X86::VMOVAPDZrmk:
486
+ case X86::VMOVUPDZrmk:
487
+ return FixupConstant ({{X86::VBROADCASTSDZrmk, 1 , 64 , rebuildSplatCst},
488
+ {X86::VBROADCASTF64X4Zrmk, 1 , 256 , rebuildSplatCst}},
489
+ 512 , 3 );
490
+ case X86::VMOVAPDZrmkz:
491
+ case X86::VMOVUPDZrmkz:
492
+ return FixupConstant ({{X86::VBROADCASTSDZrmkz, 1 , 64 , rebuildSplatCst},
493
+ {X86::VBROADCASTF64X4Zrmkz, 1 , 256 , rebuildSplatCst}},
494
+ 512 , 2 );
495
+ case X86::VMOVAPSZrmk:
496
+ case X86::VMOVUPSZrmk:
497
+ return FixupConstant ({{X86::VBROADCASTSSZrmk, 1 , 32 , rebuildSplatCst},
498
+ {X86::VBROADCASTF32X4Zrmk, 1 , 128 , rebuildSplatCst}},
499
+ 512 , 3 );
500
+ case X86::VMOVAPSZrmkz:
501
+ case X86::VMOVUPSZrmkz:
502
+ return FixupConstant ({{X86::VBROADCASTSSZrmkz, 1 , 32 , rebuildSplatCst},
503
+ {X86::VBROADCASTF32X4Zrmkz, 1 , 128 , rebuildSplatCst}},
504
+ 512 , 2 );
445
505
/* Integer Loads */
446
506
case X86::MOVDQArm:
447
507
case X86::MOVDQUrm: {
@@ -537,6 +597,42 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
537
597
{X86::VPMOVZXDQZ128rm, 2 , 32 , rebuildZExtCst}};
538
598
return FixupConstant (Fixups, 128 , 1 );
539
599
}
600
+ case X86::VMOVDQA32Z128rmk:
601
+ case X86::VMOVDQU32Z128rmk:
602
+ return FixupConstant ({{X86::VPBROADCASTDZ128rmk, 1 , 32 , rebuildSplatCst},
603
+ {X86::VPMOVSXBDZ128rmk, 4 , 8 , rebuildSExtCst},
604
+ {X86::VPMOVZXBDZ128rmk, 4 , 8 , rebuildZExtCst},
605
+ {X86::VPMOVSXWDZ128rmk, 4 , 16 , rebuildSExtCst},
606
+ {X86::VPMOVZXWDZ128rmk, 4 , 16 , rebuildZExtCst}},
607
+ 128 , 3 );
608
+ case X86::VMOVDQA32Z128rmkz:
609
+ case X86::VMOVDQU32Z128rmkz:
610
+ return FixupConstant ({{X86::VPBROADCASTDZ128rmkz, 1 , 32 , rebuildSplatCst},
611
+ {X86::VPMOVSXBDZ128rmkz, 4 , 8 , rebuildSExtCst},
612
+ {X86::VPMOVZXBDZ128rmkz, 4 , 8 , rebuildZExtCst},
613
+ {X86::VPMOVSXWDZ128rmkz, 4 , 16 , rebuildSExtCst},
614
+ {X86::VPMOVZXWDZ128rmkz, 4 , 16 , rebuildZExtCst}},
615
+ 128 , 2 );
616
+ case X86::VMOVDQA64Z128rmk:
617
+ case X86::VMOVDQU64Z128rmk:
618
+ return FixupConstant ({{X86::VPMOVSXBQZ128rmk, 2 , 8 , rebuildSExtCst},
619
+ {X86::VPMOVZXBQZ128rmk, 2 , 8 , rebuildZExtCst},
620
+ {X86::VPMOVSXWQZ128rmk, 2 , 16 , rebuildSExtCst},
621
+ {X86::VPMOVZXWQZ128rmk, 2 , 16 , rebuildZExtCst},
622
+ {X86::VPBROADCASTQZ128rmk, 1 , 64 , rebuildSplatCst},
623
+ {X86::VPMOVSXDQZ128rmk, 2 , 32 , rebuildSExtCst},
624
+ {X86::VPMOVZXDQZ128rmk, 2 , 32 , rebuildZExtCst}},
625
+ 128 , 3 );
626
+ case X86::VMOVDQA64Z128rmkz:
627
+ case X86::VMOVDQU64Z128rmkz:
628
+ return FixupConstant ({{X86::VPMOVSXBQZ128rmkz, 2 , 8 , rebuildSExtCst},
629
+ {X86::VPMOVZXBQZ128rmkz, 2 , 8 , rebuildZExtCst},
630
+ {X86::VPMOVSXWQZ128rmkz, 2 , 16 , rebuildSExtCst},
631
+ {X86::VPMOVZXWQZ128rmkz, 2 , 16 , rebuildZExtCst},
632
+ {X86::VPBROADCASTQZ128rmkz, 1 , 64 , rebuildSplatCst},
633
+ {X86::VPMOVSXDQZ128rmkz, 2 , 32 , rebuildSExtCst},
634
+ {X86::VPMOVZXDQZ128rmkz, 2 , 32 , rebuildZExtCst}},
635
+ 128 , 2 );
540
636
case X86::VMOVDQA32Z256rm:
541
637
case X86::VMOVDQA64Z256rm:
542
638
case X86::VMOVDQU32Z256rm:
@@ -561,6 +657,46 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
561
657
{X86::VPMOVZXDQZ256rm, 4 , 32 , rebuildZExtCst}};
562
658
return FixupConstant (Fixups, 256 , 1 );
563
659
}
660
+ case X86::VMOVDQA32Z256rmk:
661
+ case X86::VMOVDQU32Z256rmk:
662
+ return FixupConstant (
663
+ {{X86::VPBROADCASTDZ256rmk, 1 , 32 , rebuildSplatCst},
664
+ {X86::VPMOVSXBDZ256rmk, 8 , 8 , rebuildSExtCst},
665
+ {X86::VPMOVZXBDZ256rmk, 8 , 8 , rebuildZExtCst},
666
+ {X86::VBROADCASTI32X4Z256rmk, 1 , 128 , rebuildSplatCst},
667
+ {X86::VPMOVSXWDZ256rmk, 8 , 16 , rebuildSExtCst},
668
+ {X86::VPMOVZXWDZ256rmk, 8 , 16 , rebuildZExtCst}},
669
+ 256 , 3 );
670
+ case X86::VMOVDQA32Z256rmkz:
671
+ case X86::VMOVDQU32Z256rmkz:
672
+ return FixupConstant (
673
+ {{X86::VPBROADCASTDZ256rmkz, 1 , 32 , rebuildSplatCst},
674
+ {X86::VPMOVSXBDZ256rmkz, 8 , 8 , rebuildSExtCst},
675
+ {X86::VPMOVZXBDZ256rmkz, 8 , 8 , rebuildZExtCst},
676
+ {X86::VBROADCASTI32X4Z256rmkz, 1 , 128 , rebuildSplatCst},
677
+ {X86::VPMOVSXWDZ256rmkz, 8 , 16 , rebuildSExtCst},
678
+ {X86::VPMOVZXWDZ256rmkz, 8 , 16 , rebuildZExtCst}},
679
+ 256 , 2 );
680
+ case X86::VMOVDQA64Z256rmk:
681
+ case X86::VMOVDQU64Z256rmk:
682
+ return FixupConstant ({{X86::VPMOVSXBQZ256rmk, 4 , 8 , rebuildSExtCst},
683
+ {X86::VPMOVZXBQZ256rmk, 4 , 8 , rebuildZExtCst},
684
+ {X86::VPBROADCASTQZ256rmk, 1 , 64 , rebuildSplatCst},
685
+ {X86::VPMOVSXWQZ256rmk, 4 , 16 , rebuildSExtCst},
686
+ {X86::VPMOVZXWQZ256rmk, 4 , 16 , rebuildZExtCst},
687
+ {X86::VPMOVSXDQZ256rmk, 4 , 32 , rebuildSExtCst},
688
+ {X86::VPMOVZXDQZ256rmk, 4 , 32 , rebuildZExtCst}},
689
+ 256 , 3 );
690
+ case X86::VMOVDQA64Z256rmkz:
691
+ case X86::VMOVDQU64Z256rmkz:
692
+ return FixupConstant ({{X86::VPMOVSXBQZ256rmkz, 4 , 8 , rebuildSExtCst},
693
+ {X86::VPMOVZXBQZ256rmkz, 4 , 8 , rebuildZExtCst},
694
+ {X86::VPBROADCASTQZ256rmkz, 1 , 64 , rebuildSplatCst},
695
+ {X86::VPMOVSXWQZ256rmkz, 4 , 16 , rebuildSExtCst},
696
+ {X86::VPMOVZXWQZ256rmkz, 4 , 16 , rebuildZExtCst},
697
+ {X86::VPMOVSXDQZ256rmkz, 4 , 32 , rebuildSExtCst},
698
+ {X86::VPMOVZXDQZ256rmkz, 4 , 32 , rebuildZExtCst}},
699
+ 256 , 2 );
564
700
case X86::VMOVDQA32Zrm:
565
701
case X86::VMOVDQA64Zrm:
566
702
case X86::VMOVDQU32Zrm:
@@ -586,43 +722,67 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
586
722
{X86::VPMOVZXDQZrm, 8 , 32 , rebuildZExtCst}};
587
723
return FixupConstant (Fixups, 512 , 1 );
588
724
}
725
+ case X86::VMOVDQA32Zrmk:
726
+ case X86::VMOVDQU32Zrmk:
727
+ return FixupConstant ({{X86::VPBROADCASTDZrmk, 1 , 32 , rebuildSplatCst},
728
+ {X86::VBROADCASTI32X4Zrmk, 1 , 128 , rebuildSplatCst},
729
+ {X86::VPMOVSXBDZrmk, 16 , 8 , rebuildSExtCst},
730
+ {X86::VPMOVZXBDZrmk, 16 , 8 , rebuildZExtCst},
731
+ {X86::VPMOVSXWDZrmk, 16 , 16 , rebuildSExtCst},
732
+ {X86::VPMOVZXWDZrmk, 16 , 16 , rebuildZExtCst}},
733
+ 512 , 3 );
734
+ case X86::VMOVDQA32Zrmkz:
735
+ case X86::VMOVDQU32Zrmkz:
736
+ return FixupConstant ({{X86::VPBROADCASTDZrmkz, 1 , 32 , rebuildSplatCst},
737
+ {X86::VBROADCASTI32X4Zrmkz, 1 , 128 , rebuildSplatCst},
738
+ {X86::VPMOVSXBDZrmkz, 16 , 8 , rebuildSExtCst},
739
+ {X86::VPMOVZXBDZrmkz, 16 , 8 , rebuildZExtCst},
740
+ {X86::VPMOVSXWDZrmkz, 16 , 16 , rebuildSExtCst},
741
+ {X86::VPMOVZXWDZrmkz, 16 , 16 , rebuildZExtCst}},
742
+ 512 , 2 );
743
+ case X86::VMOVDQA64Zrmk:
744
+ case X86::VMOVDQU64Zrmk:
745
+ return FixupConstant ({{X86::VPBROADCASTQZrmk, 1 , 64 , rebuildSplatCst},
746
+ {X86::VPMOVSXBQZrmk, 8 , 8 , rebuildSExtCst},
747
+ {X86::VPMOVZXBQZrmk, 8 , 8 , rebuildZExtCst},
748
+ {X86::VPMOVSXWQZrmk, 8 , 16 , rebuildSExtCst},
749
+ {X86::VPMOVZXWQZrmk, 8 , 16 , rebuildZExtCst},
750
+ {X86::VBROADCASTI64X4Zrmk, 1 , 256 , rebuildSplatCst},
751
+ {X86::VPMOVSXDQZrmk, 8 , 32 , rebuildSExtCst},
752
+ {X86::VPMOVZXDQZrmk, 8 , 32 , rebuildZExtCst}},
753
+ 512 , 3 );
754
+ case X86::VMOVDQA64Zrmkz:
755
+ case X86::VMOVDQU64Zrmkz:
756
+ return FixupConstant ({{X86::VPBROADCASTQZrmkz, 1 , 64 , rebuildSplatCst},
757
+ {X86::VPMOVSXBQZrmkz, 8 , 8 , rebuildSExtCst},
758
+ {X86::VPMOVZXBQZrmkz, 8 , 8 , rebuildZExtCst},
759
+ {X86::VPMOVSXWQZrmkz, 8 , 16 , rebuildSExtCst},
760
+ {X86::VPMOVZXWQZrmkz, 8 , 16 , rebuildZExtCst},
761
+ {X86::VBROADCASTI64X4Zrmkz, 1 , 256 , rebuildSplatCst},
762
+ {X86::VPMOVSXDQZrmkz, 8 , 32 , rebuildSExtCst},
763
+ {X86::VPMOVZXDQZrmkz, 8 , 32 , rebuildZExtCst}},
764
+ 512 , 2 );
589
765
}
590
766
591
- auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
592
- unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
593
- unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
594
- if (OpSrc32) {
595
- if (const X86FoldTableEntry *Mem2Bcst =
596
- llvm::lookupBroadcastFoldTableBySize (OpSrc32, 32 )) {
597
- OpBcst32 = Mem2Bcst->DstOp ;
598
- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
599
- }
600
- }
601
- if (OpSrc64) {
602
- if (const X86FoldTableEntry *Mem2Bcst =
603
- llvm::lookupBroadcastFoldTableBySize (OpSrc64, 64 )) {
604
- OpBcst64 = Mem2Bcst->DstOp ;
605
- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
606
- }
607
- }
608
- assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
609
- " OperandNo mismatch" );
610
-
611
- if (OpBcst32 || OpBcst64) {
612
- unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
613
- FixupEntry Fixups[] = {{(int )OpBcst32, 32 , 32 , rebuildSplatCst},
614
- {(int )OpBcst64, 64 , 64 , rebuildSplatCst}};
767
+ auto ConvertToBroadcast = [&](unsigned OpSrc, int BW) {
768
+ if (const X86FoldTableEntry *Mem2Bcst =
769
+ llvm::lookupBroadcastFoldTableBySize (OpSrc, BW)) {
770
+ unsigned OpBcst = Mem2Bcst->DstOp ;
771
+ unsigned OpNoBcst = Mem2Bcst->Flags & TB_INDEX_MASK;
772
+ FixupEntry Fixups[] = {{(int )OpBcst, 1 , BW, rebuildSplatCst}};
615
773
// TODO: Add support for RegBitWidth, but currently rebuildSplatCst
616
774
// doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
617
- return FixupConstant (Fixups, 0 , OpNo);
775
+ if (FixupConstant (Fixups, 0 , OpNoBcst))
776
+ return true ;
618
777
}
619
778
return false ;
620
779
};
621
780
622
781
// Attempt to find an AVX512 mapping from a full width memory-fold instruction
623
782
// to a broadcast-fold instruction variant.
624
783
if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
625
- return ConvertToBroadcastAVX512 (Opc, Opc);
784
+ return ConvertToBroadcast (Opc, 16 ) || ConvertToBroadcast (Opc, 32 ) ||
785
+ ConvertToBroadcast (Opc, 64 );
626
786
627
787
// Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
628
788
// conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -679,7 +839,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
679
839
break ;
680
840
}
681
841
if (OpSrc32 || OpSrc64)
682
- return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
842
+ return ConvertToBroadcast (OpSrc32, 32 ) || ConvertToBroadcast ( OpSrc64, 64 );
683
843
}
684
844
685
845
return false ;
0 commit comments