@@ -407,6 +407,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                           {X86::VMOVSDZrm, 1, 64, rebuildZeroUpperCst},
                           {X86::VMOVDDUPZ128rm, 1, 64, rebuildSplatCst}},
                          128, 1);
+  case X86::VMOVAPDZ128rmk:
+  case X86::VMOVUPDZ128rmk:
+    return FixupConstant({{X86::VMOVSDZrmk, 1, 64, rebuildZeroUpperCst},
+                          {X86::VMOVDDUPZ128rmk, 1, 64, rebuildSplatCst}},
+                         128, 3);
+  case X86::VMOVAPDZ128rmkz:
+  case X86::VMOVUPDZ128rmkz:
+    return FixupConstant({{X86::VMOVSDZrmkz, 1, 64, rebuildZeroUpperCst},
+                          {X86::VMOVDDUPZ128rmkz, 1, 64, rebuildSplatCst}},
+                         128, 2);
+  case X86::VMOVAPSZ128rmk:
+  case X86::VMOVUPSZ128rmk:
+    return FixupConstant({{X86::VMOVSSZrmk, 1, 32, rebuildZeroUpperCst},
+                          {X86::VBROADCASTSSZ128rmk, 1, 32, rebuildSplatCst}},
+                         128, 3);
+  case X86::VMOVAPSZ128rmkz:
+  case X86::VMOVUPSZ128rmkz:
+    return FixupConstant({{X86::VMOVSSZrmkz, 1, 32, rebuildZeroUpperCst},
+                          {X86::VBROADCASTSSZ128rmkz, 1, 32, rebuildSplatCst}},
+                         128, 2);
   case X86::VMOVAPDZ256rm:
   case X86::VMOVAPSZ256rm:
   case X86::VMOVUPDZ256rm:
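
Note: the trailing arguments to FixupConstant are the register bit width and the operand index of the constant-pool memory reference, which is why the new masked cases pass 3 (rmk) and 2 (rmkz) where the unmasked rm cases pass 1 — merge-masking carries a tied pass-through register before the k-mask, adding one operand. A minimal standalone sketch of that operand-layout convention (the enum and helper are illustrative names of ours, not LLVM API):

// Illustrative model of where the constant-pool memory operand sits in the
// three EVEX load forms handled above.
enum class MaskKind { None, ZeroMask, MergeMask };

// rm   -> [dst, mem...]                  : memory operand starts at index 1
// rmkz -> [dst, kmask, mem...]           : index 2
// rmk  -> [dst, passthru, kmask, mem...] : index 3
constexpr unsigned constantPoolOperandIndex(MaskKind K) {
  switch (K) {
  case MaskKind::None:      return 1;
  case MaskKind::ZeroMask:  return 2;
  case MaskKind::MergeMask: return 3;
  }
  return 0; // unreachable
}

static_assert(constantPoolOperandIndex(MaskKind::MergeMask) == 3,
              "matches the ', 3);' in the rmk cases above");
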
@@ -416,6 +436,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                           {X86::VBROADCASTSDZ256rm, 1, 64, rebuildSplatCst},
                           {X86::VBROADCASTF32X4Z256rm, 1, 128, rebuildSplatCst}},
                          256, 1);
+  case X86::VMOVAPDZ256rmk:
+  case X86::VMOVUPDZ256rmk:
+    return FixupConstant({{X86::VBROADCASTSDZ256rmk, 1, 64, rebuildSplatCst}},
+                         256, 3);
+  case X86::VMOVAPDZ256rmkz:
+  case X86::VMOVUPDZ256rmkz:
+    return FixupConstant({{X86::VBROADCASTSDZ256rmkz, 1, 64, rebuildSplatCst}},
+                         256, 2);
+  case X86::VMOVAPSZ256rmk:
+  case X86::VMOVUPSZ256rmk:
+    return FixupConstant(
+        {{X86::VBROADCASTSSZ256rmk, 1, 32, rebuildSplatCst},
+         {X86::VBROADCASTF32X4Z256rmk, 1, 128, rebuildSplatCst}},
+        256, 3);
+  case X86::VMOVAPSZ256rmkz:
+  case X86::VMOVUPSZ256rmkz:
+    return FixupConstant(
+        {{X86::VBROADCASTSSZ256rmkz, 1, 32, rebuildSplatCst},
+         {X86::VBROADCASTF32X4Z256rmkz, 1, 128, rebuildSplatCst}},
+        256, 2);
   case X86::VMOVAPDZrm:
   case X86::VMOVAPSZrm:
   case X86::VMOVUPDZrm:
@@ -425,6 +465,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                           {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
                           {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
                          512, 1);
+  case X86::VMOVAPDZrmk:
+  case X86::VMOVUPDZrmk:
+    return FixupConstant({{X86::VBROADCASTSDZrmk, 1, 64, rebuildSplatCst},
+                          {X86::VBROADCASTF64X4rmk, 1, 256, rebuildSplatCst}},
+                         512, 3);
+  case X86::VMOVAPDZrmkz:
+  case X86::VMOVUPDZrmkz:
+    return FixupConstant({{X86::VBROADCASTSDZrmkz, 1, 64, rebuildSplatCst},
+                          {X86::VBROADCASTF64X4rmkz, 1, 256, rebuildSplatCst}},
+                         512, 2);
+  case X86::VMOVAPSZrmk:
+  case X86::VMOVUPSZrmk:
+    return FixupConstant({{X86::VBROADCASTSSZrmk, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTF32X4rmk, 1, 128, rebuildSplatCst}},
+                         512, 3);
+  case X86::VMOVAPSZrmkz:
+  case X86::VMOVUPSZrmkz:
+    return FixupConstant({{X86::VBROADCASTSSZrmkz, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTF32X4rmkz, 1, 128, rebuildSplatCst}},
+                         512, 2);
   /* Integer Loads */
   case X86::MOVDQArm:
   case X86::MOVDQUrm: {
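
Note: each FixupEntry encodes how much the constant-pool entry shrinks — after the rewrite the pool slot occupies NumCstElts x MemBitWidth bits instead of the full register width. A back-of-envelope helper, assuming exactly that layout (illustrative only, not part of the pass):

// Rough constant-pool footprint after a fixup, assuming the rewritten entry
// stores NumCstElts elements of MemBitWidth bits each.
constexpr unsigned poolBytesAfterFixup(unsigned NumCstElts,
                                       unsigned MemBitWidth) {
  return NumCstElts * MemBitWidth / 8;
}

// {X86::VBROADCASTSDZrmk, 1, 64, rebuildSplatCst}: 64 bytes of zmm constant
// shrink to a single 8-byte element reloaded via broadcast.
static_assert(poolBytesAfterFixup(1, 64) == 8, "512-bit splat -> one i64");
static_assert(poolBytesAfterFixup(1, 32) == 4, "128-bit splat -> one i32");
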
@@ -520,6 +580,42 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                            {X86::VPMOVZXDQZ128rm, 2, 32, rebuildZExtCst}};
     return FixupConstant(Fixups, 128, 1);
   }
+  case X86::VMOVDQA32Z128rmk:
+  case X86::VMOVDQU32Z128rmk:
+    return FixupConstant({{X86::VPBROADCASTDZ128rmk, 1, 32, rebuildSplatCst},
+                          {X86::VPMOVSXBDZ128rmk, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZ128rmk, 4, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZ128rmk, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZ128rmk, 4, 16, rebuildZExtCst}},
+                         128, 3);
+  case X86::VMOVDQA32Z128rmkz:
+  case X86::VMOVDQU32Z128rmkz:
+    return FixupConstant({{X86::VPBROADCASTDZ128rmkz, 1, 32, rebuildSplatCst},
+                          {X86::VPMOVSXBDZ128rmkz, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZ128rmkz, 4, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZ128rmkz, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZ128rmkz, 4, 16, rebuildZExtCst}},
+                         128, 2);
+  case X86::VMOVDQA64Z128rmk:
+  case X86::VMOVDQU64Z128rmk:
+    return FixupConstant({{X86::VPMOVSXBQZ128rmk, 2, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ128rmk, 2, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZ128rmk, 2, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ128rmk, 2, 16, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ128rmk, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXDQZ128rmk, 2, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ128rmk, 2, 32, rebuildZExtCst}},
+                         128, 3);
+  case X86::VMOVDQA64Z128rmkz:
+  case X86::VMOVDQU64Z128rmkz:
+    return FixupConstant({{X86::VPMOVSXBQZ128rmkz, 2, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ128rmkz, 2, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZ128rmkz, 2, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ128rmkz, 2, 16, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ128rmkz, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXDQZ128rmkz, 2, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ128rmkz, 2, 32, rebuildZExtCst}},
+                         128, 2);
   case X86::VMOVDQA32Z256rm:
   case X86::VMOVDQA64Z256rm:
   case X86::VMOVDQU32Z256rm:
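
Note: the sign/zero-extend entries (rebuildSExtCst / rebuildZExtCst) only fire when every lane of the original constant round-trips through the narrower element type; e.g. {X86::VPMOVSXBDZ128rmk, 4, 8, rebuildSExtCst} stores four i8s in the pool and reloads them with a sign-extending VPMOVSXBD. A standalone sketch of that round-trip test (not the pass's actual helper):

#include <cstdint>
#include <vector>

// A <4 x i32> constant can be stored as four i8s and reloaded with a
// sign-extending VPMOVSXBD iff every lane survives an i32 -> i8 -> i32
// sign-extension round trip.
bool canRebuildViaSExt8(const std::vector<int32_t> &Lanes) {
  for (int32_t L : Lanes)
    if (L != static_cast<int32_t>(static_cast<int8_t>(L)))
      return false;
  return true;
}

// {1, -1, 2, -128} -> true: 16 pool bytes become 4.
// {300, 0, 0, 0}   -> false: 300 is not a sign-extended i8.
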
@@ -544,6 +640,46 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                            {X86::VPMOVZXDQZ256rm, 4, 32, rebuildZExtCst}};
     return FixupConstant(Fixups, 256, 1);
   }
+  case X86::VMOVDQA32Z256rmk:
+  case X86::VMOVDQU32Z256rmk:
+    return FixupConstant(
+        {{X86::VPBROADCASTDZ256rmk, 1, 32, rebuildSplatCst},
+         {X86::VPMOVSXBDZ256rmk, 8, 8, rebuildSExtCst},
+         {X86::VPMOVZXBDZ256rmk, 8, 8, rebuildZExtCst},
+         {X86::VBROADCASTI32X4Z256rmk, 1, 128, rebuildSplatCst},
+         {X86::VPMOVSXWDZ256rmk, 8, 16, rebuildSExtCst},
+         {X86::VPMOVZXWDZ256rmk, 8, 16, rebuildZExtCst}},
+        256, 3);
+  case X86::VMOVDQA32Z256rmkz:
+  case X86::VMOVDQU32Z256rmkz:
+    return FixupConstant(
+        {{X86::VPBROADCASTDZ256rmkz, 1, 32, rebuildSplatCst},
+         {X86::VPMOVSXBDZ256rmkz, 8, 8, rebuildSExtCst},
+         {X86::VPMOVZXBDZ256rmkz, 8, 8, rebuildZExtCst},
+         {X86::VBROADCASTI32X4Z256rmkz, 1, 128, rebuildSplatCst},
+         {X86::VPMOVSXWDZ256rmkz, 8, 16, rebuildSExtCst},
+         {X86::VPMOVZXWDZ256rmkz, 8, 16, rebuildZExtCst}},
+        256, 2);
+  case X86::VMOVDQA64Z256rmk:
+  case X86::VMOVDQU64Z256rmk:
+    return FixupConstant({{X86::VPMOVSXBQZ256rmk, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ256rmk, 4, 8, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ256rmk, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXWQZ256rmk, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ256rmk, 4, 16, rebuildZExtCst},
+                          {X86::VPMOVSXDQZ256rmk, 4, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ256rmk, 4, 32, rebuildZExtCst}},
+                         256, 3);
+  case X86::VMOVDQA64Z256rmkz:
+  case X86::VMOVDQU64Z256rmkz:
+    return FixupConstant({{X86::VPMOVSXBQZ256rmkz, 4, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZ256rmkz, 4, 8, rebuildZExtCst},
+                          {X86::VPBROADCASTQZ256rmkz, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXWQZ256rmkz, 4, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZ256rmkz, 4, 16, rebuildZExtCst},
+                          {X86::VPMOVSXDQZ256rmkz, 4, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZ256rmkz, 4, 32, rebuildZExtCst}},
+                         256, 2);
   case X86::VMOVDQA32Zrm:
   case X86::VMOVDQA64Zrm:
   case X86::VMOVDQU32Zrm:
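
Note: these rewrites stay legal for masked loads because EVEX masking selects lanes on the register result after the load — the rmk/rmkz replacement opcodes keep the same k-mask and pass-through operands and materialize the same full-width value, just from a smaller pool entry. A scalar model of the two masking modes, assuming per-lane select semantics (illustrative only):

#include <array>
#include <cstddef>
#include <cstdint>

// Scalar model of EVEX per-lane masking: the fixup only changes how
// `loaded` is materialized; the select below is untouched by it.
template <std::size_t N>
std::array<int32_t, N> maskedLoadResult(const std::array<int32_t, N> &loaded,
                                        const std::array<int32_t, N> &passthru,
                                        std::uint16_t kmask, bool zeroMasking) {
  std::array<int32_t, N> out{};
  for (std::size_t i = 0; i < N; ++i)
    out[i] = ((kmask >> i) & 1) ? loaded[i]
                                : (zeroMasking ? 0 : passthru[i]);
  return out;
}
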
@@ -569,43 +705,93 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                            {X86::VPMOVZXDQZrm, 8, 32, rebuildZExtCst}};
     return FixupConstant(Fixups, 512, 1);
   }
+  case X86::VMOVDQA32Zrmk:
+  case X86::VMOVDQU32Zrmk:
+    return FixupConstant({{X86::VPBROADCASTDZrmk, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTI32X4rmk, 1, 128, rebuildSplatCst},
+                          {X86::VPMOVSXBDZrmk, 16, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZrmk, 16, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZrmk, 16, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZrmk, 16, 16, rebuildZExtCst}},
+                         512, 3);
+  case X86::VMOVDQA32Zrmkz:
+  case X86::VMOVDQU32Zrmkz:
+    return FixupConstant({{X86::VPBROADCASTDZrmkz, 1, 32, rebuildSplatCst},
+                          {X86::VBROADCASTI32X4rmkz, 1, 128, rebuildSplatCst},
+                          {X86::VPMOVSXBDZrmkz, 16, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBDZrmkz, 16, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWDZrmkz, 16, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWDZrmkz, 16, 16, rebuildZExtCst}},
+                         512, 2);
+  case X86::VMOVDQA64Zrmk:
+  case X86::VMOVDQU64Zrmk:
+    return FixupConstant({{X86::VPBROADCASTQZrmk, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXBQZrmk, 8, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZrmk, 8, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZrmk, 8, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZrmk, 8, 16, rebuildZExtCst},
+                          {X86::VBROADCASTI64X4rmk, 1, 256, rebuildSplatCst},
+                          {X86::VPMOVSXDQZrmk, 8, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZrmk, 8, 32, rebuildZExtCst}},
+                         512, 3);
+  case X86::VMOVDQA64Zrmkz:
+  case X86::VMOVDQU64Zrmkz:
+    return FixupConstant({{X86::VPBROADCASTQZrmkz, 1, 64, rebuildSplatCst},
+                          {X86::VPMOVSXBQZrmkz, 8, 8, rebuildSExtCst},
+                          {X86::VPMOVZXBQZrmkz, 8, 8, rebuildZExtCst},
+                          {X86::VPMOVSXWQZrmkz, 8, 16, rebuildSExtCst},
+                          {X86::VPMOVZXWQZrmkz, 8, 16, rebuildZExtCst},
+                          {X86::VBROADCASTI64X4rmkz, 1, 256, rebuildSplatCst},
+                          {X86::VPMOVSXDQZrmkz, 8, 32, rebuildSExtCst},
+                          {X86::VPMOVZXDQZrmkz, 8, 32, rebuildZExtCst}},
+                         512, 2);
   }
 
-  auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
-    unsigned OpBcst32 = 0, OpBcst64 = 0;
-    unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
+  auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
+                                      unsigned OpSrc64) {
+    if (OpSrc16) {
+      if (const X86FoldTableEntry *Mem2Bcst =
+              llvm::lookupBroadcastFoldTableBySize(OpSrc16, 16)) {
+        unsigned OpBcst16 = Mem2Bcst->DstOp;
+        unsigned OpNoBcst16 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        FixupEntry Fixups[] = {{(int)OpBcst16, 1, 16, rebuildSplatCst}};
+        // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
+        // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
+        if (FixupConstant(Fixups, 0, OpNoBcst16))
+          return true;
+      }
+    }
     if (OpSrc32) {
       if (const X86FoldTableEntry *Mem2Bcst =
               llvm::lookupBroadcastFoldTableBySize(OpSrc32, 32)) {
-        OpBcst32 = Mem2Bcst->DstOp;
-        OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        unsigned OpBcst32 = Mem2Bcst->DstOp;
+        unsigned OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        FixupEntry Fixups[] = {{(int)OpBcst32, 1, 32, rebuildSplatCst}};
+        // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
+        // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
+        if (FixupConstant(Fixups, 0, OpNoBcst32))
+          return true;
       }
     }
     if (OpSrc64) {
       if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTableBySize(OpSrc64, 64)) {
-        OpBcst64 = Mem2Bcst->DstOp;
-        OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        unsigned OpBcst64 = Mem2Bcst->DstOp;
+        unsigned OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+        FixupEntry Fixups[] = {{(int)OpBcst64, 1, 64, rebuildSplatCst}};
+        // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
+        // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
+        if (FixupConstant(Fixups, 0, OpNoBcst64))
+          return true;
      }
    }
-    assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
-           "OperandNo mismatch");
-
-    if (OpBcst32 || OpBcst64) {
-      unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
-      FixupEntry Fixups[] = {{(int)OpBcst32, 32, 32, rebuildSplatCst},
-                             {(int)OpBcst64, 64, 64, rebuildSplatCst}};
-      // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
-      // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
-      return FixupConstant(Fixups, 0, OpNo);
-    }
     return false;
   };
 
   // Attempt to find a AVX512 mapping from a full width memory-fold instruction
   // to a broadcast-fold instruction variant.
   if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
-    return ConvertToBroadcastAVX512(Opc, Opc);
+    return ConvertToBroadcastAVX512(Opc, Opc, Opc);
 
   // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
   // conversion to see if we can convert to a broadcasted (integer) logic op.
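
Note: the refactored lambda can drop the old "OperandNo mismatch" assert because each broadcast width (now including 16-bit) is looked up and attempted independently, so every candidate carries its own folded-operand index recovered from the fold-table flags. A minimal sketch of that flag decoding; the 0xf mask value is an assumed stand-in for illustration, not necessarily LLVM's actual TB_INDEX_MASK:

#include <cstdint>

// Mirrors `Mem2Bcst->Flags & TB_INDEX_MASK` in the diff above.
constexpr std::uint32_t kIndexMask = 0xf;
constexpr unsigned foldedOperandIndex(std::uint32_t Flags) {
  return Flags & kIndexMask;
}

static_assert(foldedOperandIndex(0x23) == 3,
              "low bits of the fold-table flags carry the operand index");
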
@@ -662,7 +848,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
       break;
     }
     if (OpSrc32 || OpSrc64)
-      return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
+      return ConvertToBroadcastAVX512(0, OpSrc32, OpSrc64);
   }
 
   return false;