@@ -338,6 +338,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
338
338
bool HasDQI = ST->hasDQI ();
339
339
bool HasBWI = ST->hasBWI ();
340
340
bool HasVLX = ST->hasVLX ();
341
+ bool MultiDomain = ST->hasAVX512 () || ST->hasNoDomainDelayMov ();
341
342
342
343
struct FixupEntry {
343
344
int Op;
@@ -401,47 +402,107 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
401
402
case X86::VMOVAPDrm:
402
403
case X86::VMOVAPSrm:
403
404
case X86::VMOVUPDrm:
404
- case X86::VMOVUPSrm:
405
- return FixupConstant ({{X86::VMOVSSrm, 1 , 32 , rebuildZeroUpperCst},
406
- {X86::VBROADCASTSSrm, 1 , 32 , rebuildSplatCst},
407
- {X86::VMOVSDrm, 1 , 64 , rebuildZeroUpperCst},
408
- {X86::VMOVDDUPrm, 1 , 64 , rebuildSplatCst}},
409
- 128 , 1 );
405
+ case X86::VMOVUPSrm: {
406
+ FixupEntry Fixups[] = {
407
+ {MultiDomain ? X86::VPMOVSXBQrm : 0 , 2 , 8 , rebuildSExtCst},
408
+ {MultiDomain ? X86::VPMOVZXBQrm : 0 , 2 , 8 , rebuildZExtCst},
409
+ {X86::VMOVSSrm, 1 , 32 , rebuildZeroUpperCst},
410
+ {X86::VBROADCASTSSrm, 1 , 32 , rebuildSplatCst},
411
+ {MultiDomain ? X86::VPMOVSXBDrm : 0 , 4 , 8 , rebuildSExtCst},
412
+ {MultiDomain ? X86::VPMOVZXBDrm : 0 , 4 , 8 , rebuildZExtCst},
413
+ {MultiDomain ? X86::VPMOVSXWQrm : 0 , 2 , 16 , rebuildSExtCst},
414
+ {MultiDomain ? X86::VPMOVZXWQrm : 0 , 2 , 16 , rebuildZExtCst},
415
+ {X86::VMOVSDrm, 1 , 64 , rebuildZeroUpperCst},
416
+ {X86::VMOVDDUPrm, 1 , 64 , rebuildSplatCst},
417
+ {MultiDomain ? X86::VPMOVSXWDrm : 0 , 4 , 16 , rebuildSExtCst},
418
+ {MultiDomain ? X86::VPMOVZXWDrm : 0 , 4 , 16 , rebuildZExtCst},
419
+ {MultiDomain ? X86::VPMOVSXDQrm : 0 , 2 , 32 , rebuildSExtCst},
420
+ {MultiDomain ? X86::VPMOVZXDQrm : 0 , 2 , 32 , rebuildZExtCst}};
421
+ return FixupConstant (Fixups, 128 , 1 );
422
+ }
410
423
case X86::VMOVAPDYrm:
411
424
case X86::VMOVAPSYrm:
412
425
case X86::VMOVUPDYrm:
413
- case X86::VMOVUPSYrm:
414
- return FixupConstant ({{X86::VBROADCASTSSYrm, 1 , 32 , rebuildSplatCst},
415
- {X86::VBROADCASTSDYrm, 1 , 64 , rebuildSplatCst},
416
- {X86::VBROADCASTF128rm, 1 , 128 , rebuildSplatCst}},
417
- 256 , 1 );
426
+ case X86::VMOVUPSYrm: {
427
+ FixupEntry Fixups[] = {
428
+ {X86::VBROADCASTSSYrm, 1 , 32 , rebuildSplatCst},
429
+ {HasAVX2 && MultiDomain ? X86::VPMOVSXBQYrm : 0 , 4 , 8 , rebuildSExtCst},
430
+ {HasAVX2 && MultiDomain ? X86::VPMOVZXBQYrm : 0 , 4 , 8 , rebuildZExtCst},
431
+ {X86::VBROADCASTSDYrm, 1 , 64 , rebuildSplatCst},
432
+ {HasAVX2 && MultiDomain ? X86::VPMOVSXBDYrm : 0 , 8 , 8 , rebuildSExtCst},
433
+ {HasAVX2 && MultiDomain ? X86::VPMOVZXBDYrm : 0 , 8 , 8 , rebuildZExtCst},
434
+ {HasAVX2 && MultiDomain ? X86::VPMOVSXWQYrm : 0 , 4 , 16 , rebuildSExtCst},
435
+ {HasAVX2 && MultiDomain ? X86::VPMOVZXWQYrm : 0 , 4 , 16 , rebuildZExtCst},
436
+ {X86::VBROADCASTF128rm, 1 , 128 , rebuildSplatCst},
437
+ {HasAVX2 && MultiDomain ? X86::VPMOVSXWDYrm : 0 , 8 , 16 , rebuildSExtCst},
438
+ {HasAVX2 && MultiDomain ? X86::VPMOVZXWDYrm : 0 , 8 , 16 , rebuildZExtCst},
439
+ {HasAVX2 && MultiDomain ? X86::VPMOVSXDQYrm : 0 , 4 , 32 , rebuildSExtCst},
440
+ {HasAVX2 && MultiDomain ? X86::VPMOVZXDQYrm : 0 , 4 , 32 ,
441
+ rebuildZExtCst}};
442
+ return FixupConstant (Fixups, 256 , 1 );
443
+ }
418
444
case X86::VMOVAPDZ128rm:
419
445
case X86::VMOVAPSZ128rm:
420
446
case X86::VMOVUPDZ128rm:
421
- case X86::VMOVUPSZ128rm:
422
- return FixupConstant ({{X86::VMOVSSZrm, 1 , 32 , rebuildZeroUpperCst},
423
- {X86::VBROADCASTSSZ128rm, 1 , 32 , rebuildSplatCst},
424
- {X86::VMOVSDZrm, 1 , 64 , rebuildZeroUpperCst},
425
- {X86::VMOVDDUPZ128rm, 1 , 64 , rebuildSplatCst}},
426
- 128 , 1 );
447
+ case X86::VMOVUPSZ128rm: {
448
+ FixupEntry Fixups[] = {
449
+ {MultiDomain ? X86::VPMOVSXBQZ128rm : 0 , 2 , 8 , rebuildSExtCst},
450
+ {MultiDomain ? X86::VPMOVZXBQZ128rm : 0 , 2 , 8 , rebuildZExtCst},
451
+ {X86::VMOVSSZrm, 1 , 32 , rebuildZeroUpperCst},
452
+ {X86::VBROADCASTSSZ128rm, 1 , 32 , rebuildSplatCst},
453
+ {MultiDomain ? X86::VPMOVSXBDZ128rm : 0 , 4 , 8 , rebuildSExtCst},
454
+ {MultiDomain ? X86::VPMOVZXBDZ128rm : 0 , 4 , 8 , rebuildZExtCst},
455
+ {MultiDomain ? X86::VPMOVSXWQZ128rm : 0 , 2 , 16 , rebuildSExtCst},
456
+ {MultiDomain ? X86::VPMOVZXWQZ128rm : 0 , 2 , 16 , rebuildZExtCst},
457
+ {X86::VMOVSDZrm, 1 , 64 , rebuildZeroUpperCst},
458
+ {X86::VMOVDDUPZ128rm, 1 , 64 , rebuildSplatCst},
459
+ {MultiDomain ? X86::VPMOVSXWDZ128rm : 0 , 4 , 16 , rebuildSExtCst},
460
+ {MultiDomain ? X86::VPMOVZXWDZ128rm : 0 , 4 , 16 , rebuildZExtCst},
461
+ {MultiDomain ? X86::VPMOVSXDQZ128rm : 0 , 2 , 32 , rebuildSExtCst},
462
+ {MultiDomain ? X86::VPMOVZXDQZ128rm : 0 , 2 , 32 , rebuildZExtCst}};
463
+ return FixupConstant (Fixups, 128 , 1 );
464
+ }
427
465
case X86::VMOVAPDZ256rm:
428
466
case X86::VMOVAPSZ256rm:
429
467
case X86::VMOVUPDZ256rm:
430
- case X86::VMOVUPSZ256rm:
431
- return FixupConstant (
432
- {{X86::VBROADCASTSSZ256rm, 1 , 32 , rebuildSplatCst},
433
- {X86::VBROADCASTSDZ256rm, 1 , 64 , rebuildSplatCst},
434
- {X86::VBROADCASTF32X4Z256rm, 1 , 128 , rebuildSplatCst}},
435
- 256 , 1 );
468
+ case X86::VMOVUPSZ256rm: {
469
+ FixupEntry Fixups[] = {
470
+ {X86::VBROADCASTSSZ256rm, 1 , 32 , rebuildSplatCst},
471
+ {MultiDomain ? X86::VPMOVSXBQZ256rm : 0 , 4 , 8 , rebuildSExtCst},
472
+ {MultiDomain ? X86::VPMOVZXBQZ256rm : 0 , 4 , 8 , rebuildZExtCst},
473
+ {X86::VBROADCASTSDZ256rm, 1 , 64 , rebuildSplatCst},
474
+ {MultiDomain ? X86::VPMOVSXBDZ256rm : 0 , 8 , 8 , rebuildSExtCst},
475
+ {MultiDomain ? X86::VPMOVZXBDZ256rm : 0 , 8 , 8 , rebuildZExtCst},
476
+ {MultiDomain ? X86::VPMOVSXWQZ256rm : 0 , 4 , 16 , rebuildSExtCst},
477
+ {MultiDomain ? X86::VPMOVZXWQZ256rm : 0 , 4 , 16 , rebuildZExtCst},
478
+ {X86::VBROADCASTF32X4Z256rm, 1 , 128 , rebuildSplatCst},
479
+ {MultiDomain ? X86::VPMOVSXWDZ256rm : 0 , 8 , 16 , rebuildSExtCst},
480
+ {MultiDomain ? X86::VPMOVZXWDZ256rm : 0 , 8 , 16 , rebuildZExtCst},
481
+ {MultiDomain ? X86::VPMOVSXDQZ256rm : 0 , 4 , 32 , rebuildSExtCst},
482
+ {MultiDomain ? X86::VPMOVZXDQZ256rm : 0 , 4 , 32 , rebuildZExtCst}};
483
+ return FixupConstant (Fixups, 256 , 1 );
484
+ }
436
485
case X86::VMOVAPDZrm:
437
486
case X86::VMOVAPSZrm:
438
487
case X86::VMOVUPDZrm:
439
- case X86::VMOVUPSZrm:
440
- return FixupConstant ({{X86::VBROADCASTSSZrm, 1 , 32 , rebuildSplatCst},
441
- {X86::VBROADCASTSDZrm, 1 , 64 , rebuildSplatCst},
442
- {X86::VBROADCASTF32X4Zrm, 1 , 128 , rebuildSplatCst},
443
- {X86::VBROADCASTF64X4Zrm, 1 , 256 , rebuildSplatCst}},
444
- 512 , 1 );
488
+ case X86::VMOVUPSZrm: {
489
+ FixupEntry Fixups[] = {
490
+ {X86::VBROADCASTSSZrm, 1 , 32 , rebuildSplatCst},
491
+ {X86::VBROADCASTSDZrm, 1 , 64 , rebuildSplatCst},
492
+ {MultiDomain ? X86::VPMOVSXBQZrm : 0 , 8 , 8 , rebuildSExtCst},
493
+ {MultiDomain ? X86::VPMOVZXBQZrm : 0 , 8 , 8 , rebuildZExtCst},
494
+ {X86::VBROADCASTF32X4Zrm, 1 , 128 , rebuildSplatCst},
495
+ {MultiDomain ? X86::VPMOVSXBDZrm : 0 , 16 , 8 , rebuildSExtCst},
496
+ {MultiDomain ? X86::VPMOVZXBDZrm : 0 , 16 , 8 , rebuildZExtCst},
497
+ {MultiDomain ? X86::VPMOVSXWQZrm : 0 , 8 , 16 , rebuildSExtCst},
498
+ {MultiDomain ? X86::VPMOVZXWQZrm : 0 , 8 , 16 , rebuildZExtCst},
499
+ {X86::VBROADCASTF64X4Zrm, 1 , 256 , rebuildSplatCst},
500
+ {MultiDomain ? X86::VPMOVSXWDZrm : 0 , 16 , 16 , rebuildSExtCst},
501
+ {MultiDomain ? X86::VPMOVZXWDZrm : 0 , 16 , 16 , rebuildZExtCst},
502
+ {MultiDomain ? X86::VPMOVSXDQZrm : 0 , 8 , 32 , rebuildSExtCst},
503
+ {MultiDomain ? X86::VPMOVZXDQZrm : 0 , 8 , 32 , rebuildZExtCst}};
504
+ return FixupConstant (Fixups, 512 , 1 );
505
+ }
445
506
/* Integer Loads */
446
507
case X86::MOVDQArm:
447
508
case X86::MOVDQUrm: {
0 commit comments