@@ -294,20 +294,56 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
294
294
case X86::VMOVUPSZ128rm:
295
295
return ConvertToBroadcast (0 , 0 , X86::VMOVDDUPZ128rm,
296
296
X86::VBROADCASTSSZ128rm, 0 , 0 , 1 );
297
+ case X86::VMOVAPDZ128rmk:
298
+ case X86::VMOVUPDZ128rmk:
299
+ return ConvertToBroadcast (0 , 0 , X86::VMOVDDUPZ128rmk, 0 , 0 , 0 , 3 );
300
+ case X86::VMOVAPSZ128rmk:
301
+ case X86::VMOVUPSZ128rmk:
302
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ128rmk, 0 , 0 , 3 );
303
+ case X86::VMOVAPDZ128rmkz:
304
+ case X86::VMOVUPDZ128rmkz:
305
+ return ConvertToBroadcast (0 , 0 , X86::VMOVDDUPZ128rmkz, 0 , 0 , 0 , 2 );
306
+ case X86::VMOVAPSZ128rmkz:
307
+ case X86::VMOVUPSZ128rmkz:
308
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ128rmkz, 0 , 0 , 2 );
297
309
case X86::VMOVAPDZ256rm:
298
310
case X86::VMOVAPSZ256rm:
299
311
case X86::VMOVUPDZ256rm:
300
312
case X86::VMOVUPSZ256rm:
301
313
return ConvertToBroadcast (0 , X86::VBROADCASTF32X4Z256rm,
302
314
X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm,
303
315
0 , 0 , 1 );
316
+ case X86::VMOVAPDZ256rmk:
317
+ case X86::VMOVUPDZ256rmk:
318
+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZ256rmk, 0 , 0 , 0 , 3 );
319
+ case X86::VMOVAPSZ256rmk:
320
+ case X86::VMOVUPSZ256rmk:
321
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ256rmk, 0 , 0 , 3 );
322
+ case X86::VMOVAPDZ256rmkz:
323
+ case X86::VMOVUPDZ256rmkz:
324
+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZ256rmkz, 0 , 0 , 0 , 2 );
325
+ case X86::VMOVAPSZ256rmkz:
326
+ case X86::VMOVUPSZ256rmkz:
327
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ256rmkz, 0 , 0 , 2 );
304
328
case X86::VMOVAPDZrm:
305
329
case X86::VMOVAPSZrm:
306
330
case X86::VMOVUPDZrm:
307
331
case X86::VMOVUPSZrm:
308
332
return ConvertToBroadcast (X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm,
309
333
X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0 , 0 ,
310
334
1 );
335
+ case X86::VMOVAPDZrmk:
336
+ case X86::VMOVUPDZrmk:
337
+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZrmk, 0 , 0 , 0 , 3 );
338
+ case X86::VMOVAPSZrmk:
339
+ case X86::VMOVUPSZrmk:
340
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZrmk, 0 , 0 , 3 );
341
+ case X86::VMOVAPDZrmkz:
342
+ case X86::VMOVUPDZrmkz:
343
+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZrmkz, 0 , 0 , 0 , 2 );
344
+ case X86::VMOVAPSZrmkz:
345
+ case X86::VMOVUPSZrmkz:
346
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZrmkz, 0 , 0 , 2 );
311
347
/* Integer Loads */
312
348
case X86::VMOVDQArm:
313
349
case X86::VMOVDQUrm:
@@ -332,6 +368,18 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
332
368
X86::VPBROADCASTDZ128rm,
333
369
HasBWI ? X86::VPBROADCASTWZ128rm : 0 ,
334
370
HasBWI ? X86::VPBROADCASTBZ128rm : 0 , 1 );
371
+ case X86::VMOVDQA32Z128rmk:
372
+ case X86::VMOVDQU32Z128rmk:
373
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VPBROADCASTDZ128rmk, 0 , 0 , 3 );
374
+ case X86::VMOVDQA32Z128rmkz:
375
+ case X86::VMOVDQU32Z128rmkz:
376
+ return ConvertToBroadcast (0 , 0 , 0 , X86::VPBROADCASTDZ128rmkz, 0 , 0 , 2 );
377
+ case X86::VMOVDQA64Z128rmk:
378
+ case X86::VMOVDQU64Z128rmk:
379
+ return ConvertToBroadcast (0 , 0 , X86::VPBROADCASTQZ128rmk, 0 , 0 , 0 , 3 );
380
+ case X86::VMOVDQA64Z128rmkz:
381
+ case X86::VMOVDQU64Z128rmkz:
382
+ return ConvertToBroadcast (0 , 0 , X86::VPBROADCASTQZ128rmkz, 0 , 0 , 0 , 2 );
335
383
case X86::VMOVDQA32Z256rm:
336
384
case X86::VMOVDQA64Z256rm:
337
385
case X86::VMOVDQU32Z256rm:
@@ -340,6 +388,24 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
340
388
X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
341
389
HasBWI ? X86::VPBROADCASTWZ256rm : 0 ,
342
390
HasBWI ? X86::VPBROADCASTBZ256rm : 0 , 1 );
391
+ case X86::VMOVDQA32Z256rmk:
392
+ case X86::VMOVDQU32Z256rmk:
393
+ return ConvertToBroadcast (0 , X86::VBROADCASTI32X4Z256rmk,
394
+ HasDQI ? X86::VBROADCASTI32X2Z256rmk : 0 ,
395
+ X86::VPBROADCASTDZ256rmk, 0 , 0 , 3 );
396
+ case X86::VMOVDQA32Z256rmkz:
397
+ case X86::VMOVDQU32Z256rmkz:
398
+ return ConvertToBroadcast (0 , X86::VBROADCASTI32X4Z256rmkz,
399
+ HasDQI ? X86::VBROADCASTI32X2Z256rmkz : 0 ,
400
+ X86::VPBROADCASTDZ256rmkz, 0 , 0 , 2 );
401
+ case X86::VMOVDQA64Z256rmk:
402
+ case X86::VMOVDQU64Z256rmk:
403
+ return ConvertToBroadcast (0 , HasDQI ? X86::VBROADCASTI64X2Z128rmk : 0 ,
404
+ X86::VPBROADCASTQZ256rmk, 0 , 0 , 0 , 3 );
405
+ case X86::VMOVDQA64Z256rmkz:
406
+ case X86::VMOVDQU64Z256rmkz:
407
+ return ConvertToBroadcast (0 , HasDQI ? X86::VBROADCASTI64X2Z128rmkz : 0 ,
408
+ X86::VPBROADCASTQZ256rmkz, 0 , 0 , 0 , 2 );
343
409
case X86::VMOVDQA32Zrm:
344
410
case X86::VMOVDQA64Zrm:
345
411
case X86::VMOVDQU32Zrm:
@@ -348,39 +414,62 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
348
414
X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
349
415
HasBWI ? X86::VPBROADCASTWZrm : 0 ,
350
416
HasBWI ? X86::VPBROADCASTBZrm : 0 , 1 );
417
+ case X86::VMOVDQA32Zrmk:
418
+ case X86::VMOVDQU32Zrmk:
419
+ return ConvertToBroadcast (
420
+ HasDQI ? X86::VBROADCASTI32X8rmk : 0 , X86::VBROADCASTI32X4rmk,
421
+ HasDQI ? X86::VBROADCASTI32X2Zrmk : 0 , X86::VPBROADCASTDZrmk, 0 , 0 , 3 );
422
+ case X86::VMOVDQA32Zrmkz:
423
+ case X86::VMOVDQU32Zrmkz:
424
+ return ConvertToBroadcast (HasDQI ? X86::VBROADCASTI32X8rmkz : 0 ,
425
+ X86::VBROADCASTI32X4rmkz,
426
+ HasDQI ? X86::VBROADCASTI32X2Zrmkz : 0 ,
427
+ X86::VPBROADCASTDZrmkz, 0 , 0 , 2 );
428
+ case X86::VMOVDQA64Zrmk:
429
+ case X86::VMOVDQU64Zrmk:
430
+ return ConvertToBroadcast (X86::VBROADCASTI64X4rmk,
431
+ HasDQI ? X86::VBROADCASTI64X2rmk : 0 ,
432
+ X86::VPBROADCASTQZrmk, 0 , 0 , 0 , 3 );
433
+ case X86::VMOVDQA64Zrmkz:
434
+ case X86::VMOVDQU64Zrmkz:
435
+ return ConvertToBroadcast (X86::VBROADCASTI64X4rmkz,
436
+ HasDQI ? X86::VBROADCASTI64X2rmkz : 0 ,
437
+ X86::VPBROADCASTQZrmkz, 0 , 0 , 0 , 2 );
351
438
}
352
439
353
// ConvertToBroadcastAVX512 helper lambda, shown as a diff ('-' = removed,
// '+' = added, unmarked = unchanged).
//
// Old form: took a 32-bit and a 64-bit source opcode, looked both up in the
// broadcast fold table, asserted that the operand numbers agreed, and issued
// one combined ConvertToBroadcast call.
//
// New form: additionally takes a 16-bit source opcode and tries each element
// width in turn (16, then 32, then 64). Each width gets its own
// ConvertToBroadcast call and the lambda returns true as soon as one call
// succeeds. A source opcode of 0 means "skip this width".
- auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
354
- unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
355
- unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
440
+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
441
+ unsigned OpSrc64) {
// 16-bit attempt (new). The fold-table entry's DstOp is passed as the
// broadcast opcode and (Flags & TB_INDEX_MASK) as the operand number.
// NOTE(review): DstOp lands in the 5th argument here, the 4th for 32-bit and
// the 3rd for 64-bit -- presumably the ConvertToBroadcast slots are ordered
// by descending broadcast width; confirm against its declaration.
442
+ if (OpSrc16) {
443
+ if (const X86FoldTableEntry *Mem2Bcst =
444
+ llvm::lookupBroadcastFoldTable (OpSrc16, 16 )) {
445
+ if (ConvertToBroadcast (0 , 0 , 0 , 0 , Mem2Bcst->DstOp , 0 ,
446
+ Mem2Bcst->Flags & TB_INDEX_MASK))
447
+ return true ;
448
+ }
449
+ }
// 32-bit attempt: previously only recorded the opcode/operand number for a
// later combined call; now converts immediately and returns on success.
356
450
if (OpSrc32) {
357
451
if (const X86FoldTableEntry *Mem2Bcst =
358
452
llvm::lookupBroadcastFoldTable (OpSrc32, 32 )) {
359
- OpBcst32 = Mem2Bcst->DstOp ;
360
- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
453
+ if (ConvertToBroadcast (0 , 0 , 0 , Mem2Bcst->DstOp , 0 , 0 ,
454
+ Mem2Bcst->Flags & TB_INDEX_MASK))
455
+ return true ;
361
456
}
362
457
}
// 64-bit attempt: same change as the 32-bit case. It is only reached when the
// narrower attempts were skipped or did not succeed.
363
458
if (OpSrc64) {
364
459
if (const X86FoldTableEntry *Mem2Bcst =
365
460
llvm::lookupBroadcastFoldTable (OpSrc64, 64 )) {
366
- OpBcst64 = Mem2Bcst->DstOp ;
367
- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
461
+ if (ConvertToBroadcast (0 , 0 , Mem2Bcst->DstOp , 0 , 0 , 0 ,
462
+ Mem2Bcst->Flags & TB_INDEX_MASK))
463
+ return true ;
368
464
}
369
465
}
// Removed: the operand-number consistency assert and the single combined
// ConvertToBroadcast call. Each width now carries its own operand number, so
// the cross-width check is no longer performed.
370
- assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
371
- " OperandNo mismatch" );
372
-
373
- if (OpBcst32 || OpBcst64) {
374
- unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
375
- return ConvertToBroadcast (0 , 0 , OpBcst64, OpBcst32, 0 , 0 , OpNo);
376
- }
// No width produced a successful conversion.
377
466
return false ;
378
467
};
379
468
380
469
// Attempt to find a AVX512 mapping from a full width memory-fold instruction
381
470
// to a broadcast-fold instruction variant.
382
471
if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
383
- return ConvertToBroadcastAVX512 (Opc, Opc);
472
+ return ConvertToBroadcastAVX512 (Opc, Opc, Opc );
384
473
385
474
// Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
386
475
// conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -437,7 +526,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
437
526
break ;
438
527
}
439
528
if (OpSrc32 || OpSrc64)
440
- return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
529
+ return ConvertToBroadcastAVX512 (0 , OpSrc32, OpSrc64);
441
530
}
442
531
443
532
return false ;
0 commit comments