@@ -256,16 +256,32 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
     return B.buildConcatVectors(DstRegs[0], SrcRegs);
   }
 
-  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
-  Register Undef = B.buildUndef(PartLLT).getReg(0);
-
-  // Build vector of undefs.
-  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
-
-  // Replace the first sources with the real registers.
-  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+  // We need to create an unmerge to the result registers, which may require
+  // widening the original value.
+  Register UnmergeSrcReg;
+  if (LCMTy != PartLLT) {
+    // e.g. A <3 x s16> value was split to <2 x s16>
+    // %register_value0:_(<2 x s16>)
+    // %register_value1:_(<2 x s16>)
+    // %undef:_(<2 x s16>) = G_IMPLICIT_DEF
+    // %concat:_(<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef
+    // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat
+    const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
+    Register Undef = B.buildUndef(PartLLT).getReg(0);
+
+    // Build vector of undefs.
+    SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
+
+    // Replace the first sources with the real registers.
+    std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+    UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0);
+  } else {
+    // We don't need to widen anything if we're extracting a scalar which was
+    // promoted to a vector e.g. s8 -> v4s8 -> s8
+    assert(SrcRegs.size() == 1);
+    UnmergeSrcReg = SrcRegs[0];
+  }
 
-  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
   int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
 
   SmallVector<Register, 8> PadDstRegs(NumDst);
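Note: to make the widening arithmetic concrete, this is the MIR the path above produces for the <3 x s16> example in its comment (register names illustrative, sizes in bits):

    ; LLTy = <3 x s16> (48b), PartLLT = <2 x s16> (32b), LCMTy = <6 x s16> (96b)
    ; NumWide = 96 / 32 = 3, so one undef pads the two real parts:
    %undef:_(<2 x s16>) = G_IMPLICIT_DEF
    %concat:_(<6 x s16>) = G_CONCAT_VECTORS %part0, %part1, %undef
    ; NumDst = 96 / 48 = 2, so the unmerge in the next hunk adds one dead pad def:
    %dst:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat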
@@ -275,17 +291,27 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
   for (int I = DstRegs.size(); I != NumDst; ++I)
     PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
 
-  return B.buildUnmerge(PadDstRegs, Widened);
+  return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
 }
 
 /// Create a sequence of instructions to combine pieces split into register
 /// typed values to the original IR value. \p OrigRegs contains the destination
 /// value registers of type \p LLTy, and \p Regs contains the legalized pieces
-/// with type \p PartLLT.
-static void buildCopyToParts(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
-                             ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
+/// with type \p PartLLT. This is used for incoming values (physregs to vregs).
+static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
+                              ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
   MachineRegisterInfo &MRI = *B.getMRI();
 
+  // We could just insert a regular copy, but this is unreachable at the moment.
+  assert(LLTy != PartLLT && "identical part types shouldn't reach here");
+
+  if (PartLLT.isVector() == LLTy.isVector() &&
+      PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits()) {
+    assert(OrigRegs.size() == 1 && Regs.size() == 1);
+    B.buildTrunc(OrigRegs[0], Regs[0]);
+    return;
+  }
+
   if (!LLTy.isVector() && !PartLLT.isVector()) {
     assert(OrigRegs.size() == 1);
     LLT OrigTy = MRI.getType(OrigRegs[0]);
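Note: the new early-out handles a single part that is wider than the original scalar type. A hypothetical incoming case (types assumed for illustration, not from the patch): an s16 value that arrives in an s32 location collapses to a single truncate:

    %orig:_(s16) = G_TRUNC %part0:_(s32)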
@@ -301,9 +327,9 @@ static void buildCopyToParts(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
     return;
   }
 
-  if (LLTy.isVector() && PartLLT.isVector()) {
-    assert(OrigRegs.size() == 1);
-    assert(LLTy.getElementType() == PartLLT.getElementType());
+  if (PartLLT.isVector()) {
+    assert(OrigRegs.size() == 1 &&
+           LLTy.getScalarType() == PartLLT.getElementType());
     mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
     return;
   }
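Note: the condition is deliberately relaxed from requiring both types to be vectors to requiring only a vector PartLLT, so a scalar rebuilt from vector parts (the s8 -> v4s8 -> s8 case noted in the first hunk) also routes through mergeVectorRegsToResultRegs; comparing LLTy.getScalarType() in the combined assert keeps it valid for both the vector and scalar forms of LLTy.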
@@ -353,6 +379,71 @@ static void buildCopyToParts(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
   }
 }
 
+/// Create a sequence of instructions to expand the value in \p SrcReg (of type
+/// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should
+/// contain the type of scalar value extension if necessary.
+///
+/// This is used for outgoing values (vregs to physregs).
+static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
+                            Register SrcReg, LLT SrcTy, LLT PartTy,
+                            unsigned ExtendOp = TargetOpcode::G_ANYEXT) {
+  // We could just insert a regular copy, but this is unreachable at the moment.
+  assert(SrcTy != PartTy && "identical part types shouldn't reach here");
+
+  const unsigned PartSize = PartTy.getSizeInBits();
+
+  if (PartTy.isVector() == SrcTy.isVector() &&
+      PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
+    assert(DstRegs.size() == 1);
+    B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg});
+    return;
+  }
+
+  if (SrcTy.isVector() && !PartTy.isVector() &&
+      PartSize > SrcTy.getElementType().getSizeInBits()) {
+    // Vector was scalarized, and the elements extended.
+    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
+    for (int i = 0, e = DstRegs.size(); i != e; ++i)
+      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+    return;
+  }
+
+  LLT GCDTy = getGCDType(SrcTy, PartTy);
+  if (GCDTy == PartTy) {
+    // If this is already evenly divisible, we can create a simple unmerge.
+    B.buildUnmerge(DstRegs, SrcReg);
+    return;
+  }
+
+  MachineRegisterInfo &MRI = *B.getMRI();
+  LLT DstTy = MRI.getType(DstRegs[0]);
+  LLT LCMTy = getLCMType(SrcTy, PartTy);
+
+  const unsigned LCMSize = LCMTy.getSizeInBits();
+  const unsigned DstSize = DstTy.getSizeInBits();
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+
+  Register UnmergeSrc = SrcReg;
+  if (LCMSize != SrcSize) {
+    // Widen to the common type.
+    Register Undef = B.buildUndef(SrcTy).getReg(0);
+    SmallVector<Register, 8> MergeParts(1, SrcReg);
+    for (unsigned Size = SrcSize; Size != LCMSize; Size += SrcSize)
+      MergeParts.push_back(Undef);
+
+    UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
+  }
+
+  // Unmerge to the original registers and pad with dead defs.
+  SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
+  for (unsigned Size = DstSize * DstRegs.size(); Size != LCMSize;
+       Size += DstSize) {
+    UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
+  }
+
+  B.buildUnmerge(UnmergeResults, UnmergeSrc);
+}
+
 bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      SmallVectorImpl<ArgInfo> &Args,
                                      ValueHandler &Handler,
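Note: a sketch of the general LCM fallback at the end of buildCopyToRegs, under assumed types not taken from the patch (SrcTy = s96 split into s64 parts; names illustrative):

    ; GCD(s96, s64) = s32 != PartTy, so the value is widened to LCM = s192 first:
    %undef:_(s96) = G_IMPLICIT_DEF
    %wide:_(s192) = G_MERGE_VALUES %src:_(s96), %undef:_(s96)
    ; then unmerged into the two live s64 parts plus one dead pad def:
    %part0:_(s64), %part1:_(s64), %dead:_(s64) = G_UNMERGE_VALUES %wide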
@@ -367,13 +458,22 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      ThisReturnReg);
 }
 
+static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
+  if (Flags.isSExt())
+    return TargetOpcode::G_SEXT;
+  if (Flags.isZExt())
+    return TargetOpcode::G_ZEXT;
+  return TargetOpcode::G_ANYEXT;
+}
+
 bool CallLowering::handleAssignments(CCState &CCInfo,
                                      SmallVectorImpl<CCValAssign> &ArgLocs,
                                      MachineIRBuilder &MIRBuilder,
                                      SmallVectorImpl<ArgInfo> &Args,
                                      ValueHandler &Handler,
                                      Register ThisReturnReg) const {
   MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
@@ -399,10 +499,20 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
       if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[0], CCInfo))
         return false;
+
+      // If we couldn't directly assign this part, some casting may be
+      // necessary. Create the new register, but defer inserting the conversion
+      // instructions.
+      assert(Args[i].OrigRegs.empty());
+      Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+      assert(Args[i].Regs.size() == 1);
+
+      const LLT VATy(NewVT);
+      Args[i].Regs[0] = MRI.createGenericVirtualRegister(VATy);
       continue;
     }
 
-    assert(NumParts > 1);
+    const LLT NewLLT(NewVT);
 
     // For incoming arguments (physregs to vregs), we could have values in
     // physregs (or memlocs) which we want to extract and copy to vregs.
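Note: in the single-part case the conversion is now deferred. OrigRegs keeps the IR-typed vreg, Regs[0] is replaced with a fresh vreg of the CC-assigned type, and the actual extend or truncate is only emitted later by buildCopyToRegs (outgoing) or buildCopyFromRegs (incoming), once the handler has bound the part to its physical location.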
@@ -419,13 +529,11 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
       Args[i].OrigRegs.push_back(Args[i].Regs[0]);
       Args[i].Regs.clear();
       Args[i].Flags.clear();
-      LLT NewLLT = getLLTForMVT(NewVT);
       // For each split register, create and assign a vreg that will store
       // the incoming component of the larger value. These will later be
       // merged to form the final vreg.
       for (unsigned Part = 0; Part < NumParts; ++Part) {
-        Register Reg =
-            MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
+        Register Reg = MRI.createGenericVirtualRegister(NewLLT);
         ISD::ArgFlagsTy Flags = OrigFlags;
         if (Part == 0) {
           Flags.setSplit();
@@ -443,12 +551,13 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
         }
       }
     } else {
+      assert(Args[i].Regs.size() == 1);
+
       // This type is passed via multiple registers in the calling convention.
       // We need to extract the individual parts.
-      Register LargeReg = Args[i].Regs[0];
-      LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
-      auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
-      assert(Unmerge->getNumOperands() == NumParts + 1);
+      assert(Args[i].OrigRegs.empty());
+      Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+
       ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
       // We're going to replace the regs and flags with the split ones.
       Args[i].Regs.clear();
@@ -471,7 +580,9 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
           Flags.setReturned(false);
         }
 
-        Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
+        Register NewReg = MRI.createGenericVirtualRegister(NewLLT);
+
+        Args[i].Regs.push_back(NewReg);
         Args[i].Flags.push_back(Flags);
         if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full,
                               Args[i], Args[i].Flags[PartIdx], CCInfo))
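Note: where the old code eagerly built a G_UNMERGE_VALUES of the large register before assignment, the split parts are now created as empty vregs here; the unmerge (or merge-then-unmerge) is materialized just before assignment by the buildCopyToRegs call added in a later hunk.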
@@ -495,20 +606,25 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
       continue;
     }
 
-    EVT OrigVT = EVT::getEVT(Args[i].Ty);
     EVT VAVT = VA.getValVT();
     const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
     const LLT VATy(VAVT.getSimpleVT());
 
     // Expected to be multiple regs for a single incoming arg.
    // There should be Regs.size() ArgLocs per argument.
     unsigned NumArgRegs = Args[i].Regs.size();
-    MachineRegisterInfo &MRI = MF.getRegInfo();
     assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
            "Too many regs for number of args");
+
+    // Coerce into outgoing value types before register assignment.
+    if (!Handler.isIncomingArgumentHandler() && OrigTy != VATy) {
+      assert(Args[i].OrigRegs.size() == 1);
+      buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
+                      VATy, extendOpFromFlags(Args[i].Flags[0]));
+    }
+
     for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
       Register ArgReg = Args[i].Regs[Part];
-      LLT ArgRegTy = MRI.getType(ArgReg);
       // There should be Regs.size() ArgLocs per argument.
       VA = ArgLocs[j + Part];
       if (VA.isMemLoc()) {
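Note: a sketch of the outgoing coercion under assumed types not taken from the patch: for OrigTy = <2 x s8> passed as VATy = s32 parts, buildCopyToRegs scalarizes and extends the elements before each part is assigned to its location:

    %e0:_(s8), %e1:_(s8) = G_UNMERGE_VALUES %orig:_(<2 x s8>)
    %r0:_(s32) = G_ANYEXT %e0:_(s8)
    %r1:_(s32) = G_ANYEXT %e1:_(s8)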
@@ -536,57 +652,16 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
         continue;
       }
 
-      // GlobalISel does not currently work for scalable vectors.
-      if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() ||
-          !Handler.isIncomingArgumentHandler()) {
-        // This is an argument that might have been split. There should be
-        // Regs.size() ArgLocs per argument.
-
-        // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge
-        // to the original register after handling all of the parts.
-        Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
-        continue;
-      }
-
-      // This ArgLoc covers multiple pieces, so we need to split it.
-      Register NewReg = MRI.createGenericVirtualRegister(VATy);
-      Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
-      // If it's a vector type, we either need to truncate the elements
-      // or do an unmerge to get the lower block of elements.
-      if (VATy.isVector() &&
-          VATy.getNumElements() > OrigVT.getVectorNumElements()) {
-        // Just handle the case where the VA type is a multiple of original
-        // type.
-        if (VATy.getNumElements() % OrigVT.getVectorNumElements() != 0) {
-          LLVM_DEBUG(dbgs() << "Incoming promoted vector arg elts is not a "
-                               "multiple of orig type elt: "
-                            << VATy << " vs " << OrigTy);
-          return false;
-        }
-        SmallVector<Register, 4> DstRegs = {ArgReg};
-        unsigned NumParts =
-            VATy.getNumElements() / OrigVT.getVectorNumElements() - 1;
-        for (unsigned Idx = 0; Idx < NumParts; ++Idx)
-          DstRegs.push_back(
-              MIRBuilder.getMRI()->createGenericVirtualRegister(OrigTy));
-        MIRBuilder.buildUnmerge(DstRegs, {NewReg});
-      } else if (VATy.getScalarSizeInBits() > ArgRegTy.getScalarSizeInBits()) {
-        MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
-      } else {
-        MIRBuilder.buildCopy(ArgReg, NewReg);
-      }
+      Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
     }
 
-    // Now that all pieces have been handled, re-pack any arguments into any
-    // wider, original registers.
-    if (Handler.isIncomingArgumentHandler()) {
+    // Now that all pieces have been assigned, re-pack the register typed values
+    // into the original value typed registers.
+    if (Handler.isIncomingArgumentHandler() && OrigTy != VATy) {
       // Merge the split registers into the expected larger result vregs of
      // the original call.
-
-      if (OrigTy != VATy && !Args[i].OrigRegs.empty()) {
-        buildCopyToParts(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
-                         VATy);
-      }
+      buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
+                        VATy);
     }
 
     j += NumArgRegs - 1;