@@ -478,9 +478,43 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
478
478
} else {
479
479
// Vector was split, and elements promoted to a wider type.
480
480
// FIXME: Should handle floating point promotions.
481
- LLT BVType = LLT::fixed_vector (LLTy.getNumElements (), PartLLT);
482
- auto BV = B.buildBuildVector (BVType, Regs);
483
- B.buildTrunc (OrigRegs[0 ], BV);
481
+ unsigned NumElts = LLTy.getNumElements ();
482
+ LLT BVType = LLT::fixed_vector (NumElts, PartLLT);
483
+
484
+ Register BuildVec;
485
+ if (NumElts == Regs.size ())
486
+ BuildVec = B.buildBuildVector (BVType, Regs).getReg (0 );
487
+ else {
488
+ // Vector elements are packed in the inputs.
489
+ // e.g. we have a <4 x s16> but 2 x s32 in regs.
490
+ assert (NumElts > Regs.size ());
491
+ LLT SrcEltTy = MRI.getType (Regs[0 ]);
492
+
493
+ LLT OriginalEltTy = MRI.getType (OrigRegs[0 ]).getElementType ();
494
+
495
+ // Input registers contain packed elements.
496
+ // Determine how many elements per reg.
497
+ assert ((SrcEltTy.getSizeInBits () % OriginalEltTy.getSizeInBits ()) == 0 );
498
+ unsigned EltPerReg =
499
+ (SrcEltTy.getSizeInBits () / OriginalEltTy.getSizeInBits ());
500
+
501
+ SmallVector<Register, 0 > BVRegs;
502
+ BVRegs.reserve (Regs.size () * EltPerReg);
503
+ for (Register R : Regs) {
504
+ auto Unmerge = B.buildUnmerge (OriginalEltTy, R);
505
+ for (unsigned K = 0 ; K < EltPerReg; ++K)
506
+ BVRegs.push_back (B.buildAnyExt (PartLLT, Unmerge.getReg (K)).getReg (0 ));
507
+ }
508
+
509
+ // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces
510
+ // for a <3 x s16> vector. We should have less than EltPerReg extra items.
511
+ if (BVRegs.size () > NumElts) {
512
+ assert ((BVRegs.size () - NumElts) < EltPerReg);
513
+ BVRegs.truncate (NumElts);
514
+ }
515
+ BuildVec = B.buildBuildVector (BVType, BVRegs).getReg (0 );
516
+ }
517
+ B.buildTrunc (OrigRegs[0 ], BuildVec);
484
518
}
485
519
}
486
520
0 commit comments