@@ -248,18 +248,18 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
   }
 
   void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
-                            uint64_t Size, MachinePointerInfo &MPO,
+                            uint64_t MemSize, MachinePointerInfo &MPO,
                             CCValAssign &VA) override {
     Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
                            ? extendRegister(Arg.Regs[0], VA)
                            : Arg.Regs[0];
 
-    // If we extended we might need to adjust the MMO's Size.
+    // If we extended the value type we might need to adjust the MMO's
+    // Size. This happens if ComputeValueVTs widened a small type value to a
+    // legal register type (e.g. s8->s16)
     const LLT RegTy = MRI.getType(ValVReg);
-    if (RegTy.getSizeInBytes() > Size)
-      Size = RegTy.getSizeInBytes();
-
-    assignValueToAddress(ValVReg, Addr, Size, MPO, VA);
+    MemSize = std::min(MemSize, (uint64_t)RegTy.getSizeInBytes());
+    assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
   }
 };
 }
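
The clamp in the hunk above exists because the register holding a value may have been widened past the value's in-memory size. Where the old code grew Size to match the widened register, the new code keeps the smaller of the two, so the memory operand never claims more bytes than the original value occupied. A minimal standalone sketch of just that rule, with a hypothetical FakeLLT standing in for LLVM's LLT (illustrative only, not the actual handler):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-in for LLT; only the size query matters here.
    struct FakeLLT {
      uint64_t Bits;
      uint64_t getSizeInBytes() const { return Bits / 8; }
    };

    // The rule from the hunk above: never store more bytes than the original
    // value occupied, even if the register holding it was widened.
    uint64_t clampedMemSize(uint64_t MemSize, FakeLLT RegTy) {
      return std::min(MemSize, RegTy.getSizeInBytes());
    }

    int main() {
      assert(clampedMemSize(1, FakeLLT{16}) == 1); // s8 widened to s16: still a 1-byte store
      assert(clampedMemSize(4, FakeLLT{32}) == 4); // s32 unchanged: 4-byte store
      return 0;
    }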
@@ -282,57 +282,72 @@ static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
   }
 }
 
-void AMDGPUCallLowering::splitToValueTypes(
-    MachineIRBuilder &B,
-    const ArgInfo &OrigArg,
-    SmallVectorImpl<ArgInfo> &SplitArgs,
-    const DataLayout &DL, CallingConv::ID CallConv,
-    bool IsOutgoing,
-    SplitArgTy PerformArgSplit) const {
+// FIXME: This should move to generic code.
+void AMDGPUCallLowering::splitToValueTypes(MachineIRBuilder &B,
+                                           const ArgInfo &OrigArg,
+                                           SmallVectorImpl<ArgInfo> &SplitArgs,
+                                           const DataLayout &DL,
+                                           CallingConv::ID CallConv) const {
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
 
-  if (OrigArg.Ty->isVoidTy())
-    return;
-
   SmallVector<EVT, 4> SplitVTs;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
 
   assert(OrigArg.Regs.size() == SplitVTs.size());
 
-  int SplitIdx = 0;
-  for (EVT VT : SplitVTs) {
-    Register Reg = OrigArg.Regs[SplitIdx];
-    Type *Ty = VT.getTypeForEVT(Ctx);
-    LLT LLTy = getLLTForType(*Ty, DL);
+  if (SplitVTs.size() == 0)
+    return;
 
-    if (IsOutgoing && VT.isScalarInteger()) {
-      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
-      if (OrigArg.Flags[0].isSExt()) {
-        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
-        ExtendOp = TargetOpcode::G_SEXT;
-      } else if (OrigArg.Flags[0].isZExt()) {
-        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
-        ExtendOp = TargetOpcode::G_ZEXT;
-      }
+  if (SplitVTs.size() == 1) {
+    // No splitting to do, but we want to replace the original type (e.g. [1 x
+    // double] -> double).
+    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+                           OrigArg.Flags[0], OrigArg.IsFixed);
+    return;
+  }
 
-      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
-                                          extOpcodeToISDExtOpcode(ExtendOp));
-      if (ExtVT.getSizeInBits() != VT.getSizeInBits()) {
-        VT = ExtVT;
-        Ty = ExtVT.getTypeForEVT(Ctx);
-        LLTy = getLLTForType(*Ty, DL);
-        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
-      }
-    }
+  // Create one ArgInfo for each virtual register in the original ArgInfo.
+  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+      OrigArg.Ty, CallConv, false);
+  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
+                           OrigArg.IsFixed);
+    if (NeedsRegBlock)
+      SplitArgs.back().Flags[0].setInConsecutiveRegs();
+  }
+
+  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
+}
+
+void AMDGPUCallLowering::processSplitArgs(
+    MachineIRBuilder &B, const ArgInfo &OrigArg,
+    const SmallVectorImpl<ArgInfo> &SplitArg,
+    SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL,
+    CallingConv::ID CallConv, bool IsOutgoing,
+    SplitArgTy PerformArgSplit) const {
+  LLVMContext &Ctx = OrigArg.Ty->getContext();
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
+  // FIXME: This is mostly nasty pre-processing before handleAssignments. Most
+  // of this should be performed by handleAssignments.
+
+  int SplitIdx = 0;
+  for (const ArgInfo &SplitArg : SplitArg) {
+    Register Reg = OrigArg.Regs[SplitIdx];
+    EVT VT = EVT::getEVT(SplitArg.Ty);
+    LLT LLTy = getLLTForType(*SplitArg.Ty, DL);
 
     unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
     MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
 
     if (NumParts == 1) {
       // No splitting to do, but we want to replace the original type (e.g. [1 x
       // double] -> double).
-      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);
+      SplitArgs.emplace_back(Reg, SplitArg.Ty, OrigArg.Flags, OrigArg.IsFixed);
 
       ++SplitIdx;
       continue;
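
For intuition on the reworked splitToValueTypes: ComputeValueVTs flattens an aggregate into one EVT per leaf value, and the function now emits exactly one ArgInfo per leaf register (plus consecutive-register flags when the target requires a block), leaving the per-part 32-bit splitting to processSplitArgs. A rough sketch of just the flattening step over a toy type representation — ToyType and flattenValueTypes are hypothetical, not the LLVM API:

    #include <cassert>
    #include <string>
    #include <vector>

    // Toy IR type: either a scalar name or an aggregate of element types.
    struct ToyType {
      std::string Scalar;          // non-empty for leaf types
      std::vector<ToyType> Elems;  // non-empty for structs/arrays
    };

    // Mirrors the spirit of ComputeValueVTs: append one entry per leaf value.
    void flattenValueTypes(const ToyType &Ty, std::vector<std::string> &Out) {
      if (!Ty.Scalar.empty()) {
        Out.push_back(Ty.Scalar);
        return;
      }
      for (const ToyType &E : Ty.Elems)
        flattenValueTypes(E, Out);
    }

    int main() {
      // { i32, [2 x float] } flattens to three leaves, so three ArgInfos.
      ToyType S{"", {ToyType{"i32", {}},
                     ToyType{"", {ToyType{"f32", {}}, ToyType{"f32", {}}}}}};
      std::vector<std::string> Leaves;
      flattenValueTypes(S, Leaves);
      assert(Leaves.size() == 3 && Leaves[0] == "i32" && Leaves[2] == "f32");
      return 0;
    }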
@@ -425,22 +440,68 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
   const auto &F = MF.getFunction();
   const DataLayout &DL = MF.getDataLayout();
   MachineRegisterInfo *MRI = B.getMRI();
+  LLVMContext &Ctx = F.getContext();
 
   CallingConv::ID CC = F.getCallingConv();
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
 
-  ArgInfo OrigRetInfo(VRegs, Val->getType());
-  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
-  SmallVector<ArgInfo, 4> SplitRetInfos;
+  SmallVector<EVT, 8> SplitEVTs;
+  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+  assert(VRegs.size() == SplitEVTs.size() &&
+         "For each split Type there should be exactly one VReg.");
+
+  // We pre-process the return value decomposed into EVTs.
+  SmallVector<ArgInfo, 8> PreSplitRetInfos;
 
-  splitToValueTypes(
-      B, OrigRetInfo, SplitRetInfos, DL, CC, true,
-      [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
-          int VTSplitIdx) {
-        unpackRegsToOrigType(B, Regs, SrcReg,
-                             SplitRetInfos[VTSplitIdx],
-                             LLTy, PartLLT);
-      });
+  // Further processing is applied to split the arguments from PreSplitRetInfos
+  // into 32-bit pieces in SplitRetInfos before passing off to
+  // handleAssignments.
+  SmallVector<ArgInfo, 8> SplitRetInfos;
+
+  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+    EVT VT = SplitEVTs[i];
+    Register Reg = VRegs[i];
+    ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx));
+    setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+
+    if (VT.isScalarInteger()) {
+      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+      if (RetInfo.Flags[0].isSExt()) {
+        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_SEXT;
+      } else if (RetInfo.Flags[0].isZExt()) {
+        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_ZEXT;
+      }
+
+      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
+                                          extOpcodeToISDExtOpcode(ExtendOp));
+      if (ExtVT != VT) {
+        RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
+        LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
+        Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
+      }
+    }
+
+    if (Reg != RetInfo.Regs[0]) {
+      RetInfo.Regs[0] = Reg;
+      // Reset the arg flags after modifying Reg.
+      setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+    }
+
+    splitToValueTypes(B, RetInfo, PreSplitRetInfos, DL, CC);
+
+    // FIXME: This splitting should mostly be done by handleAssignments
+    processSplitArgs(B, RetInfo,
+                     PreSplitRetInfos, SplitRetInfos, DL, CC, true,
+                     [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy,
+                         LLT PartLLT, int VTSplitIdx) {
+                       unpackRegsToOrigType(B, Regs, SrcReg,
+                                            PreSplitRetInfos[VTSplitIdx], LLTy,
+                                            PartLLT);
+                     });
+    PreSplitRetInfos.clear();
+  }
 
   CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
   AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
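
The extend-opcode selection that moved into lowerReturnVal picks the extension from the return attributes before the value is handed off for assignment. A reduced sketch of just that decision, with a plain enum standing in for the TargetOpcode constants (a sketch of the selection logic, not the real lowering):

    #include <cassert>

    enum class ExtendOp { AnyExt, SExt, ZExt };

    struct RetFlags { bool SExt = false; bool ZExt = false; };

    // Mirrors the selection in lowerReturnVal: 'signext'/'zeroext' return
    // attributes force a specific extension; otherwise any-extend is fine.
    ExtendOp pickExtendOp(RetFlags Flags) {
      if (Flags.SExt) return ExtendOp::SExt;
      if (Flags.ZExt) return ExtendOp::ZExt;
      return ExtendOp::AnyExt;
    }

    int main() {
      assert(pickExtendOp({/*SExt=*/true, false}) == ExtendOp::SExt); // signext i8
      assert(pickExtendOp({false, /*ZExt=*/true}) == ExtendOp::ZExt); // zeroext i16
      assert(pickExtendOp({}) == ExtendOp::AnyExt);                   // plain i32
      return 0;
    }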
@@ -814,7 +875,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
-
+  SmallVector<ArgInfo, 8> SplitArg;
   SmallVector<ArgInfo, 32> SplitArgs;
   unsigned Idx = 0;
   unsigned PSInputNum = 0;
@@ -859,16 +920,18 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
     setArgFlags(OrigArg, OrigArgIdx, DL, F);
 
-    splitToValueTypes(
-        B, OrigArg, SplitArgs, DL, CC, false,
-        // FIXME: We should probably be passing multiple registers to
-        // handleAssignments to do this
-        [&](ArrayRef<Register> Regs, Register DstReg,
-            LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-          assert(DstReg == VRegs[Idx][VTSplitIdx]);
-          packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
-                                  LLTy, PartLLT);
-        });
+    SplitArg.clear();
+    splitToValueTypes(B, OrigArg, SplitArg, DL, CC);
+
+    processSplitArgs(B, OrigArg, SplitArg, SplitArgs, DL, CC, false,
+                     // FIXME: We should probably be passing multiple registers
+                     // to handleAssignments to do this
+                     [&](ArrayRef<Register> Regs, Register DstReg, LLT LLTy,
+                         LLT PartLLT, int VTSplitIdx) {
+                       assert(DstReg == VRegs[Idx][VTSplitIdx]);
+                       packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
+                                               LLTy, PartLLT);
+                     });
 
     ++Idx;
   }
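
Incoming formal arguments travel the opposite direction from returns: packSplitRegsToOrigType re-assembles the 32-bit pieces assigned by the calling convention into the original wider virtual register (conceptually a G_MERGE_VALUES). A toy model over plain integers, assuming little-endian part order — packParts is a hypothetical stand-in, not the builder API:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy re-packing of split parts: little-endian concatenation of 32-bit
    // pieces into one wide value, standing in for G_MERGE_VALUES.
    uint64_t packParts(const std::vector<uint32_t> &Parts) {
      uint64_t Out = 0;
      for (size_t i = 0; i < Parts.size(); ++i)
        Out |= static_cast<uint64_t>(Parts[i]) << (32 * i);
      return Out;
    }

    int main() {
      // An s64 argument split into two s32 pieces by the calling convention.
      assert(packParts({0x89abcdef, 0x01234567}) == 0x0123456789abcdefULL);
      return 0;
    }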
@@ -1159,17 +1222,21 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   }
 
   SmallVector<ArgInfo, 8> OutArgs;
-  SmallVector<ArgInfo, 4> SplitRetInfos;
 
+  SmallVector<ArgInfo, 8> SplitArg;
   for (auto &OrigArg : Info.OrigArgs) {
-    splitToValueTypes(
-        MIRBuilder, OrigArg, OutArgs, DL, Info.CallConv, true,
+    splitToValueTypes(MIRBuilder, OrigArg, SplitArg, DL, Info.CallConv);
+
+    processSplitArgs(
+        MIRBuilder, OrigArg, SplitArg, OutArgs, DL, Info.CallConv, true,
         // FIXME: We should probably be passing multiple registers to
         // handleAssignments to do this
         [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
             int VTSplitIdx) {
           unpackRegsToOrigType(MIRBuilder, Regs, SrcReg, OrigArg, LLTy, PartLLT);
         });
+
+    SplitArg.clear();
   }
 
   // If we can lower as a tail call, do that instead.
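
For outgoing call arguments the transformation is inverted: unpackRegsToOrigType splits each wide value into calling-convention-sized parts before handleAssignments sees them (conceptually a G_UNMERGE_VALUES). The toy inverse of the packing sketch above, with a hypothetical unpackParts:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy stand-in for unpacking a wide value into 32-bit parts,
    // little-endian part order to match the packing sketch.
    std::vector<uint32_t> unpackParts(uint64_t Wide, unsigned NumParts) {
      std::vector<uint32_t> Parts;
      for (unsigned i = 0; i < NumParts; ++i)
        Parts.push_back(static_cast<uint32_t>(Wide >> (32 * i)));
      return Parts;
    }

    int main() {
      std::vector<uint32_t> Parts = unpackParts(0x0123456789abcdefULL, 2);
      assert(Parts[0] == 0x89abcdef && Parts[1] == 0x01234567);
      return 0;
    }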
@@ -1269,14 +1336,19 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   SmallVector<ArgInfo, 8> InArgs;
   if (!Info.OrigRet.Ty->isVoidTy()) {
+    SmallVector<ArgInfo, 8> PreSplitRetInfos;
+
     splitToValueTypes(
-        MIRBuilder, Info.OrigRet, InArgs, DL, Info.CallConv, false,
-        [&](ArrayRef<Register> Regs, Register DstReg,
-            LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-          assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
-          packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
-                                  Regs, LLTy, PartLLT);
-        });
+        MIRBuilder, Info.OrigRet, PreSplitRetInfos/*InArgs*/, DL, Info.CallConv);
+
+    processSplitArgs(MIRBuilder, Info.OrigRet,
+                     PreSplitRetInfos, InArgs/*SplitRetInfos*/, DL, Info.CallConv, false,
+                     [&](ArrayRef<Register> Regs, Register DstReg,
+                         LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+                       assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
+                       packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
+                                               Regs, LLTy, PartLLT);
+                     });
   }
 
   // Make sure the raw argument copies are inserted before the marshalling to