Skip to content

Commit d04b738

Browse files
committed
[AMDGPU] Support alloca in AS0
This PR lowers an alloca in AS0 to an alloca in AS5 followed by an addrspacecast back to AS0.
1 parent bc11987 commit d04b738

File tree

7 files changed

+636
-19
lines changed

7 files changed

+636
-19
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,9 +385,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
385385

386386
setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
387387

388+
setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
389+
388390
// For R600, this is totally unsupported, just custom lower to produce an
389391
// error.
390392
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
393+
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
391394

392395
// Library functions. These default to Expand, but we have instructions
393396
// for them.

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -912,12 +912,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
912912
.widenScalarToNextPow2(0, 32)
913913
.clampMaxNumElements(0, S32, 16);
914914

915-
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({PrivatePtr});
915+
getActionDefinitionsBuilder(G_FRAME_INDEX)
916+
.legalFor({PrivatePtr})
917+
.customFor({FlatPtr});
916918

917919
// If the amount is divergent, we have to do a wave reduction to get the
918920
// maximum value, so this is expanded during RegBankSelect.
919921
getActionDefinitionsBuilder(G_DYN_STACKALLOC)
920-
.legalFor({{PrivatePtr, S32}});
922+
.legalFor({{PrivatePtr, S32}})
923+
.customFor({FlatPtr, S32});
921924

922925
getActionDefinitionsBuilder(G_STACKSAVE)
923926
.customFor({PrivatePtr});
@@ -2221,6 +2224,10 @@ bool AMDGPULegalizerInfo::legalizeCustom(
22212224
return legalizeTrap(MI, MRI, B);
22222225
case TargetOpcode::G_DEBUGTRAP:
22232226
return legalizeDebugTrap(MI, MRI, B);
2227+
case TargetOpcode::G_FRAME_INDEX:
2228+
return legalizeFrameIndex(MI, MRI, B);
2229+
case TargetOpcode::G_DYN_STACKALLOC:
2230+
return legalizeDynStackAlloc(MI, MRI, B);
22242231
default:
22252232
return false;
22262233
}
@@ -7668,3 +7675,25 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
76687675

76697676
return true;
76707677
}
7678+
7679+
/// Custom legalization hook for G_FRAME_INDEX.
///
/// Re-materializes the frame index as a 32-bit pointer in the private address
/// space, address-space-casts that pointer into the original destination
/// register of \p MI, and then erases \p MI.
///
/// \param MI  The G_FRAME_INDEX instruction being legalized; operand 0 is the
///            destination register and operand 1 is the frame index.
/// \param MRI Unused.
/// \param B   Builder used to emit the replacement instructions.
/// \returns true unconditionally.
bool AMDGPULegalizerInfo::legalizeFrameIndex(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &B) const {
  const LLT PrivatePtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
  const int FrameIdx = MI.getOperand(1).getIndex();

  // Build the frame index in the private address space first, then cast it to
  // the address space of the original result.
  MachineInstrBuilder PrivateFI = B.buildFrameIndex(PrivatePtrTy, FrameIdx);
  const Register DstReg = MI.getOperand(0).getReg();
  B.buildAddrSpaceCast(DstReg, PrivateFI);

  MI.eraseFromParent();
  return true;
}
7688+
7689+
/// Custom legalization hook for G_DYN_STACKALLOC.
///
/// Re-emits the allocation as a G_DYN_STACKALLOC producing a 32-bit pointer in
/// the private address space, address-space-casts that pointer into the
/// original destination register of \p MI, and then erases \p MI.
///
/// \param MI  The G_DYN_STACKALLOC being legalized; operand 0 is the
///            destination register, operand 1 the allocation size and
///            operand 2 the alignment immediate.
/// \param MRI Unused.
/// \param B   Builder used to emit the replacement instructions.
/// \returns true unconditionally.
bool AMDGPULegalizerInfo::legalizeDynStackAlloc(MachineInstr &MI,
                                                MachineRegisterInfo &MRI,
                                                MachineIRBuilder &B) const {
  // The private-address-space allocation takes a 32-bit size. Use
  // zext-or-trunc rather than a plain trunc so that a size operand which is
  // already 32 bits wide (or narrower) does not produce an invalid G_TRUNC;
  // this mirrors the getZExtOrTrunc used on the SelectionDAG path.
  MachineInstrBuilder Size = B.buildZExtOrTrunc(S32, MI.getOperand(1));

  // An alignment immediate of 0 means "no specific alignment". Constructing
  // an Align directly from 0 asserts, so map it to Align(1) via assumeAligned.
  const Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  const LLT PrivatePtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
  MachineInstrBuilder DynStackAlloc =
      B.buildDynStackAlloc(PrivatePtrTy, Size, Alignment);
  B.buildAddrSpaceCast(MI.getOperand(0).getReg(), DynStackAlloc);

  MI.eraseFromParent();
  return true;
}

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
246246

247247
bool legalizeIntrinsic(LegalizerHelper &Helper,
248248
MachineInstr &MI) const override;
249+
250+
bool legalizeFrameIndex(MachineInstr &MI, MachineRegisterInfo &MRI,
251+
MachineIRBuilder &B) const;
252+
bool legalizeDynStackAlloc(MachineInstr &MI, MachineRegisterInfo &MRI,
253+
MachineIRBuilder &B) const;
249254
};
250255
} // End llvm namespace.
251256
#endif

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4117,6 +4117,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
41174117
InVals, /*IsThisReturn=*/false, SDValue());
41184118
}
41194119

4120+
/// Custom lowering for ISD::FrameIndex.
///
/// Creates an i32 frame index for the same stack slot and returns an address
/// space cast of it from the private address space to the flat address space,
/// typed as the value type of \p Op.
///
/// \param Op  The FrameIndex node to lower.
/// \param DAG The selection DAG in which the replacement nodes are created.
/// \returns the address-space-cast value.
SDValue SITargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
  // The node itself no longer carries an address space, so an i64 frame index
  // is assumed to originate from an alloca in AS0.
  SDLoc DL(Op);
  const EVT ResultVT = Op.getValueType();
  const int FrameIdx = cast<FrameIndexSDNode>(Op)->getIndex();

  SDValue PrivateFI = DAG.getFrameIndex(FrameIdx, MVT::i32);
  return DAG.getAddrSpaceCast(DL, ResultVT, PrivateFI,
                              AMDGPUAS::PRIVATE_ADDRESS,
                              AMDGPUAS::FLAT_ADDRESS);
}
4130+
41204131
// This is similar to the default implementation in ExpandDYNAMIC_STACKALLOC,
41214132
// except for:
41224133
// 1. Stack growth direction (default: downwards, AMDGPU: upwards), and
@@ -4129,13 +4140,27 @@ SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
41294140
SDLoc dl(Op);
41304141
EVT VT = Op.getValueType();
41314142
SDValue Chain = Op.getOperand(0);
4143+
SDValue Size = Op.getOperand(1);
4144+
4145+
// Since address space information is lost here, we assume that an i64 dynamic
4146+
// alloca comes from an alloca in AS0.
4147+
if (VT == MVT::i64) {
4148+
SDValue Align = Op.getOperand(2);
4149+
Size = DAG.getZExtOrTrunc(Size, dl, MVT::i32);
4150+
SDValue Ops[] = {Chain, Size, Align};
4151+
SDValue DynAlloc =
4152+
DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, {MVT::i32, MVT::Other}, Ops);
4153+
SDValue Cast = DAG.getAddrSpaceCast(
4154+
dl, VT, DynAlloc, AMDGPUAS::PRIVATE_ADDRESS, AMDGPUAS::FLAT_ADDRESS);
4155+
return DAG.getMergeValues({Cast, DynAlloc.getValue(1)}, dl);
4156+
}
4157+
41324158
Register SPReg = Info->getStackPtrOffsetReg();
41334159

41344160
// Chain the dynamic stack allocation so that it doesn't modify the stack
41354161
// pointer when other instructions are using the stack.
41364162
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
41374163

4138-
SDValue Size = Op.getOperand(1);
41394164
SDValue BaseAddr = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
41404165
Align Alignment = cast<ConstantSDNode>(Op.getOperand(2))->getAlignValue();
41414166

@@ -6087,6 +6112,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
60876112
case ISD::SMUL_LOHI:
60886113
case ISD::UMUL_LOHI:
60896114
return lowerXMUL_LOHI(Op, DAG);
6115+
case ISD::FrameIndex:
6116+
return lowerFrameIndex(Op, DAG);
60906117
case ISD::DYNAMIC_STACKALLOC:
60916118
return LowerDYNAMIC_STACKALLOC(Op, DAG);
60926119
case ISD::STACKSAVE:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
428428
SDValue LowerCall(CallLoweringInfo &CLI,
429429
SmallVectorImpl<SDValue> &InVals) const override;
430430

431+
SDValue lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
431432
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
432433
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
433434
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

0 commit comments

Comments
 (0)