Skip to content

Commit f5be3c4

Browse files
[NVPTX] Custom lower ADDRSPACECAST
1 parent c8f4189 commit f5be3c4

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -989,6 +989,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
989989
setOperationAction(ISD::FLOG2, {MVT::v2f16, MVT::v2bf16}, Expand);
990990
}
991991

992+
setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom);
993+
992994
// No FPOW or FREM in PTX.
993995

994996
// Now deduce the information based on the above mentioned
@@ -2652,6 +2654,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
26522654
return SDValue();
26532655
case ISD::FRAMEADDR:
26542656
return SDValue();
2657+
case ISD::ADDRSPACECAST:
2658+
return LowerADDRSPACECAST(Op, DAG);
26552659
case ISD::GlobalAddress:
26562660
return LowerGlobalAddress(Op, DAG);
26572661
case ISD::INTRINSIC_W_CHAIN:
@@ -2726,7 +2730,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
27262730
case ISD::FMUL:
27272731
// Used only for bf16 on SM80, where we select fma for non-ftz operation
27282732
return PromoteBinOpIfF32FTZ(Op, DAG);
2729-
27302733
default:
27312734
llvm_unreachable("Custom lowering not defined for operation");
27322735
}
@@ -2767,6 +2770,22 @@ unsigned NVPTXTargetLowering::getJumpTableEncoding() const {
27672770
return MachineJumpTableInfo::EK_Inline;
27682771
}
27692772

2773+
// Custom lowering for ISD::ADDRSPACECAST nodes.
//
// A cast whose source or destination is the generic address space is returned
// unchanged. Any other cast (specific space -> specific space) is rewritten as
// two casts that go through the generic address space:
//   SrcAS -> ADDRESS_SPACE_GENERIC -> DestAS
//
// \param Op  The ADDRSPACECAST node to lower; operand 0 is the pointer value.
// \param DAG The SelectionDAG used to build the replacement nodes.
// \returns Op itself when either side is generic, otherwise the second
//          (generic -> DestAS) cast of the newly built pair.
SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const {
2774+
SDLoc DL(Op);
2775+
AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
2776+
2777+
// Gather the value types and address spaces on both sides of the cast.
EVT OperandVT = Op.getOperand(0).getValueType();
2778+
unsigned SrcAS = N->getSrcAddressSpace();
2779+
EVT ResultVT = Op.getValueType();
2780+
unsigned DestAS = N->getDestAddressSpace();
2781+
2782+
// Casts that already involve the generic space are left as they are.
if (SrcAS == llvm::ADDRESS_SPACE_GENERIC || DestAS == llvm::ADDRESS_SPACE_GENERIC)
2783+
return Op;
2784+
2785+
// Split the cast in two, using the generic space as the intermediate step.
// NOTE(review): the intermediate generic pointer is given the operand's value
// type (OperandVT); confirm this is correct when the source space's pointer
// width differs from the generic pointer width.
SDValue ToGeneric = DAG.getAddrSpaceCast(DL, OperandVT, Op.getOperand(0), SrcAS, llvm::ADDRESS_SPACE_GENERIC);
2786+
return DAG.getAddrSpaceCast(DL, ResultVT, ToGeneric, llvm::ADDRESS_SPACE_GENERIC, DestAS);
2787+
}
2788+
27702789
// This function is almost a copy of SelectionDAG::expandVAArg().
27712790
// The only diff is that this one produces loads from local address space.
27722791
SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,7 @@ class NVPTXTargetLowering : public TargetLowering {
264264
const NVPTXSubtarget &STI; // cache the subtarget here
265265
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
266266

267+
SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
267268
SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
268269

269270
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/NVPTX/addrspacecast.ll

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,20 @@ define i32 @conv8(ptr %ptr) {
9999
ret i32 %val
100100
}
101101

102+
; ALL-LABEL: conv9
103+
; conv9: casts a pointer directly from addrspace(1) to addrspace(3), with
; neither side being the default (generic) address space, then loads an i32
; through the result. The checks below expect a two-step conversion
; (cvta.global followed by cvta.to.shared) before the ld.shared.
define i32 @conv9(ptr addrspace(1) %ptr) {
104+
; CLS32: cvta.global.u32
105+
; CLS32: cvta.to.shared.u32
106+
; CLS64: cvta.global.u64
107+
; CLS64: cvta.to.shared.u64
108+
; PTRCONV: cvt.u32.u64
109+
; NOPTRCONV-NOT: cvt.u32.u64
110+
; ALL: ld.shared.u32
111+
%specptr = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(3)
112+
%val = load i32, ptr addrspace(3) %specptr
113+
ret i32 %val
114+
}
115+
102116
; Check that we support addrspacecast when splitting the vector
103117
; result (<2 x ptr> => 2 x <1 x ptr>).
104118
; This also checks that scalarization works for addrspacecast

0 commit comments

Comments
 (0)