Skip to content

Commit a872e23

Browse files
committed
Revert r273313 "[NVPTX] Improve lowering of byval args of device functions."
The change causes llvm crash in some unoptimized builds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274163 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6a40504 commit a872e23

File tree

5 files changed

+23
-105
lines changed

5 files changed

+23
-105
lines changed

lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -662,58 +662,6 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
662662
TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
663663
break;
664664
case ADDRESS_SPACE_PARAM:
665-
if (Src.getOpcode() == NVPTXISD::MoveParam) {
666-
// TL;DR: addrspacecast MoveParam to param space is a no-op.
667-
//
668-
// Longer version is that this particular change is both an
669-
// optimization and a work-around for a problem in ptxas for
670-
// sm_50+.
671-
//
672-
// Background:
673-
// According to PTX ISA v7.5 5.1.6.4: "...whenever a formal
674-
// parameter has its address taken within the called function
675-
// [..] the parameter will be copied to the stack if
676-
// necessary, and so the address will be in the .local state
677-
// space and is accessed via ld.local and st.local
678-
// instructions."
679-
//
680-
// Bug part: 'copied to the stack if necessary' part is
681-
// currently broken for byval arguments with alignment < 4 for
682-
// sm_50+ in all public versions of CUDA. Taking the address of
683-
// such an argument results in SASS code that attempts to store
684-
// the value using a 32-bit write to an unaligned address.
685-
//
686-
// Optimization: On top of the overhead of spill-to-stack, we
687-
// also convert that address from local to generic space,
688-
// which may result in further overhead. All of it is in most
689-
// cases unnecessary, as we only need the value of the
690-
// variable, and it can be accessed directly by using the
691-
// argument symbol. We'll use the address of the local copy if
692-
// it's needed (see step (a) below).
693-
//
694-
// In order for this bit to do its job we need to:
695-
//
696-
// a) Let LLVM do the spilling and make sure the IR does not
697-
// access byval arguments directly. Instead, the argument
698-
// pointer (which is in the default address space) is
699-
// addrspacecast'ed to param space, and the data it points to
700-
// is copied to allocated local space (i.e. we let compiler
701-
// spill it, which gives us an opportunity to optimize the
702-
// spill away later if nobody needs its address). Then all
703-
// uses of arg pointer are replaced with a pointer to local
704-
// copy of the arg. All this is done in NVPTXLowerKernelArgs.
705-
//
706-
// b) LowerFormalArguments() lowers the argument to
707-
// NVPTXISD::MoveParam without any space conversion.
708-
//
709-
// c) And the final step is done by the code below.
710-
// It replaces the addrspacecast (MoveParam) from step (a)
711-
// with the arg symbol itself. This can then be used for
712-
// [symbol + offset] addressing.
713-
714-
ReplaceNode(N, Src.getOperand(0).getNode());
715-
return;
716-
}
717665
Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
718666
: NVPTX::nvvm_ptr_gen_to_param;
719667
break;

lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,6 +2078,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
20782078
SDValue Root = DAG.getRoot();
20792079
std::vector<SDValue> OutChains;
20802080

2081+
bool isKernel = llvm::isKernelFunction(*F);
20812082
bool isABI = (STI.getSmVersion() >= 20);
20822083
assert(isABI && "Non-ABI compilation is not supported");
20832084
if (!isABI)
@@ -2111,8 +2112,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
21112112
theArgs[i],
21122113
(theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
21132114
: nullptr))) {
2114-
assert(llvm::isKernelFunction(*F) &&
2115-
"Only kernels can have image/sampler params");
2115+
assert(isKernel && "Only kernels can have image/sampler params");
21162116
InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
21172117
continue;
21182118
}
@@ -2334,19 +2334,21 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
23342334
// machine instruction fails because TargetExternalSymbol
23352335
// (not lowered) is target dependent, and CopyToReg assumes
23362336
// the source is lowered.
2337-
//
2338-
// Byval arguments for regular function are lowered the same way
2339-
// as for kernels in order to generate better code and work around
2340-
// a known issue in ptxas. See comments in
2341-
// NVPTXDAGToDAGISel::SelectAddrSpaceCast() for the gory details.
23422337
EVT ObjectVT = getValueType(DL, Ty);
23432338
assert(ObjectVT == Ins[InsIdx].VT &&
23442339
"Ins type did not match function type");
23452340
SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
23462341
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
23472342
if (p.getNode())
23482343
p.getNode()->setIROrder(idx + 1);
2349-
InVals.push_back(p);
2344+
if (isKernel)
2345+
InVals.push_back(p);
2346+
else {
2347+
SDValue p2 = DAG.getNode(
2348+
ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
2349+
DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
2350+
InVals.push_back(p2);
2351+
}
23502352
}
23512353

23522354
// Clang will check explicit VarArg and issue error if any. However, Clang

lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,6 @@ namespace {
101101
class NVPTXLowerKernelArgs : public FunctionPass {
102102
bool runOnFunction(Function &F) override;
103103

104-
// Kernels and regular device functions treat byval arguments
105-
// differently.
106-
bool runOnKernelFunction(Function &F);
107-
bool runOnDeviceFunction(Function &F);
108-
109104
// handle byval parameters
110105
void handleByValParam(Argument *Arg);
111106
// Knowing Ptr must point to the global address space, this function
@@ -197,7 +192,11 @@ void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
197192
// =============================================================================
198193
// Main function for this pass.
199194
// =============================================================================
200-
bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
195+
bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
196+
// Skip non-kernels. See the comments at the top of this file.
197+
if (!isKernelFunction(F))
198+
return false;
199+
201200
if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
202201
// Mark pointers in byval structs as global.
203202
for (auto &B : F) {
@@ -229,19 +228,6 @@ bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
229228
return true;
230229
}
231230

232-
// See comments in NVPTXDAGToDAGISel::SelectAddrSpaceCast() for the
233-
// details on why we need to spill byval arguments into local memory.
234-
bool NVPTXLowerKernelArgs::runOnDeviceFunction(Function &F) {
235-
for (Argument &Arg : F.args())
236-
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
237-
handleByValParam(&Arg);
238-
return true;
239-
}
240-
241-
bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
242-
return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
243-
}
244-
245231
FunctionPass *
246232
llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
247233
return new NVPTXLowerKernelArgs(TM);

test/CodeGen/NVPTX/bug21465.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ entry:
1515
%b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
1616
%0 = load i32, i32* %b, align 4
1717
; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
18-
; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4]
18+
; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4]
1919
store i32 %0, i32* %output, align 4
2020
; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]]
2121
ret void

test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll

Lines changed: 7 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -28,38 +28,20 @@ define void @kernel2(float addrspace(1)* %input, float addrspace(1)* %output) {
2828

2929
%struct.S = type { i32*, i32* }
3030

31-
define void @ptr_in_byval_kernel(%struct.S* byval %input, i32* %output) {
32-
; CHECK-LABEL: .visible .entry ptr_in_byval_kernel(
33-
; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_kernel_param_1]
34-
; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]];
35-
; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_kernel_param_0+8]
36-
; CHECK: cvta.to.global.u64 %[[iptr_g:.*]], %[[iptr]];
37-
%b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
38-
%b = load i32*, i32** %b_ptr, align 4
39-
%v = load i32, i32* %b, align 4
40-
; CHECK: ld.global.u32 %[[val:.*]], [%[[iptr_g]]]
41-
store i32 %v, i32* %output, align 4
42-
; CHECK: st.global.u32 [%[[optr_g]]], %[[val]]
43-
ret void
44-
}
45-
46-
; Regular functions lower byval arguments differently. We need to make
47-
; sure that we're loading byval argument data using [symbol+offset].
48-
; There's also no assumption that all pointers within are in global space.
49-
define void @ptr_in_byval_func(%struct.S* byval %input, i32* %output) {
50-
; CHECK-LABEL: .visible .func ptr_in_byval_func(
51-
; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_func_param_1]
52-
; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_func_param_0+8]
31+
define void @ptr_in_byval(%struct.S* byval %input, i32* %output) {
32+
; CHECK-LABEL: .visible .entry ptr_in_byval(
33+
; CHECK: cvta.to.global.u64
34+
; CHECK: cvta.to.global.u64
5335
%b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
5436
%b = load i32*, i32** %b_ptr, align 4
5537
%v = load i32, i32* %b, align 4
56-
; CHECK: ld.u32 %[[val:.*]], [%[[iptr]]]
38+
; CHECK: ld.global.u32
5739
store i32 %v, i32* %output, align 4
58-
; CHECK: st.u32 [%[[optr]]], %[[val]]
40+
; CHECK: st.global.u32
5941
ret void
6042
}
6143

6244
!nvvm.annotations = !{!0, !1, !2}
6345
!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
6446
!1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1}
65-
!2 = !{void (%struct.S*, i32*)* @ptr_in_byval_kernel, !"kernel", i32 1}
47+
!2 = !{void (%struct.S*, i32*)* @ptr_in_byval, !"kernel", i32 1}

0 commit comments

Comments
 (0)