-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[InferAS] Support getAssumedAddrSpace for Arguments for NVPTX #133991
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InferAS] Support getAssumedAddrSpace for Arguments for NVPTX #133991
Conversation
@llvm/pr-subscribers-debuginfo @llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) ChangesPatch is 69.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133991.diff 8 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 2637b9fab0d50..a683726facd0c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -678,11 +678,8 @@ static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {
LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy()) {
- if (Arg.hasByValAttr())
- handleByValParam(TM, &Arg);
- else if (TM.getDrvInterface() == NVPTX::CUDA)
- markPointerAsGlobal(&Arg);
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+ handleByValParam(TM, &Arg);
} else if (Arg.getType()->isIntegerTy() &&
TM.getDrvInterface() == NVPTX::CUDA) {
HandleIntToPtr(Arg);
@@ -699,10 +696,9 @@ static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());
for (Argument &Arg : F.args())
- if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
- markPointerAsAS(&Arg, ADDRESS_SPACE_LOCAL);
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
adjustByValArgAlignment(&Arg, &Arg, TLI);
- }
+
return true;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index a89ca3037c7ff..e359735c20750 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -599,6 +599,21 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
if (isa<AllocaInst>(V))
return ADDRESS_SPACE_LOCAL;
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ if (isKernelFunction(*Arg->getParent())) {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(getTLI()->getTargetMachine());
+ if (TM.getDrvInterface() == NVPTX::CUDA && !Arg->hasByValAttr())
+ return ADDRESS_SPACE_GLOBAL;
+ } else {
+ // We assume that all device parameters that are passed byval will be
+ // placed in the local AS. Very simple cases will be updated after ISel to
+ // use the device param space where possible.
+ if (Arg->hasByValAttr())
+ return ADDRESS_SPACE_LOCAL;
+ }
+ }
+
return -1;
}
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 73a3f5e4d3694..965d6b6e45e6e 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -305,10 +305,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
}
// Returns true if V is an address expression.
-// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
-// getelementptr operators.
+// TODO: Currently, we consider only arguments and phi, bitcast, addrspacecast,
+// and getelementptr operators.
static bool isAddressExpression(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {
+
+ if (const Argument *Arg = dyn_cast<Argument>(&V))
+ return Arg->getType()->isPointerTy() &&
+ TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
+
const Operator *Op = dyn_cast<Operator>(&V);
if (!Op)
return false;
@@ -341,6 +346,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
static SmallVector<Value *, 2>
getPointerOperands(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {
+ if (isa<Argument>(&V))
+ return {};
+
const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
@@ -505,13 +513,11 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);
- Operator *Op = cast<Operator>(V);
- for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
- if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
- PostorderStack.emplace_back(CE, false);
- }
- }
+ if (auto *Op = dyn_cast<Operator>(V))
+ for (auto &O : Op->operands())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O))
+ if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
+ PostorderStack.emplace_back(CE, false);
}
}
}
@@ -828,6 +834,18 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
isAddressExpression(*V, *DL, TTI));
+ if (auto *Arg = dyn_cast<Argument>(V)) {
+ // Arguments are address space casted in the function body, as we do not
+ // want to change the function signature.
+ Function *F = Arg->getParent();
+ BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();
+
+ Type *NewPtrTy = PointerType::get(Arg->getContext(), NewAddrSpace);
+ auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
+ NewI->insertBefore(Insert);
+ return NewI;
+ }
+
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
@@ -966,8 +984,12 @@ bool InferAddressSpacesImpl::updateAddressSpace(
// of all its pointer operands.
unsigned NewAS = UninitializedAddressSpace;
- const Operator &Op = cast<Operator>(V);
- if (Op.getOpcode() == Instruction::Select) {
+ // isAddressExpression should guarantee that V is an operator or an argument.
+ assert(isa<Operator>(V) || isa<Argument>(V));
+
+ if (isa<Operator>(V) &&
+ cast<Operator>(V).getOpcode() == Instruction::Select) {
+ const Operator &Op = cast<Operator>(V);
Value *Src0 = Op.getOperand(1);
Value *Src1 = Op.getOperand(2);
@@ -1258,7 +1280,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
}
// Otherwise, replaces the use with flat(NewV).
- if (Instruction *VInst = dyn_cast<Instruction>(V)) {
+ if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
// Don't create a copy of the original addrspacecast.
if (U == V && isa<AddrSpaceCastInst>(V))
return;
@@ -1268,7 +1290,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
InsertPos = std::next(NewVInst->getIterator());
else
- InsertPos = std::next(VInst->getIterator());
+ InsertPos = std::next(cast<Instruction>(V)->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
index f5f1dd9fcf0ea..44ac46db254a7 100644
--- a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -12,14 +12,14 @@ define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
; CHECK: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK: ld.param.u64 %rd1, [foo_param_0];
-; CHECK: ld.param.u64 %rd2, [foo_param_1];
-; CHECK: cvta.to.global.u64 %rd3, %rd2;
-; CHECK: cvta.to.global.u64 %rd4, %rd1;
-; CHECK: ld.global.nc.u8 %rs1, [%rd4];
+; CHECK: cvta.to.global.u64 %rd2, %rd1;
+; CHECK: ld.param.u64 %rd3, [foo_param_1];
+; CHECK: cvta.to.global.u64 %rd4, %rd3;
+; CHECK: ld.global.nc.u8 %rs1, [%rd2];
; CHECK: cvt.u32.u8 %r1, %rs1;
; CHECK: add.s32 %r2, %r1, 1;
; CHECK: and.b32 %r3, %r2, 1;
-; CHECK: st.global.u32 [%rd3], %r3;
+; CHECK: st.global.u32 [%rd4], %r3;
; CHECK: ret;
%ld = load i1, ptr %ptr, align 1
%zext = zext i1 %ld to i32
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index e4e1f40d0d8b2..38b7400696c54 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; OPT-NEXT: [[ENTRY:.*:]]
-; OPT-NEXT: [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
-; OPT-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
-; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
+; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
; OPT-NEXT: [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
; OPT-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
@@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_int(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
-; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
+; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
@@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
-; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
+; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
-; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
+; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
; OPT-NEXT: ret void
store ptr %input, ptr %addr, align 8
ret void
@@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NOT .local
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
-; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
-; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
+; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT]], align 8
; OPT-NEXT: ret void
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
-; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
+; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
%val = load i32, ptr %input
@@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
-; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
+; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
@@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
@@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
@@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
@@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
@@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_select(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index a1c0a86e9c4e4..8fa7d5c3e0cbc 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Alex MacLean (AlexMaclean) ChangesPatch is 69.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133991.diff 8 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 2637b9fab0d50..a683726facd0c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -678,11 +678,8 @@ static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {
LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy()) {
- if (Arg.hasByValAttr())
- handleByValParam(TM, &Arg);
- else if (TM.getDrvInterface() == NVPTX::CUDA)
- markPointerAsGlobal(&Arg);
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+ handleByValParam(TM, &Arg);
} else if (Arg.getType()->isIntegerTy() &&
TM.getDrvInterface() == NVPTX::CUDA) {
HandleIntToPtr(Arg);
@@ -699,10 +696,9 @@ static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());
for (Argument &Arg : F.args())
- if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
- markPointerAsAS(&Arg, ADDRESS_SPACE_LOCAL);
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
adjustByValArgAlignment(&Arg, &Arg, TLI);
- }
+
return true;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index a89ca3037c7ff..e359735c20750 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -599,6 +599,21 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
if (isa<AllocaInst>(V))
return ADDRESS_SPACE_LOCAL;
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ if (isKernelFunction(*Arg->getParent())) {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(getTLI()->getTargetMachine());
+ if (TM.getDrvInterface() == NVPTX::CUDA && !Arg->hasByValAttr())
+ return ADDRESS_SPACE_GLOBAL;
+ } else {
+ // We assume that all device parameters that are passed byval will be
+ // placed in the local AS. Very simple cases will be updated after ISel to
+ // use the device param space where possible.
+ if (Arg->hasByValAttr())
+ return ADDRESS_SPACE_LOCAL;
+ }
+ }
+
return -1;
}
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 73a3f5e4d3694..965d6b6e45e6e 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -305,10 +305,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
}
// Returns true if V is an address expression.
-// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
-// getelementptr operators.
+// TODO: Currently, we consider only arguments and phi, bitcast, addrspacecast,
+// and getelementptr operators.
static bool isAddressExpression(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {
+
+ if (const Argument *Arg = dyn_cast<Argument>(&V))
+ return Arg->getType()->isPointerTy() &&
+ TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
+
const Operator *Op = dyn_cast<Operator>(&V);
if (!Op)
return false;
@@ -341,6 +346,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
static SmallVector<Value *, 2>
getPointerOperands(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {
+ if (isa<Argument>(&V))
+ return {};
+
const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
@@ -505,13 +513,11 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);
- Operator *Op = cast<Operator>(V);
- for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
- if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
- PostorderStack.emplace_back(CE, false);
- }
- }
+ if (auto *Op = dyn_cast<Operator>(V))
+ for (auto &O : Op->operands())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O))
+ if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
+ PostorderStack.emplace_back(CE, false);
}
}
}
@@ -828,6 +834,18 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
isAddressExpression(*V, *DL, TTI));
+ if (auto *Arg = dyn_cast<Argument>(V)) {
+ // Arguments are address space casted in the function body, as we do not
+ // want to change the function signature.
+ Function *F = Arg->getParent();
+ BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();
+
+ Type *NewPtrTy = PointerType::get(Arg->getContext(), NewAddrSpace);
+ auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
+ NewI->insertBefore(Insert);
+ return NewI;
+ }
+
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
@@ -966,8 +984,12 @@ bool InferAddressSpacesImpl::updateAddressSpace(
// of all its pointer operands.
unsigned NewAS = UninitializedAddressSpace;
- const Operator &Op = cast<Operator>(V);
- if (Op.getOpcode() == Instruction::Select) {
+ // isAddressExpression should guarantee that V is an operator or an argument.
+ assert(isa<Operator>(V) || isa<Argument>(V));
+
+ if (isa<Operator>(V) &&
+ cast<Operator>(V).getOpcode() == Instruction::Select) {
+ const Operator &Op = cast<Operator>(V);
Value *Src0 = Op.getOperand(1);
Value *Src1 = Op.getOperand(2);
@@ -1258,7 +1280,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
}
// Otherwise, replaces the use with flat(NewV).
- if (Instruction *VInst = dyn_cast<Instruction>(V)) {
+ if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
// Don't create a copy of the original addrspacecast.
if (U == V && isa<AddrSpaceCastInst>(V))
return;
@@ -1268,7 +1290,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
InsertPos = std::next(NewVInst->getIterator());
else
- InsertPos = std::next(VInst->getIterator());
+ InsertPos = std::next(cast<Instruction>(V)->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
index f5f1dd9fcf0ea..44ac46db254a7 100644
--- a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -12,14 +12,14 @@ define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
; CHECK: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK: ld.param.u64 %rd1, [foo_param_0];
-; CHECK: ld.param.u64 %rd2, [foo_param_1];
-; CHECK: cvta.to.global.u64 %rd3, %rd2;
-; CHECK: cvta.to.global.u64 %rd4, %rd1;
-; CHECK: ld.global.nc.u8 %rs1, [%rd4];
+; CHECK: cvta.to.global.u64 %rd2, %rd1;
+; CHECK: ld.param.u64 %rd3, [foo_param_1];
+; CHECK: cvta.to.global.u64 %rd4, %rd3;
+; CHECK: ld.global.nc.u8 %rs1, [%rd2];
; CHECK: cvt.u32.u8 %r1, %rs1;
; CHECK: add.s32 %r2, %r1, 1;
; CHECK: and.b32 %r3, %r2, 1;
-; CHECK: st.global.u32 [%rd3], %r3;
+; CHECK: st.global.u32 [%rd4], %r3;
; CHECK: ret;
%ld = load i1, ptr %ptr, align 1
%zext = zext i1 %ld to i32
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index e4e1f40d0d8b2..38b7400696c54 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; OPT-NEXT: [[ENTRY:.*:]]
-; OPT-NEXT: [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
-; OPT-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
-; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
+; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
; OPT-NEXT: [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
; OPT-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
@@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_int(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
-; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
+; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
@@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
-; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
+; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
-; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
+; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
; OPT-NEXT: ret void
store ptr %input, ptr %addr, align 8
ret void
@@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NOT .local
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
-; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
-; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
+; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT]], align 8
; OPT-NEXT: ret void
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
-; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
+; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
%val = load i32, ptr %input
@@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
-; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
+; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
@@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
@@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
@@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
@@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
@@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_select(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index a1c0a86e9c4e4..8fa7d5c3e0cbc 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with a test suggestion.
08103f8
to
04ddbe9
Compare
04ddbe9
to
519bed1
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/195/builds/7133 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/55/builds/9389 Here is the relevant piece of the build log for the reference
|
No description provided.