Skip to content

[InferAS] Support getAssumedAddrSpace for Arguments for NVPTX #133991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,11 +678,8 @@ static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {

LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
for (Argument &Arg : F.args()) {
if (Arg.getType()->isPointerTy()) {
if (Arg.hasByValAttr())
handleByValParam(TM, &Arg);
else if (TM.getDrvInterface() == NVPTX::CUDA)
markPointerAsGlobal(&Arg);
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
handleByValParam(TM, &Arg);
} else if (Arg.getType()->isIntegerTy() &&
TM.getDrvInterface() == NVPTX::CUDA) {
HandleIntToPtr(Arg);
Expand All @@ -699,10 +696,9 @@ static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());

for (Argument &Arg : F.args())
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
markPointerAsAS(&Arg, ADDRESS_SPACE_LOCAL);
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
adjustByValArgAlignment(&Arg, &Arg, TLI);
}

return true;
}

Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,21 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
if (isa<AllocaInst>(V))
return ADDRESS_SPACE_LOCAL;

if (const Argument *Arg = dyn_cast<Argument>(V)) {
if (isKernelFunction(*Arg->getParent())) {
const NVPTXTargetMachine &TM =
static_cast<const NVPTXTargetMachine &>(getTLI()->getTargetMachine());
if (TM.getDrvInterface() == NVPTX::CUDA && !Arg->hasByValAttr())
return ADDRESS_SPACE_GLOBAL;
} else {
// We assume that all device parameters that are passed byval will be
// placed in the local AS. Very simple cases will be updated after ISel to
// use the device param space where possible.
if (Arg->hasByValAttr())
return ADDRESS_SPACE_LOCAL;
}
}

return -1;
}

Expand Down
49 changes: 36 additions & 13 deletions llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,10 +305,16 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
}

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
// TODO: Currently, we only consider:
// - arguments
// - phi, bitcast, addrspacecast, and getelementptr operators
static bool isAddressExpression(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {

if (const Argument *Arg = dyn_cast<Argument>(&V))
return Arg->getType()->isPointerTy() &&
TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;

const Operator *Op = dyn_cast<Operator>(&V);
if (!Op)
return false;
Expand Down Expand Up @@ -341,6 +347,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
static SmallVector<Value *, 2>
getPointerOperands(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {
if (isa<Argument>(&V))
return {};

const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
Expand Down Expand Up @@ -505,13 +514,11 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);

Operator *Op = cast<Operator>(V);
for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
PostorderStack.emplace_back(CE, false);
}
}
if (auto *Op = dyn_cast<Operator>(V))
for (auto &O : Op->operands())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O))
if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
PostorderStack.emplace_back(CE, false);
}
}
}
Expand Down Expand Up @@ -828,6 +835,18 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
isAddressExpression(*V, *DL, TTI));

if (auto *Arg = dyn_cast<Argument>(V)) {
// Arguments are address space casted in the function body, as we do not
// want to change the function signature.
Function *F = Arg->getParent();
BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();

Type *NewPtrTy = PointerType::get(Arg->getContext(), NewAddrSpace);
auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
NewI->insertBefore(Insert);
return NewI;
}

if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
Expand Down Expand Up @@ -966,8 +985,12 @@ bool InferAddressSpacesImpl::updateAddressSpace(
// of all its pointer operands.
unsigned NewAS = UninitializedAddressSpace;

const Operator &Op = cast<Operator>(V);
if (Op.getOpcode() == Instruction::Select) {
// isAddressExpression should guarantee that V is an operator or an argument.
assert(isa<Operator>(V) || isa<Argument>(V));

if (isa<Operator>(V) &&
cast<Operator>(V).getOpcode() == Instruction::Select) {
const Operator &Op = cast<Operator>(V);
Value *Src0 = Op.getOperand(1);
Value *Src1 = Op.getOperand(2);

Expand Down Expand Up @@ -1258,7 +1281,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
}

// Otherwise, replaces the use with flat(NewV).
if (Instruction *VInst = dyn_cast<Instruction>(V)) {
if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
// Don't create a copy of the original addrspacecast.
if (U == V && isa<AddrSpaceCastInst>(V))
return;
Expand All @@ -1268,7 +1291,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
InsertPos = std::next(NewVInst->getIterator());
else
InsertPos = std::next(VInst->getIterator());
InsertPos = std::next(cast<Instruction>(V)->getIterator());

while (isa<PHINode>(InsertPos))
++InsertPos;
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/NVPTX/i1-ext-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
; CHECK: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK: ld.param.u64 %rd1, [foo_param_0];
; CHECK: ld.param.u64 %rd2, [foo_param_1];
; CHECK: cvta.to.global.u64 %rd3, %rd2;
; CHECK: cvta.to.global.u64 %rd4, %rd1;
; CHECK: ld.global.nc.u8 %rs1, [%rd4];
; CHECK: cvta.to.global.u64 %rd2, %rd1;
; CHECK: ld.param.u64 %rd3, [foo_param_1];
; CHECK: cvta.to.global.u64 %rd4, %rd3;
; CHECK: ld.global.nc.u8 %rs1, [%rd2];
; CHECK: cvt.u32.u8 %r1, %rs1;
; CHECK: add.s32 %r2, %r1, 1;
; CHECK: and.b32 %r3, %r2, 1;
; CHECK: st.global.u32 [%rd3], %r3;
; CHECK: st.global.u32 [%rd4], %r3;
; CHECK: ret;
%ld = load i1, ptr %ptr, align 1
%zext = zext i1 %ld to i32
Expand Down
46 changes: 13 additions & 33 deletions llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; OPT-NEXT: [[ENTRY:.*:]]
; OPT-NEXT: [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
; OPT-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
; OPT-NEXT: [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
; OPT-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
Expand Down Expand Up @@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_int(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
Expand All @@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
Expand Down Expand Up @@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
; OPT-NEXT: ret void
store ptr %input, ptr %addr, align 8
ret void
Expand All @@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NOT .local
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT]], align 8
; OPT-NEXT: ret void
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
Expand Down Expand Up @@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
%val = load i32, ptr %input
Expand Down Expand Up @@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
Expand Down Expand Up @@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
Expand All @@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void

%val = load i32, ptr %inout
Expand Down Expand Up @@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
Expand All @@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
Expand Down Expand Up @@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_select(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
Expand Down
Loading