Skip to content

Commit 5a7de89

Browse files
author
Justin Lebar
committed
[NVPTX] Use addrspacecast instead of target-specific intrinsics in NVPTXGenericToNVVM.
Summary: NVPTXGenericToNVVM was using target-specific intrinsics to do address space casts. Using the addrspacecast instruction is (a lot) simpler, and it also has the advantage of being understandable to other passes. In particular, InferAddrSpaces is able to understand these address space casts and remove them in most cases.

Reviewers: tra
Subscribers: jholewinski, sanjoy, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D43914
llvm-svn: 326389
1 parent 285271c commit 5a7de89

File tree

4 files changed

+19
-65
lines changed

4 files changed

+19
-65
lines changed

llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp

Lines changed: 9 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ class GenericToNVVM : public ModulePass {
4545
void getAnalysisUsage(AnalysisUsage &AU) const override {}
4646

4747
private:
48-
Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
49-
IRBuilder<> &Builder);
5048
Value *remapConstant(Module *M, Function *F, Constant *C,
5149
IRBuilder<> &Builder);
5250
Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
@@ -156,46 +154,6 @@ bool GenericToNVVM::runOnModule(Module &M) {
156154
return true;
157155
}
158156

159-
Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
160-
GlobalVariable *GV,
161-
IRBuilder<> &Builder) {
162-
PointerType *GVType = GV->getType();
163-
Value *CVTA = nullptr;
164-
165-
// See if the address space conversion requires the operand to be bitcast
166-
// to i8 addrspace(n)* first.
167-
EVT ExtendedGVType = EVT::getEVT(GV->getValueType(), true);
168-
if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
169-
// A bitcast to i8 addrspace(n)* on the operand is needed.
170-
LLVMContext &Context = M->getContext();
171-
unsigned int AddrSpace = GVType->getAddressSpace();
172-
Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
173-
CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
174-
// Insert the address space conversion.
175-
Type *ResultType =
176-
PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
177-
Function *CVTAFunction = Intrinsic::getDeclaration(
178-
M, Intrinsic::nvvm_ptr_global_to_gen, {ResultType, DestTy});
179-
CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
180-
// Another bitcast from i8 * to <the element type of GVType> * is
181-
// required.
182-
DestTy =
183-
PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC);
184-
CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
185-
} else {
186-
// A simple CVTA is enough.
187-
SmallVector<Type *, 2> ParamTypes;
188-
ParamTypes.push_back(PointerType::get(GV->getValueType(),
189-
llvm::ADDRESS_SPACE_GENERIC));
190-
ParamTypes.push_back(GVType);
191-
Function *CVTAFunction = Intrinsic::getDeclaration(
192-
M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
193-
CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
194-
}
195-
196-
return CVTA;
197-
}
198-
199157
Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
200158
IRBuilder<> &Builder) {
201159
// If the constant C has been converted already in the given function F, just
@@ -207,17 +165,17 @@ Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
207165

208166
Value *NewValue = C;
209167
if (isa<GlobalVariable>(C)) {
210-
// If the constant C is a global variable and is found in GVMap, generate a
211-
// set of instructions that convert the clone of C with the global
212-
// address space specifier to a generic pointer.
213-
// The constant C cannot be used here, as it will be erased from the
214-
// module eventually. And the clone of C with the global address space
215-
// specifier cannot be used here either, as it will affect the types of
216-
// other instructions in the function. Hence, this address space conversion
217-
// is required.
168+
// If the constant C is a global variable and is found in GVMap, substitute
169+
//
170+
// addrspacecast GVMap[C] to addrspace(0)
171+
//
172+
// for our use of C.
218173
GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C));
219174
if (I != GVMap.end()) {
220-
NewValue = getOrInsertCVTA(M, F, I->second, Builder);
175+
GlobalVariable *GV = I->second;
176+
NewValue = Builder.CreateAddrSpaceCast(
177+
GV,
178+
PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC));
221179
}
222180
} else if (isa<ConstantAggregate>(C)) {
223181
// If any element in the constant vector or aggregate C is or uses a global

llvm/test/CodeGen/NVPTX/access-non-generic.ll

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,6 @@
55

66
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
77
@scalar = internal addrspace(3) global float 0.000000e+00, align 4
8-
@generic_scalar = internal global float 0.000000e+00, align 4
9-
10-
define float @ld_from_shared() {
11-
%1 = addrspacecast float* @generic_scalar to float addrspace(3)*
12-
%2 = load float, float addrspace(3)* %1
13-
ret float %2
14-
}
158

169
; Verifies nvptx-favor-non-generic correctly optimizes generic address space
1710
; usage to non-generic address space usage for the patterns we claim to handle:

llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ define void @func() !dbg !8 {
1616
;CHECK-LABEL: @func()
1717
;CHECK-SAME: !dbg [[FUNCNODE:![0-9]+]]
1818
entry:
19-
; References to the variables must be converted back to generic address space via llvm intrinsic call
20-
; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8({{.*}} addrspace(1)* @.str
19+
; References to the variables must be converted back to generic address space.
20+
; CHECK-DAG: addrspacecast ([4 x i8] addrspace(1)* @.str to [4 x i8]*)
2121
%0 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), align 1
2222
call void @extfunc(i8 signext %0)
23-
; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)* @static_var
23+
; CHECK-DAG: addrspacecast (i8 addrspace(1)* @static_var to i8*)
2424
%1 = load i8, i8* @static_var, align 1
2525
call void @extfunc(i8 signext %1)
2626
ret void

llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@ target triple = "nvptx-nvidia-cuda"
77

88
; CHECK: .global .align 4 .u32 myglobal = 42;
99
@myglobal = internal global i32 42, align 4
10-
; CHECK: .global .align 4 .u32 myconst = 42;
11-
@myconst = internal constant i32 42, align 4
10+
; CHECK: .global .align 4 .u32 myconst = 420;
11+
@myconst = internal constant i32 420, align 4
1212

1313

1414
define void @foo(i32* %a, i32* %b) {
15-
; CHECK: cvta.global.u32
15+
; Expect one load -- @myconst isn't loaded from, because we know its value
16+
; statically.
17+
; CHECK: ld.global.u32
18+
; CHECK: st.global.u32
19+
; CHECK: st.global.u32
1620
%ld1 = load i32, i32* @myglobal
17-
; CHECK: cvta.global.u32
1821
%ld2 = load i32, i32* @myconst
1922
store i32 %ld1, i32* %a
2023
store i32 %ld2, i32* %b

0 commit comments

Comments
 (0)