[MLIR][GPUCommon] Remove typed pointer support #70735
Conversation
This commit removes GPUCommon's lowering support for typed pointers. Typed pointers have been deprecated for a while and are planned to be removed from the LLVM dialect soon. Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
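For context, here is a minimal, hand-written sketch (not taken from this patch; the function names and values are made up for illustration) of the difference in the LLVM dialect. With typed pointers the pointee is spelled inside the pointer type, so the GPU runtime lowering had to insert llvm.bitcast ops around the raw pointers returned by the runtime; with opaque pointers there is a single !llvm.ptr type and those casts disappear.

llvm.func @typed_sketch(%raw: !llvm.ptr<i8>) {
  // Typed pointers (deprecated): the element type is part of the pointer type,
  // so a bitcast is needed to view the runtime's i8 pointer as a float pointer.
  %typed = llvm.bitcast %raw : !llvm.ptr<i8> to !llvm.ptr<f32>
  llvm.return
}

llvm.func @opaque_sketch(%p: !llvm.ptr) {
  // Opaque pointers (the only remaining mode): one pointer type for everything,
  // which is why the bitcasts deleted in this patch are no longer needed.
  llvm.return
}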
@llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-mlir-gpu
Author: Christian Ulmann (Dinistro)
Changes: This commit removes GPUCommon's lowering support for typed pointers. Typed pointers have been deprecated for a while and are planned to be removed from the LLVM dialect soon. Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
Patch is 20.98 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/70735.diff
12 Files Affected:
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index cf6e545749ffc64..ffad25ea5993a8a 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -460,10 +460,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string",
/*default=*/"gpu::getDefaultGpuBinaryAnnotation()",
"Annotation attribute string for GPU binary"
- >,
- Option<"useOpaquePointers", "use-opaque-pointers", "bool",
- /*default=*/"true", "Generate LLVM IR using opaque pointers "
- "instead of typed pointers">,
+ >
];
let dependentDialects = [
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index 447e3c9a59e5c0a..bbed1ea5cf62204 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -210,7 +210,7 @@ class GEPIndicesAdaptor {
/// string (operations inserted at the builder insertion point).
Value createGlobalString(Location loc, OpBuilder &builder, StringRef name,
StringRef value, Linkage linkage,
- bool useOpaquePointers);
+ bool useOpaquePointers = true);
/// LLVM requires some operations to be inside of a Module operation. This
/// function confirms that the Operation has the desired properties.
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 12bd02050be036c..8b454e2af45b2e3 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -588,7 +588,6 @@ DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SetCsrPointersOp)
void GpuToLLVMConversionPass::runOnOperation() {
LowerToLLVMOptions options(&getContext());
- options.useOpaquePointers = useOpaquePointers;
options.useBarePtrCallConv = hostBarePtrCallConv;
LLVMTypeConverter converter(&getContext(), options);
@@ -835,8 +834,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
// Allocate the underlying buffer and store a pointer to it in the MemRef
// descriptor.
- Type elementPtrType = this->getElementPtrType(memRefType);
-
auto nullPtr = rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPointerType);
Value stream = adaptor.getAsyncDependencies().empty()
? nullPtr
@@ -848,9 +845,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
Value allocatedPtr =
allocCallBuilder.create(loc, rewriter, {sizeBytes, stream, isHostShared})
.getResult();
- if (!getTypeConverter()->useOpaquePointers())
- allocatedPtr =
- rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, allocatedPtr);
// No alignment.
Value alignedPtr = allocatedPtr;
@@ -880,8 +874,6 @@ LogicalResult ConvertDeallocOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pointer =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pointer = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pointer);
Value stream = adaptor.getAsyncDependencies().front();
deallocCallBuilder.create(loc, rewriter, {pointer, stream});
@@ -1050,9 +1042,6 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray(
auto elementPtr = builder.create<LLVM::GEPOp>(
loc, llvmPointerPointerType, llvmPointerType, arrayPtr,
ArrayRef<LLVM::GEPArg>{en.index()});
- if (!getTypeConverter()->useOpaquePointers())
- fieldPtr =
- builder.create<LLVM::BitcastOp>(loc, llvmPointerType, fieldPtr);
builder.create<LLVM::StoreOp>(loc, fieldPtr, elementPtr);
}
return arrayPtr;
@@ -1079,7 +1068,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateKernelNameConstant(
std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name));
return LLVM::createGlobalString(
loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()),
- LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers());
+ LLVM::Linkage::Internal);
}
// Emits LLVM IR to launch a kernel function. Expects the module that contains
@@ -1170,9 +1159,9 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
SmallString<128> nameBuffer(kernelModule.getName());
nameBuffer.append(kGpuBinaryStorageSuffix);
- Value data = LLVM::createGlobalString(
- loc, rewriter, nameBuffer.str(), binaryAttr.getValue(),
- LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers());
+ Value data =
+ LLVM::createGlobalString(loc, rewriter, nameBuffer.str(),
+ binaryAttr.getValue(), LLVM::Linkage::Internal);
// Pass the binary size. SPIRV requires binary size.
auto gpuBlob = binaryAttr.getValue();
@@ -1244,11 +1233,7 @@ static Value bitAndAddrspaceCast(Location loc,
typeConverter.getPointerType(sourceTy.getElementType(),
destinationType.getAddressSpace()),
sourcePtr);
-
- if (typeConverter.useOpaquePointers())
- return sourcePtr;
-
- return rewriter.create<LLVM::BitcastOp>(loc, destinationType, sourcePtr);
+ return sourcePtr;
}
LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite(
@@ -1366,8 +1351,6 @@ LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pTensor =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pTensor = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pTensor);
Type dType = op.getMemref().getType().getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
@@ -1457,11 +1440,6 @@ LogicalResult ConvertCreateCooOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowIdxs);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type iType =
llvm::cast<MemRefType>(op.getColIdxs().getType()).getElementType();
Type dType =
@@ -1489,10 +1467,6 @@ LogicalResult ConvertCreateCooAoSOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pIdxs = MemRefDescriptor(adaptor.getIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type iType = llvm::cast<MemRefType>(op.getIdxs().getType()).getElementType();
Type dType =
llvm::cast<MemRefType>(op.getValues().getType()).getElementType();
@@ -1522,11 +1496,6 @@ LogicalResult ConvertCreateCsrOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowPos);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getRowPos().getType()).getElementType();
Type iType =
@@ -1556,8 +1525,6 @@ LogicalResult ConvertCreate2To4SpMatOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pMat =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pMat = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pMat);
Type dType =
llvm::cast<MemRefType>(op.getMemref().getType()).getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
@@ -1630,8 +1597,6 @@ LogicalResult ConvertSpMVOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
spMVCallBuilder.create(loc, rewriter,
{modeA, adaptor.getSpmatA(), adaptor.getDnX(),
adaptor.getDnY(), computeType, pBuf, stream});
@@ -1737,8 +1702,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite(
SmallVector<Value> pBufs;
for (Value buffer : adaptor.getBuffers()) {
Value pBuf = MemRefDescriptor(buffer).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
pBufs.push_back(pBuf);
}
createCuSparseLtSpMMBuilder.create(
@@ -1748,8 +1711,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite(
} else {
Value pBuf = MemRefDescriptor(adaptor.getBuffers().front())
.allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
createSpMMCallBuilder.create(loc, rewriter,
{modeA, modeB, adaptor.getSpmatA(),
adaptor.getDnmatB(), adaptor.getDnmatC(),
@@ -1781,8 +1742,6 @@ LogicalResult ConvertSDDMMOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
createSDDMMCallBuilder.create(loc, rewriter,
{modeA, modeB, adaptor.getDnmatA(),
adaptor.getDnmatB(), adaptor.getSpmatC(),
@@ -1837,9 +1796,6 @@ ConvertSpGEMMWorkEstimationOrComputeOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
-
Value bufferSizeNew;
if (adaptor.getKind() ==
@@ -1934,11 +1890,6 @@ LogicalResult ConvertSetCsrPointersOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getCoordinates()).allocatedPtr(rewriter, loc);
Value pVal =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pPos);
- pCrd = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pCrd);
- pVal = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pVal);
- }
createSetCsrPointersBuilder.create(
loc, rewriter, {adaptor.getSpmat(), pPos, pCrd, pVal, stream});
rewriter.replaceOp(op, {stream});
@@ -1959,11 +1910,6 @@ LogicalResult ConvertCreateCscOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getRowIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pColPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColPos);
- pRowIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getColPos().getType()).getElementType();
Type iType =
@@ -1997,11 +1943,6 @@ LogicalResult ConvertCreateBsrOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getBColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowPos);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getBRowPos().getType()).getElementType();
Type iType =
diff --git a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
index 113d49c507e9c8e..f448d35992333b3 100644
--- a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
index 70450656b9df64f..ae8b7aaac7fd944 100644
--- a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
// CHECK-LABEL: llvm.func @main
diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
index 9df110d9b23bacf..1b9afcdf50a17f0 100644
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin use-opaque-pointers=1" -split-input-file | FileCheck %s
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco use-opaque-pointers=1" -split-input-file | FileCheck %s --check-prefix=ROCDL
+// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco" -split-input-file | FileCheck %s --check-prefix=ROCDL
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
index 5c8e6d11934dbba..3f86b0769827957 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
index 7e4b1191c5e6c29..aaced31813d574a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
index f86d929e0e19acf..d4c0a76088356f5 100644
--- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
index a828c1d58da5f9e..d15efe354cfa8b4 100644
--- a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/transfer_write.mlir b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
index 9c2edf698548ba8..cba85915b49e43a 100644
--- a/mlir/test/Conversion/GPUCommon/transfer_write.mlir
+++ b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
func.func @warp_extract(%arg0: index, %arg1: memref<1024x1024xf32>, %arg2: index, %arg3: vector<1xf32>) {
%c0 = arith.constant 0 : index
diff --git a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir b/mlir/test/Conversion/GPUCommon/typed-pointers.mlir
deleted file mode 100644
index e27162c7dbc1902..000000000000000
--- a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir
+++ /dev/null
@@ -1,82 +0,0 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=0' --split-input-file | FileCheck %s
-
-module attributes {gpu.container_module} {
- // CHECK-LABEL: llvm.func @main
- // CHECK-SAME: %[[size:.*]]: i64
- func.func @main(%size : index) {
- // CHECK: %[[stream:.*]] = llvm.call @mgpuStreamCreate()
- %0 = gpu.wait async
- // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]]
- // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]]
- // CHECK: %[[isHostShared:.*]] = llvm.mlir.constant
- // CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]], %[[isHostShared]])
- %1, %2 = gpu.alloc async [%0] (%size) : memref<?xf32>
- // CHECK: %[[float_ptr:.*]] = llvm.extractvalue {{.*}}[0]
- // CHECK: %[[void_ptr:.*]] = llvm.bitcast %[[float_ptr]]
- // CHECK: llvm.call @mgpuMemFree(%[[void_ptr]], %[[stream]])
- %3 = gpu.dealloc async [%2] %1 : memref<?xf32>
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[stream]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[stream]])
- gpu.wait [%3]
- return
- }
-
- // CHECK: func @foo
- func.func @foo(%dst : memref<7xf32, 1>, %src : memref<7xf32>) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint
- // CHECK-NOT: llvm.addrspacecast
- // CHECK: %[[src:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32>
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}
-
-// -----
-
-module attributes {gpu.container_module} {
-
- // CHECK: func @memset_f32
- func.func @memset_f32(%dst : memref<7xf32, 1>, %value : f32) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
- // CHECK: %[[value:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemset32(%[[dst]], %[[value]],...
[truncated]
I see a few uses in llvm-project/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp, lines 85 to 88 (at commit ced9f4f).
Can we also remove these in favour of plain and canonical LLVM::LLVMPointerType::get calls? I think this would also lead to a lot of cleanups in a few places (here and in other dialects), as the element type is now dead code; see the sketch just below.
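Roughly what that cleanup could look like, as a hand-written sketch rather than part of this patch; the variable names follow the bitAndAddrspaceCast snippet in the diff above, and the exact call sites would differ:

// Today: the converter helper still threads an element type through, even
// though opaque pointers no longer use it.
Type oldPtrTy = typeConverter.getPointerType(sourceTy.getElementType(),
                                             destinationType.getAddressSpace());

// Suggested: build the opaque pointer type directly from the context and
// address space; the element type argument becomes dead code.
Type newPtrTy = LLVM::LLVMPointerType::get(rewriter.getContext(),
                                           destinationType.getAddressSpace());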
Changes look good to me. Adding Guray to give the green light on this.
LGTM
Thanks for doing this, it looks good.
Let me know if there's anything I can help with deprecating for the NVGPU and GPU dialects.
@@ -210,7 +210,7 @@ class GEPIndicesAdaptor {
 /// string (operations inserted at the builder insertion point).
 Value createGlobalString(Location loc, OpBuilder &builder, StringRef name,
                          StringRef value, Linkage linkage,
-                         bool useOpaquePointers);
+                         bool useOpaquePointers = true);
I am guessing this will be removed incrementally.
That's the plan, yes. We will be working on the LLVM dialect side once all users of typed pointers have been dropped.
Thanks 😄