[flang][cuda] Do not generate NVVM target attribute when creating the module #116882
Conversation
@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Leave it to the NVVMAttachTargetPass so we can set compute capability and features.

Full diff: https://github.com/llvm/llvm-project/pull/116882.diff

5 Files Affected:
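As a minimal sketch of the intended flow (the pass invocation and the sm_80/+ptx76 values below are illustrative assumptions, not part of this patch): the device module is now created bare, and a later run of the upstream nvvm-attach-target pass attaches a parameterized target.

// Module as created by cuf::getOrCreateGPUModule after this patch; no
// targets array is set at creation time.
gpu.module @cuda_device_mod {
}
// A subsequent run such as `mlir-opt --nvvm-attach-target="chip=sm_80 features=+ptx76"`
// (option values illustrative) would be expected to rewrite it to:
// gpu.module @cuda_device_mod [#nvvm.target<chip = "sm_80", features = "+ptx76">] {
// }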
diff --git a/flang/lib/Optimizer/Transforms/CUFCommon.cpp b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
index 5eca86529f9e17..162df8f9cab9cd 100644
--- a/flang/lib/Optimizer/Transforms/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
@@ -22,9 +22,6 @@ mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod,
   mlir::OpBuilder builder(ctx);
   auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(),
                                                        cudaDeviceModuleName);
-  llvm::SmallVector<mlir::Attribute> targets;
-  targets.push_back(mlir::NVVM::NVVMTargetAttr::get(ctx));
-  gpuMod.setTargetsAttr(builder.getArrayAttr(targets));
   mlir::Block::iterator insertPt(mod.getBodyRegion().front().end());
   symTab.insert(gpuMod, insertPt);
   return gpuMod;
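With the hard-coded NVVMTargetAttr::get(ctx), which produced a default #nvvm.target, removed here, the attached target is free to carry explicit parameters. A hypothetical sketch of the resulting IR, with assumed placeholder values:

// A device module whose NVVM target names an explicit compute capability
// and PTX feature set (sm_80 and +ptx76 are placeholder values):
gpu.module @cuda_device_mod [#nvvm.target<chip = "sm_80", features = "+ptx76">] {
}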
diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir
index 49bb5bdf5e6bc4..abf2d56695b172 100644
--- a/flang/test/Fir/CUDA/cuda-alloc-free.fir
+++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir
@@ -73,7 +73,7 @@ func.func @_QPtest_type() {
 // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64
 // CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>

-gpu.module @cuda_device_mod [#nvvm.target] {
+gpu.module @cuda_device_mod {
   gpu.func @_QMalloc() kernel {
     %0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMallocEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
     gpu.return
diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90
index 99386abc4fafdd..901497e2cde550 100644
--- a/flang/test/Fir/CUDA/cuda-constructor-2.f90
+++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90
@@ -10,11 +10,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
     fir.has_value %1 : !fir.box<!fir.heap<!fir.array<?xi32>>>
   }

-  gpu.module @cuda_device_mod [#nvvm.target] {
+  gpu.module @cuda_device_mod {
   }
 }

-// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK: gpu.module @cuda_device_mod

 // CHECK: llvm.func internal @__cudaFortranConstructor() {
 // CHECK-DAG: %[[MODULE:.*]] = cuf.register_module @cuda_device_mod -> !llvm.ptr
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
index c83a938d5af214..8cac643b27c349 100644
--- a/flang/test/Fir/CUDA/cuda-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -5,9 +5,9 @@
 module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
   fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>

-  gpu.module @cuda_device_mod [#nvvm.target] {
+  gpu.module @cuda_device_mod {
   }
 }

-// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK: gpu.module @cuda_device_mod
 // CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
index 18b56a491cd65f..6707572efb5a8f 100644
--- a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
@@ -25,7 +25,7 @@ // Test that global used in device function are flagged with the correct
 // CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
 // CHECK: fir.global linkonce @_QQcl[[SYMBOL]] {data_attr = #cuf.cuda<constant>} constant : !fir.char<1,32>

-// CHECK-LABEL: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK-LABEL: gpu.module @cuda_device_mod

 // CHECK: fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a
// -----
@@ -51,5 +51,5 @@ // Test that global used in device function are flagged with the correct
 // CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
 // CHECK: fir.global linkonce @_QQcl[[SYMBOL]] constant : !fir.char<1,32>

-// CHECK-LABEL: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK-LABEL: gpu.module @cuda_device_mod

 // CHECK-NOT: fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/13183. Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/15171. Here is the relevant piece of the build log for reference: