Skip to content

Commit 01cd7ad

Browse files
authored
[flang][cuda] Do not generate NVVM target attribute when creating the module (#116882)
Leave it to the `NVVMAttachTargetPass` so we can set compute capability and features.
1 parent 2187738 commit 01cd7ad

File tree

5 files changed

+7
-10
lines changed

5 files changed

+7
-10
lines changed

flang/lib/Optimizer/Transforms/CUFCommon.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod,
2222
mlir::OpBuilder builder(ctx);
2323
auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(),
2424
cudaDeviceModuleName);
25-
llvm::SmallVector<mlir::Attribute> targets;
26-
targets.push_back(mlir::NVVM::NVVMTargetAttr::get(ctx));
27-
gpuMod.setTargetsAttr(builder.getArrayAttr(targets));
2825
mlir::Block::iterator insertPt(mod.getBodyRegion().front().end());
2926
symTab.insert(gpuMod, insertPt);
3027
return gpuMod;

flang/test/Fir/CUDA/cuda-alloc-free.fir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func.func @_QPtest_type() {
7373
// CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64
7474
// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
7575

76-
gpu.module @cuda_device_mod [#nvvm.target] {
76+
gpu.module @cuda_device_mod {
7777
gpu.func @_QMalloc() kernel {
7878
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMallocEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
7979
gpu.return

flang/test/Fir/CUDA/cuda-constructor-2.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
1010
fir.has_value %1 : !fir.box<!fir.heap<!fir.array<?xi32>>>
1111
}
1212

13-
gpu.module @cuda_device_mod [#nvvm.target] {
13+
gpu.module @cuda_device_mod {
1414
}
1515
}
1616

17-
// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
17+
// CHECK: gpu.module @cuda_device_mod
1818

1919
// CHECK: llvm.func internal @__cudaFortranConstructor() {
2020
// CHECK-DAG: %[[MODULE:.*]] = cuf.register_module @cuda_device_mod -> !llvm.ptr

flang/test/Fir/CUDA/cuda-device-global.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
66
fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
77

8-
gpu.module @cuda_device_mod [#nvvm.target] {
8+
gpu.module @cuda_device_mod {
99
}
1010
}
1111

12-
// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
12+
// CHECK: gpu.module @cuda_device_mod
1313
// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>

flang/test/Fir/CUDA/cuda-implicit-device-global.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ // Test that global used in device function are flagged with the correct
2525
// CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
2626
// CHECK: fir.global linkonce @_QQcl[[SYMBOL]] {data_attr = #cuf.cuda<constant>} constant : !fir.char<1,32>
2727

28-
// CHECK-LABEL: gpu.module @cuda_device_mod [#nvvm.target]
28+
// CHECK-LABEL: gpu.module @cuda_device_mod
2929
// CHECK: fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a
3030

3131
// -----
@@ -51,7 +51,7 @@ // Test that global used in device function are flagged with the correct
5151
// CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
5252
// CHECK: fir.global linkonce @_QQcl[[SYMBOL]] constant : !fir.char<1,32>
5353

54-
// CHECK-LABEL: gpu.module @cuda_device_mod [#nvvm.target]
54+
// CHECK-LABEL: gpu.module @cuda_device_mod
5555
// CHECK-NOT: fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a
5656

5757
// -----

0 commit comments

Comments
 (0)