Skip to content

Commit c5613dc

Browse files
authored
[MLIR] Mark LLVM::FMAOp as legal (#144671)
Mark LLVM::FMAOp as legal in configureGpuToNVVMConversionLegality by removing it from the list of illegal ops: the NVPTX backend can lower the llvm.intr.fma intrinsic itself and emits fma.rn.f32 for it.
1 parent bdac958 commit c5613dc

File tree

3 files changed

+18
-6
lines changed

3 files changed

+18
-6
lines changed

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,10 +429,10 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) {
429429
target.addLegalDialect<::mlir::NVVM::NVVMDialect>();
430430
target.addIllegalDialect<gpu::GPUDialect>();
431431
target.addIllegalOp<LLVM::CopySignOp, LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op,
432-
LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FMAOp,
433-
LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op,
434-
LLVM::PowOp, LLVM::RoundEvenOp, LLVM::RoundOp,
435-
LLVM::SinOp, LLVM::SqrtOp>();
432+
LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp,
433+
LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp,
434+
LLVM::RoundEvenOp, LLVM::RoundOp, LLVM::SinOp,
435+
LLVM::SqrtOp>();
436436

437437
// TODO: Remove once we support replacing non-root ops.
438438
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ module attributes {transform.with_named_sequence} {
10271027
legal_ops = ["func.func", "gpu.module", "gpu.yield"],
10281028
illegal_dialects = ["gpu"],
10291029
illegal_ops = ["llvm.copysign", "llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil",
1030-
"llvm.ffloor", "llvm.fma", "llvm.frem", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow",
1030+
"llvm.ffloor", "llvm.frem", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow",
10311031
"llvm.roundeven", "llvm.round", "llvm.sin", "llvm.sqrt"],
10321032
partial_conversion
10331033
} : !transform.any_op

mlir/test/Integration/GPU/CUDA/dump-ptx.mlir

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \
33
// RUN: 2>&1 | FileCheck %s
44

5-
// CHECK: Generated by LLVM NVPTX Back-End
5+
// CHECK-LABEL: Generated by LLVM NVPTX Back-End
66
// CHECK: .visible .func kernel_a()
77
// CHECK: ret;
88
gpu.module @bar {
@@ -11,3 +11,15 @@ gpu.module @bar {
1111
llvm.return
1212
}
1313
}
14+
15+
// CHECK-LABEL: Generated by LLVM NVPTX Back-End
16+
// CHECK: .visible .func ({{.+}}) fma(
17+
// CHECK: fma.rn.f32
18+
19+
gpu.module @foo {
20+
llvm.func @fma(%arg0: f32, %arg1: f32) -> f32
21+
attributes { gpu.kernel } {
22+
%res = llvm.intr.fma (%arg0, %arg1, %arg1) : (f32, f32, f32) -> f32
23+
llvm.return %res : f32
24+
}
25+
}

0 commit comments

Comments
 (0)