-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[mlir] lower min/maxnum to libdevice calls #127323
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Introduce lowering from arith.minnum/maxnum operations to the corresponding Nvidia libdevice calls. This requires reordering pattern population methods so that the libdevice-targeting patterns are prioritized over default patterns targeting LLVM IR intrinsics from the Arith dialect. The tests are placed into a separate file because the existing gpu-to-nvvm.mlir file has a mode that forces Arith dialect operations to be preserved as is, without using a separate FileCheck tag to differentiate.
@llvm/pr-subscribers-mlir-gpu Author: Oleksandr "Alex" Zinenko (ftynse) ChangesIntroduce lowering from arith.minnum/maxnum operations to the corresponding Nvidia libdevice calls. This requires reordering pattern population methods so that the libdevice-targeting patterns are prioritized over default patterns targeting LLVM IR intrinsics from the Arith dialect. The tests are placed into a separate file because the existing gpu-to-nvvm.mlir file has a mode that forces Arith dialect operations to be preserved as is, without using a separate FileCheck tag to differentiate. Full diff: https://github.com/llvm/llvm-project/pull/127323.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 35330f870e6ae..c1a4d31861d3b 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -378,6 +378,8 @@ struct LowerGpuOpsToNVVMOpsPass final
RewritePatternSet llvmPatterns(m.getContext());
LLVMConversionTarget target(getContext());
+ populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
+
llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
allowedDialects.end());
for (Dialect *dialect : getContext().getLoadedDialects()) {
@@ -407,7 +409,6 @@ struct LowerGpuOpsToNVVMOpsPass final
llvmPatterns);
}
- populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
if (this->hasRedux)
populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
@@ -552,6 +553,11 @@ void mlir::populateGpuToNVVMConversionPatterns(
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
"__nv_fmod");
+ populateOpPatterns<arith::MaxNumFOp>(converter, patterns, "__nv_fmaxf",
+ "__nv_fmax");
+ populateOpPatterns<arith::MinNumFOp>(converter, patterns, "__nv_fminf",
+ "__nv_fmin");
+
populateIntOpPatterns<math::AbsIOp>(converter, patterns, "__nv_abs");
populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
"__nv_fabs");
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-arith-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-arith-ops-to-nvvm.mlir
new file mode 100644
index 0000000000000..2b1c5a7fef8fe
--- /dev/null
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-arith-ops-to-nvvm.mlir
@@ -0,0 +1,21 @@
+// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s
+
+gpu.module @test_module_54 {
+ // CHECK: llvm.func @__nv_fmaxf(f32, f32) -> f32
+ // CHECK: llvm.func @__nv_fminf(f32, f32) -> f32
+ // CHECK: llvm.func @__nv_fmax(f64, f64) -> f64
+ // CHECK: llvm.func @__nv_fmin(f64, f64) -> f64
+ // CHECK-LABEL: @gpu_fminmax
+ func.func @gpu_fminmax(%arg1_f32: f32, %arg2_f32: f32, %arg1_f64: f64, %arg2_f64: f64)
+ -> (f32, f32, f64, f64) {
+ // CHECK: llvm.call @__nv_fmaxf
+ %max_f32 = arith.maxnumf %arg1_f32, %arg2_f32 : f32
+ // CHECK: llvm.call @__nv_fminf
+ %min_f32 = arith.minnumf %arg1_f32, %arg2_f32 : f32
+ // CHECK: llvm.call @__nv_fmax(
+ %max_f64 = arith.maxnumf %arg1_f64, %arg2_f64 : f64
+ // CHECK: llvm.call @__nv_fmin(
+ %min_f64 = arith.minnumf %arg1_f64, %arg2_f64 : f64
+ return %max_f32, %min_f32, %max_f64, %min_f64 : f32, f32, f64, f64
+ }
+}
|
@llvm/pr-subscribers-mlir Author: Oleksandr "Alex" Zinenko (ftynse) ChangesIntroduce lowering from arith.minnum/maxnum operations to the corresponding Nvidia libdevice calls. This requires reordering pattern population methods so that the libdevice-targeting patterns are prioritized over default patterns targeting LLVM IR intrinsics from the Arith dialect. The tests are placed into a separate file because the existing gpu-to-nvvm.mlir file has a mode that forces Arith dialect operations to be preserved as is, without using a separate FileCheck tag to differentiate. Full diff: https://github.com/llvm/llvm-project/pull/127323.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 35330f870e6ae..c1a4d31861d3b 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -378,6 +378,8 @@ struct LowerGpuOpsToNVVMOpsPass final
RewritePatternSet llvmPatterns(m.getContext());
LLVMConversionTarget target(getContext());
+ populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
+
llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
allowedDialects.end());
for (Dialect *dialect : getContext().getLoadedDialects()) {
@@ -407,7 +409,6 @@ struct LowerGpuOpsToNVVMOpsPass final
llvmPatterns);
}
- populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
if (this->hasRedux)
populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
@@ -552,6 +553,11 @@ void mlir::populateGpuToNVVMConversionPatterns(
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
"__nv_fmod");
+ populateOpPatterns<arith::MaxNumFOp>(converter, patterns, "__nv_fmaxf",
+ "__nv_fmax");
+ populateOpPatterns<arith::MinNumFOp>(converter, patterns, "__nv_fminf",
+ "__nv_fmin");
+
populateIntOpPatterns<math::AbsIOp>(converter, patterns, "__nv_abs");
populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
"__nv_fabs");
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-arith-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-arith-ops-to-nvvm.mlir
new file mode 100644
index 0000000000000..2b1c5a7fef8fe
--- /dev/null
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-arith-ops-to-nvvm.mlir
@@ -0,0 +1,21 @@
+// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s
+
+gpu.module @test_module_54 {
+ // CHECK: llvm.func @__nv_fmaxf(f32, f32) -> f32
+ // CHECK: llvm.func @__nv_fminf(f32, f32) -> f32
+ // CHECK: llvm.func @__nv_fmax(f64, f64) -> f64
+ // CHECK: llvm.func @__nv_fmin(f64, f64) -> f64
+ // CHECK-LABEL: @gpu_fminmax
+ func.func @gpu_fminmax(%arg1_f32: f32, %arg2_f32: f32, %arg1_f64: f64, %arg2_f64: f64)
+ -> (f32, f32, f64, f64) {
+ // CHECK: llvm.call @__nv_fmaxf
+ %max_f32 = arith.maxnumf %arg1_f32, %arg2_f32 : f32
+ // CHECK: llvm.call @__nv_fminf
+ %min_f32 = arith.minnumf %arg1_f32, %arg2_f32 : f32
+ // CHECK: llvm.call @__nv_fmax(
+ %max_f64 = arith.maxnumf %arg1_f64, %arg2_f64 : f64
+ // CHECK: llvm.call @__nv_fmin(
+ %min_f64 = arith.minnumf %arg1_f64, %arg2_f64 : f64
+ return %max_f32, %min_f32, %max_f64, %min_f64 : f32, f32, f64, f64
+ }
+}
|
Introduce lowering from arith.minnum/maxnum operations to the corresponding Nvidia libdevice calls. This requires reordering pattern population methods so that the libdevice-targeting patterns are prioritized over default patterns targeting LLVM IR intrinsics from the Arith dialect. The tests are placed into a separate file because the existing gpu-to-nvvm.mlir file has a mode that forces Arith dialect operations to be preserved as is, without using a separate FileCheck tag to differentiate. Co-authored-by: William Moses <[email protected]>
Introduce lowering from arith.minnum/maxnum operations to the corresponding Nvidia libdevice calls. This requires reordering pattern population methods so that the libdevice-targeting patterns are prioritized over default patterns targeting LLVM IR intrinsics from the Arith dialect. The tests are placed into a separate file because the existing gpu-to-nvvm.mlir file has a mode that forces Arith dialect operations to be preserved as is, without using a separate FileCheck tag to differentiate.