-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][Math][GPU] Add lowering of absi and fpowi to libdevice #123422
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir Author: William Moses (wsmoses) ChangesFull diff: https://github.com/llvm/llvm-project/pull/123422.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 2768929f460e2e..bbffec02009ea6 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -509,6 +509,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
"__nv_fmod");
+ populateOpPatterns<math::AbsIOp>(converter, patterns, "__nv_abs");
populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
"__nv_fabs");
populateOpPatterns<math::AcosOp>(converter, patterns, "__nv_acosf",
@@ -555,6 +556,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
"__nv_log2", "__nv_fast_log2f");
populateOpPatterns<math::PowFOp>(converter, patterns, "__nv_powf", "__nv_pow",
"__nv_fast_powf");
+ populateOpPatterns<math::FPowIOp>(converter, patterns, "__nv_powif", "__nv_powi");
populateOpPatterns<math::RoundOp>(converter, patterns, "__nv_roundf",
"__nv_round");
populateOpPatterns<math::RoundEvenOp>(converter, patterns, "__nv_rintf",
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index f52dd6c0d0ce30..b3df7186dab1ef 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1033,3 +1033,27 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
+
+
+gpu.module @test_module_52 {
+ // CHECK: llvm.func @__nv_abs(i32) -> i32
+ // CHECK-LABEL: func @gpu_abs
+ func.func @gpu_abs(%arg_i32 : i32) -> (i32) {
+ %result32 = math.absi %arg_i32 : i32
+ // CHECK: llvm.call @__nv_abs(%{{.*}}) : (i32) -> i32
+ func.return %result32 : i32
+ }
+}
+
+gpu.module @test_module_53 {
+ // CHECK: llvm.func @__nv_powif(f32, i32) -> f32
+ // CHECK: llvm.func @__nv_powi(f64, i32) -> f64
+ // CHECK-LABEL: func @gpu_powi
+ func.func @gpu_powi(%arg_f32 : f32, %arg_f64 : f64, %arg_i32 : i32) -> (f32, f64) {
+ %result32 = math.fpowi %arg_f32, %arg_i32 : f32, i32
+ // CHECK: llvm.call @__nv_powif(%{{.*}}, %{{.*}}) : (f32, i32) -> f32
+ %result64 = math.fpowi %arg_f64, %arg_i32 : f64, i32
+ // CHECK: llvm.call @__nv_powi(%{{.*}}, %{{.*}}) : (f64, i32) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
|
@llvm/pr-subscribers-mlir-gpu Author: William Moses (wsmoses) ChangesFull diff: https://github.com/llvm/llvm-project/pull/123422.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 2768929f460e2e..bbffec02009ea6 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -509,6 +509,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
"__nv_fmod");
+ populateOpPatterns<math::AbsIOp>(converter, patterns, "__nv_abs");
populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
"__nv_fabs");
populateOpPatterns<math::AcosOp>(converter, patterns, "__nv_acosf",
@@ -555,6 +556,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
"__nv_log2", "__nv_fast_log2f");
populateOpPatterns<math::PowFOp>(converter, patterns, "__nv_powf", "__nv_pow",
"__nv_fast_powf");
+ populateOpPatterns<math::FPowIOp>(converter, patterns, "__nv_powif", "__nv_powi");
populateOpPatterns<math::RoundOp>(converter, patterns, "__nv_roundf",
"__nv_round");
populateOpPatterns<math::RoundEvenOp>(converter, patterns, "__nv_rintf",
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index f52dd6c0d0ce30..b3df7186dab1ef 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1033,3 +1033,27 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
+
+
+gpu.module @test_module_52 {
+ // CHECK: llvm.func @__nv_abs(i32) -> i32
+ // CHECK-LABEL: func @gpu_abs
+ func.func @gpu_abs(%arg_i32 : i32) -> (i32) {
+ %result32 = math.absi %arg_i32 : i32
+ // CHECK: llvm.call @__nv_abs(%{{.*}}) : (i32) -> i32
+ func.return %result32 : i32
+ }
+}
+
+gpu.module @test_module_53 {
+ // CHECK: llvm.func @__nv_powif(f32, i32) -> f32
+ // CHECK: llvm.func @__nv_powi(f64, i32) -> f64
+ // CHECK-LABEL: func @gpu_powi
+ func.func @gpu_powi(%arg_f32 : f32, %arg_f64 : f64, %arg_i32 : i32) -> (f32, f64) {
+ %result32 = math.fpowi %arg_f32, %arg_i32 : f32, i32
+ // CHECK: llvm.call @__nv_powif(%{{.*}}, %{{.*}}) : (f32, i32) -> f32
+ %result64 = math.fpowi %arg_f64, %arg_i32 : f64, i32
+ // CHECK: llvm.call @__nv_powi(%{{.*}}, %{{.*}}) : (f64, i32) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
|
You can test this locally with the following command:git-clang-format --diff 22d4ff155aadf0f098dd5dc48d9038da15108937 d6b89f00447add33943da404fa198d80103c8c47 --extensions cpp,h -- mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp View the diff from clang-format here.diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
index 0c1755d593..a01a706f3d 100644
--- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -51,10 +51,8 @@ public:
Type resultType = castedOperands.front().getType();
Type funcType = getFunctionType(resultType, castedOperands);
- StringRef funcName =
- static_cast<const DerivedTy *>(this)
- ->getFunctionName(
- cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
+ StringRef funcName = static_cast<const DerivedTy *>(this)->getFunctionName(
+ cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
if (funcName.empty())
return failure();
@@ -85,7 +83,8 @@ private:
using LLVM::LLVMFuncOp;
auto funcAttr = StringAttr::get(op->getContext(), funcName);
- auto funcOp = SymbolTable::lookupNearestSymbolFrom<LLVMFuncOp>(op, funcAttr);
+ auto funcOp =
+ SymbolTable::lookupNearestSymbolFrom<LLVMFuncOp>(op, funcAttr);
if (funcOp)
return funcOp;
|
/// llvm.call @__nv_fast_expf(%arg_f32) : (f32) -> f32 | ||
template <typename SourceOp> | ||
struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> { | ||
template <typename SourceOp, typename DerivedTy> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add some documentation. I also think the implicit convention for CRTP is to have the derived class the as the leading template argument.
/// llvm.call @__nv_powf(%arg_f32, %arg_i32) : (f32, i32) -> f32 | ||
/// | ||
template <typename SourceOp> | ||
struct FloatIntOpToFuncCallLowering |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this needed? I don't see anything specific in the logic here that differs from OpToFuncCallLowering
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah the optofunccalllowering checks if input types are valid floats (saying it doesnt apply otherwise) whereas here we need to permit a 32bit integer for the second arg
/// llvm.call @__nv_abs(%arg_i32) : (i32) -> i32 | ||
/// | ||
template <typename SourceOp> | ||
struct IntOpToFuncCallLowering |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this just be folded into the the OpToFuncCallLowering
as another option with an empty-by-default function name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The maybeCast/ getFunctionName option in the floating point one currently does checks for floating point types whereas here we check if it's a 32bit int
Co-authored-by: Oleksandr "Alex" Zinenko <[email protected]>
#123644 is what I had in mind and it seems to pass the new test. |
More concise version of #123422. --------- Co-authored-by: William S. Moses <[email protected]>
No description provided.