-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][Math][GPU] Add lowering of absi and fpowi to libdevice #123422
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir Author: William Moses (wsmoses) ChangesFull diff: https://github.com/llvm/llvm-project/pull/123422.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 2768929f460e2e..bbffec02009ea6 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -509,6 +509,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
"__nv_fmod");
+ populateOpPatterns<math::AbsIOp>(converter, patterns, "__nv_abs");
populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
"__nv_fabs");
populateOpPatterns<math::AcosOp>(converter, patterns, "__nv_acosf",
@@ -555,6 +556,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
"__nv_log2", "__nv_fast_log2f");
populateOpPatterns<math::PowFOp>(converter, patterns, "__nv_powf", "__nv_pow",
"__nv_fast_powf");
+ populateOpPatterns<math::FPowIOp>(converter, patterns, "__nv_powif", "__nv_powi");
populateOpPatterns<math::RoundOp>(converter, patterns, "__nv_roundf",
"__nv_round");
populateOpPatterns<math::RoundEvenOp>(converter, patterns, "__nv_rintf",
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index f52dd6c0d0ce30..b3df7186dab1ef 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1033,3 +1033,27 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
+
+
+gpu.module @test_module_52 {
+ // CHECK: llvm.func @__nv_abs(i32) -> i32
+ // CHECK-LABEL: func @gpu_abs
+ func.func @gpu_abs(%arg_i32 : i32) -> (i32) {
+ %result32 = math.absi %arg_i32 : i32
+ // CHECK: llvm.call @__nv_abs(%{{.*}}) : (i32) -> i32
+ func.return %result32 : i32
+ }
+}
+
+gpu.module @test_module_53 {
+ // CHECK: llvm.func @__nv_powif(f32, i32) -> f32
+ // CHECK: llvm.func @__nv_powi(f64, i32) -> f64
+ // CHECK-LABEL: func @gpu_powi
+ func.func @gpu_powi(%arg_f32 : f32, %arg_f64 : f64, %arg_i32 : i32) -> (f32, f64) {
+ %result32 = math.fpowi %arg_f32, %arg_i32 : f32, i32
+ // CHECK: llvm.call @__nv_powif(%{{.*}}, %{{.*}}) : (f32, i32) -> f32
+ %result64 = math.fpowi %arg_f64, %arg_i32 : f64, i32
+ // CHECK: llvm.call @__nv_powi(%{{.*}}, %{{.*}}) : (f64, i32) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
|
@llvm/pr-subscribers-mlir-gpu Author: William Moses (wsmoses) ChangesFull diff: https://github.com/llvm/llvm-project/pull/123422.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 2768929f460e2e..bbffec02009ea6 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -509,6 +509,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
"__nv_fmod");
+ populateOpPatterns<math::AbsIOp>(converter, patterns, "__nv_abs");
populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
"__nv_fabs");
populateOpPatterns<math::AcosOp>(converter, patterns, "__nv_acosf",
@@ -555,6 +556,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
"__nv_log2", "__nv_fast_log2f");
populateOpPatterns<math::PowFOp>(converter, patterns, "__nv_powf", "__nv_pow",
"__nv_fast_powf");
+ populateOpPatterns<math::FPowIOp>(converter, patterns, "__nv_powif", "__nv_powi");
populateOpPatterns<math::RoundOp>(converter, patterns, "__nv_roundf",
"__nv_round");
populateOpPatterns<math::RoundEvenOp>(converter, patterns, "__nv_rintf",
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index f52dd6c0d0ce30..b3df7186dab1ef 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1033,3 +1033,27 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
+
+
+gpu.module @test_module_52 {
+ // CHECK: llvm.func @__nv_abs(i32) -> i32
+ // CHECK-LABEL: func @gpu_abs
+ func.func @gpu_abs(%arg_i32 : i32) -> (i32) {
+ %result32 = math.absi %arg_i32 : i32
+ // CHECK: llvm.call @__nv_abs(%{{.*}}) : (i32) -> i32
+ func.return %result32 : i32
+ }
+}
+
+gpu.module @test_module_53 {
+ // CHECK: llvm.func @__nv_powif(f32, i32) -> f32
+ // CHECK: llvm.func @__nv_powi(f64, i32) -> f64
+ // CHECK-LABEL: func @gpu_powi
+ func.func @gpu_powi(%arg_f32 : f32, %arg_f64 : f64, %arg_i32 : i32) -> (f32, f64) {
+ %result32 = math.fpowi %arg_f32, %arg_i32 : f32, i32
+ // CHECK: llvm.call @__nv_powif(%{{.*}}, %{{.*}}) : (f32, i32) -> f32
+ %result64 = math.fpowi %arg_f64, %arg_i32 : f64, i32
+ // CHECK: llvm.call @__nv_powi(%{{.*}}, %{{.*}}) : (f64, i32) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
|
You can test this locally with the following command:git-clang-format --diff 22d4ff155aadf0f098dd5dc48d9038da15108937 d6b89f00447add33943da404fa198d80103c8c47 --extensions cpp,h -- mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp View the diff from clang-format here.diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
index 0c1755d593..a01a706f3d 100644
--- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -51,10 +51,8 @@ public:
Type resultType = castedOperands.front().getType();
Type funcType = getFunctionType(resultType, castedOperands);
- StringRef funcName =
- static_cast<const DerivedTy *>(this)
- ->getFunctionName(
- cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
+ StringRef funcName = static_cast<const DerivedTy *>(this)->getFunctionName(
+ cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
if (funcName.empty())
return failure();
@@ -85,7 +83,8 @@ private:
using LLVM::LLVMFuncOp;
auto funcAttr = StringAttr::get(op->getContext(), funcName);
- auto funcOp = SymbolTable::lookupNearestSymbolFrom<LLVMFuncOp>(op, funcAttr);
+ auto funcOp =
+ SymbolTable::lookupNearestSymbolFrom<LLVMFuncOp>(op, funcAttr);
if (funcOp)
return funcOp;
|
/// llvm.call @__nv_fast_expf(%arg_f32) : (f32) -> f32 | ||
template <typename SourceOp> | ||
struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> { | ||
template <typename SourceOp, typename DerivedTy> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add some documentation. I also think the implicit convention for CRTP is to have the derived class the as the leading template argument.
/// llvm.call @__nv_powf(%arg_f32, %arg_i32) : (f32, i32) -> f32 | ||
/// | ||
template <typename SourceOp> | ||
struct FloatIntOpToFuncCallLowering |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this needed? I don't see anything specific in the logic here that differs from OpToFuncCallLowering
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah the optofunccalllowering checks if input types are valid floats (saying it doesnt apply otherwise) whereas here we need to permit a 32bit integer for the second arg
/// llvm.call @__nv_abs(%arg_i32) : (i32) -> i32 | ||
/// | ||
template <typename SourceOp> | ||
struct IntOpToFuncCallLowering |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this just be folded into the the OpToFuncCallLowering
as another option with an empty-by-default function name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The maybeCast/ getFunctionName option in the floating point one currently does checks for floating point types whereas here we check if it's a 32bit int
Co-authored-by: Oleksandr "Alex" Zinenko <[email protected]>
#123644 is what I had in mind and it seems to pass the new test. |
More concise version of #123422. --------- Co-authored-by: William S. Moses <[email protected]>
No description provided.