[mlir][Arith] Add FTZ (Flush-to-Zero) fast-math flag

chelini · chelini · commit 98b22fd3c2d9 · 2024-10-17T14:29:22.000+02:00
The Flush-to-Zero (FTZ) modifier is used in floating-point arithmetic to
replace very small numbers, known as denormal or subnormal numbers, with zero.
FTZ is applied to improve performance, since handling these small numbers can
slow down computations. Note that this attribute does not specify whether a
value is flushed to positive or negative zero, since that is architecture-
(or vendor-) dependent. The new `ftz` flag is also added to the `fast` group,
so `fastmath<fast>` now implies `ftz`.
diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td
@@ -108,20 +108,22 @@ def FASTMATH_NO_SIGNED_ZEROS : I32BitEnumAttrCaseBit<"nsz",      3>;
 def FASTMATH_ALLOW_RECIP     : I32BitEnumAttrCaseBit<"arcp",     4>;
 def FASTMATH_ALLOW_CONTRACT  : I32BitEnumAttrCaseBit<"contract", 5>;
 def FASTMATH_APPROX_FUNC     : I32BitEnumAttrCaseBit<"afn",      6>;
+def FASTMATH_FTZ             : I32BitEnumAttrCaseBit<"ftz",      7>;
 def FASTMATH_FAST            : I32BitEnumAttrCaseGroup<
     "fast",
     [
       FASTMATH_REASSOC,         FASTMATH_NO_NANS,     FASTMATH_NO_INFS,
       FASTMATH_NO_SIGNED_ZEROS, FASTMATH_ALLOW_RECIP, FASTMATH_ALLOW_CONTRACT,
-      FASTMATH_APPROX_FUNC]>;
+      FASTMATH_APPROX_FUNC, FASTMATH_FTZ]>;
 
 def FastMathFlags : I32BitEnumAttr<
     "FastMathFlags",
     "Floating point fast math flags",
     [
       FASTMATH_NONE,           FASTMATH_REASSOC,         FASTMATH_NO_NANS,
       FASTMATH_NO_INFS,        FASTMATH_NO_SIGNED_ZEROS, FASTMATH_ALLOW_RECIP,
-      FASTMATH_ALLOW_CONTRACT, FASTMATH_APPROX_FUNC,     FASTMATH_FAST]> {
+      FASTMATH_ALLOW_CONTRACT, FASTMATH_APPROX_FUNC,     FASTMATH_FTZ,
+      FASTMATH_FAST]> {
   let separator = ",";
   let cppNamespace = "::mlir::arith";
   let genSpecializedAttr = 0;
diff --git a/mlir/test/Dialect/Arith/ops.mlir b/mlir/test/Dialect/Arith/ops.mlir
@@ -1127,7 +1127,7 @@ func.func @fastmath(%arg0: f32, %arg1: f32, %arg2: i32) {
 // CHECK: {{.*}} = arith.addf %arg0, %arg1 fastmath<nnan,ninf> : f32
   %7 = arith.addf %arg0, %arg1 fastmath<nnan,ninf> : f32
 // CHECK: {{.*}} = arith.mulf %arg0, %arg1 fastmath<fast> : f32
-  %8 = arith.mulf %arg0, %arg1 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : f32
+  %8 = arith.mulf %arg0, %arg1 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn,ftz> : f32
 // CHECK: {{.*}} = arith.cmpf oeq, %arg0, %arg1 fastmath<fast> : f32
   %9 = arith.cmpf oeq, %arg0, %arg1 fastmath<fast> : f32
 
@@ -1161,3 +1161,17 @@ func.func @intflags_func(%arg0: i64, %arg1: i64) {
   %3 = arith.shli %arg0, %arg1 overflow<nsw, nuw> : i64
   return
 }
+
+// CHECK-LABEL: flush_to_zero
+// CHECK-SAME: %[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32
+func.func @flush_to_zero(%arg0: f32, %arg1: f32) {
+  // CHECK: %{{.+}} = arith.addf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  // CHECK-NEXT: %{{.+}} = arith.subf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  // CHECK-NEXT: %{{.+}} = arith.mulf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  // CHECK-NEXT: %{{.+}} = arith.divf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  %0 = arith.addf %arg0, %arg1 fastmath<ftz> : f32
+  %1 = arith.subf %arg0, %arg1 fastmath<ftz> : f32
+  %2 = arith.mulf %arg0, %arg1 fastmath<ftz> : f32
+  %3 = arith.divf %arg0, %arg1 fastmath<ftz> : f32
+  return
+}
diff --git a/mlir/test/Dialect/Math/ops.mlir b/mlir/test/Dialect/Math/ops.mlir
@@ -289,7 +289,7 @@ func.func @fastmath(%f: f32, %i: i32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>)
   // CHECK: math.trunc %[[F]] fastmath<fast> : f32
   %0 = math.trunc %f fastmath<fast> : f32
   // CHECK: math.powf %[[V]], %[[V]] fastmath<fast> : vector<4xf32>
-  %1 = math.powf %v, %v fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : vector<4xf32>
+  %1 = math.powf %v, %v fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn,ftz> : vector<4xf32>
   // CHECK: math.fma %[[T]], %[[T]], %[[T]] : tensor<4x4x?xf32>
   %2 = math.fma %t, %t, %t fastmath<none> : tensor<4x4x?xf32>
   // CHECK: math.absf %[[F]] fastmath<ninf> : f32