Commit 11ed38f

support reduce op with fast implementation

1 parent d72b1f1 commit 11ed38f

File tree

16 files changed: +553 -113 lines changed

scripts/correctness.sh

Lines changed: 9 additions & 0 deletions

@@ -10,6 +10,15 @@ python3 -m benchgc --verbose 0 --driver linalg --case matmul --md 0:32x128xbf16
 
 # f32
 
+
+# reduce
+python3 -m benchgc --verbose 0 --driver linalg --case reduce.add --md 0:128x64x8xf32 --md 1:128xf32 --dimensions=1 --dimensions=2 || FAIL=1
+python3 -m benchgc --verbose 0 --driver linalg --case reduce.mul --md 0:128x8xf32 --md 1:128xf32 --dimensions=1 || FAIL=1
+python3 -m benchgc --verbose 0 --driver linalg --case reduce.max --md 0:128x64x8xf32 --md 1:128xf32 --dimensions=1 --dimensions=2 || FAIL=1
+python3 -m benchgc --verbose 0 --driver linalg --case reduce.min --md 0:128x64x8xf32 --md 1:128xf32 --dimensions=1 --dimensions=2 || FAIL=1
+python3 -m benchgc --verbose 0 --driver linalg --case reduce.l1 --md 0:128x64x8xf32 --md 1:128xf32 --dimensions=1 --dimensions=2 || FAIL=1
+python3 -m benchgc --verbose 0 --driver linalg --case reduce.l2_square --md 0:128x64x8xf32 --md 1:128xf32 --dimensions=1 --dimensions=2 || FAIL=1
+
 # misc
 python3 -m benchgc --verbose 0 --driver linalg --case fill --md 0:f32 --md 1:32x4096xf32 --cmp 1:P:0:0 || FAIL=1
 python3 -m benchgc --verbose 0 --driver linalg --case copy --md 0:1024x1024xf32 --md 1:1024x1024xbf16 || FAIL=1
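Note: each case pairs an input descriptor (--md 0:shape x dtype) with an output descriptor (--md 1:...) plus the axes to collapse (--dimensions, repeatable). As a sanity check, here is a sketch of what the reduce.add case computes, assuming the reference follows torch's dim-wise reduction semantics (the actual reference lives in the new benchgc/linalg/reduce.py, which is among the changed files but not shown in this excerpt):

import torch

# reduce.add over dims 1 and 2: 128x64x8xf32 -> 128xf32
src = torch.rand(128, 64, 8, dtype=torch.float32)
ref = src.sum(dim=(1, 2))  # mirrors --dimensions=1 --dimensions=2
assert ref.shape == (128,)

# reduce.l2_square: sum of squares over the reduced dims
l2_square = src.square().sum(dim=(1, 2))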

test/benchgc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -39,3 +39,4 @@ add_subdirectory("src/benchgc/mlir")
 add_subdirectory("src/benchgc/linalg")
 add_subdirectory("src/benchgc/tensor")
 add_subdirectory("src/benchgc/arith")
+add_subdirectory("src/benchgc/math")

test/benchgc/src/benchgc/__main__.py

Lines changed: 4 additions & 2 deletions

@@ -189,7 +189,10 @@
 
         from .linalg import mlir_op
 
-        mlir_func = mlir_op[flags.case]
+        if flags.case.startswith("reduce."):
+            mlir_func = mlir_op["reduce"]
+        else:
+            mlir_func = mlir_op[flags.case]
         module = mlir_func(flags, args)
     else:
         raise Exception(f"unsupported driver {flags.driver}")
@@ -207,7 +210,6 @@
             raise Exception("Wrong cmp format: %s", cmp)
         idx = int(cmp[:colon])
         args[idx].set_cmp(cmp[colon + 1 :])
-
     entry = benchgc.mlir.util.get_entry(module)
 
     for i, arg in enumerate(args):
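This keeps every reduce.* case on a single MLIR builder; the builder can then recover the concrete variant from the case name itself. A minimal sketch of that dispatch pattern (build_reduce is a hypothetical stand-in for the builder registered by benchgc.linalg.reduce):

def build_reduce(case: str) -> str:
    # "reduce.add" -> "add": picks the combiner for linalg.reduce
    variant = case.split(".", 1)[1]
    return f"linalg.reduce with combiner '{variant}'"

mlir_op = {"reduce": build_reduce}  # hypothetical registry excerpt

case = "reduce.l2_square"
builder = mlir_op["reduce"] if case.startswith("reduce.") else mlir_op[case]
print(builder(case))  # linalg.reduce with combiner 'l2_square'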

test/benchgc/src/benchgc/arg/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -23,6 +23,7 @@
 import benchgc.arg.eltwise as eltwise
 import benchgc.arg.matmul as matmul
 import benchgc.arg.pool as pool
+import benchgc.arg.reduce as reduce
 import benchgc.arg.softmax as softmax
 import benchgc.util
 import torch
@@ -35,6 +36,7 @@
     "softmax": softmax,
     "conv": conv,
     "pool": pool,
+    "reduce": reduce,
 }
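The dict above acts as a per-family registry: each entry points at the module providing default_fill/default_compare hooks for that op family. A sketch of how a lookup might resolve, assuming the family name is taken from the case prefix (the lookup detail is an assumption; only the registration itself is shown in this diff):

from types import SimpleNamespace

# Stand-in for the real module object (benchgc.arg.reduce).
reduce = SimpleNamespace(default_fill=lambda flags, arg, arglist: None)
registry = {"reduce": reduce}  # illustrative excerpt of the dict above

case = "reduce.max"
handler = registry[case.split(".", 1)[0]]  # family "reduce" -> module
# handler.default_fill(flags, arg, arglist) then fills in arg.fill_param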
test/benchgc/src/benchgc/arg/reduce.py

Lines changed: 45 additions & 12 deletions

@@ -14,12 +14,42 @@
 # limitations under the License.
 ################################################################################
 
-from typing import List, Tuple
+import argparse
+from typing import List, Set, Tuple
 
 import benchgc.arg
 import benchgc.util
 import torch
-
+from benchgc.arg.arg import Arg
+from benchgc.arg.compare import p2p
+
+op: Set[str] = set(
+    [
+        "linalg.reduce.add",
+        "linalg.reduce.mul",
+        "linalg.reduce.max",
+        "linalg.reduce.min",
+        "linalg.reduce.l1",
+        "linalg.reduce.l2_square",
+    ]
+)
+
+
+def default_fill(
+    flags: argparse.Namespace,
+    arg: Arg,
+    arglist: List[Arg],
+):
+    if arg.index > 0:
+        raise Exception("reduce fill: dst filling is not allowed")
+    arg.fill_param = [
+        "reduce",
+        flags.case,
+        arglist[0].dtype,
+        arglist[1].dtype,
+        str(arglist[0].nelem() // arglist[1].nelem()),
+    ]
+    arg.fill_type = "D"
 
 def fill(shape: List[int], dtype: torch.dtype, params: List[str]) -> torch.Tensor:
 
@@ -30,22 +60,17 @@ def fill(shape: List[int], dtype: torch.dtype, params: List[str]) -> torch.Tensor
 
     safe_to_reduce_elems: int = benchgc.util.get_problem_bounds(op, sdtype)[0]
 
-    neutral_value: float = 1.0 if op == "mul" else 0.0
+    neutral_value: float = 1.0 if op == "reduce.mul" else 0.0
 
     shift: float = (
         1.0
-        if (
-            op == "mean"
-            or op == "min"
-            and not sdtype.is_signed
-            and not ddtype.is_signed
-        )
+        if (op == "reduce.min" and not sdtype.is_signed and not ddtype.is_signed)
         else 0.0
     )
 
     value_range: int = benchgc.util.get_problem_bounds(op, sdtype)[1]
 
-    is_mul_fp: bool = op == "mul" and sdtype.is_floating_point
+    is_mul_fp: bool = op == "reduce.mul" and sdtype.is_floating_point
     min_range: int = -value_range if is_mul_fp else 1
 
     index = torch.arange(benchgc.util.nelem(shape)).reshape(shape)
@@ -69,10 +94,18 @@ def fill(shape: List[int], dtype: torch.dtype, params: List[str]) -> torch.Tensor
     return value.to(dtype)
 
 
+def default_compare(
+    flags: argparse.Namespace,
+    arg: Arg,
+    arglist: List[Arg],
+):
+    arg.cmp_type = "D"
+    arg.cmp_param = ["reduce", arg.dtype, flags.case]
+
 def compare(
-    ref: torch.Tensor, res: torch.Tensor, verbose: int
+    param: List[str], ref: torch.Tensor, res: torch.Tensor, verbose: int
 ) -> Tuple[bool, bool | None]:
     dtype = ref.dtype
     ref = ref.to(torch.float)
     res = res.to(torch.float)
-    return benchgc.arg.p2p(benchgc.util.get_eps(dtype), 30.0, ref, res, verbose)
+    return p2p(benchgc.util.get_eps(dtype), 30.0, ref, res, verbose)
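The fill path builds deterministic inputs that keep the reduction numerically safe: most elements are the op's neutral value, and only a bounded number of non-neutral values (safe_to_reduce_elems) feed each output. A condensed, self-contained sketch of the idea, loosely modeled on the fill logic above (constants and helper names are illustrative; the real bounds come from benchgc.util.get_problem_bounds):

import torch

def sketch_fill(shape, neutral=0.0, value_range=16, safe_elems=64):
    # Deterministic pattern: small values in [1, value_range], thinned out so
    # an f32 accumulation cannot overflow or lose too much precision.
    index = torch.arange(torch.tensor(shape).prod().item()).reshape(shape)
    value = index % value_range + 1
    keep = index % max(index.numel() // safe_elems, 1) == 0
    return torch.where(keep, value.to(torch.float), torch.tensor(neutral))

src = sketch_fill([128, 64, 8])  # mostly the neutral element, sparse small values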

test/benchgc/src/benchgc/arith/basic.py

Lines changed: 49 additions & 0 deletions

@@ -42,6 +42,19 @@ def ref_constant(
             )
         else:
             raise Exception("only support splat value now")
+    elif isinstance(value, gc_mlir._mlir_libs._mlir.ir.IntegerAttr):
+        return (torch.full(size=tuple(), fill_value=value.__int__(), dtype=torch.int),)
+    elif isinstance(value, gc_mlir._mlir_libs._mlir.ir.DenseIntElementsAttr):
+        if value.is_splat:
+            return (
+                torch.full(
+                    size=tuple(value.type.shape),
+                    fill_value=value.get_splat_value().value,
+                    dtype=benchgc.util.get_dtype(str(value.get_splat_value().type)),
+                ),
+            )
+        else:
+            raise Exception("only support splat value now")
     else:
         raise Exception("Not support constant type %s", type(value))
 
@@ -56,3 +69,39 @@ def ref_addf(
     cache: MLIRCache, op: gc_mlir.ir.OpView, var: Dict[str, torch.Tensor]
 ) -> Tuple[torch.Tensor, ...]:
     return (var[cache.opr[0]] + var[cache.opr[1]],)
+
+
+def ref_maxf(
+    cache: MLIRCache, op: gc_mlir.ir.OpView, var: Dict[str, torch.Tensor]
+) -> Tuple[torch.Tensor, ...]:
+    return (torch.max(var[cache.opr[0]], var[cache.opr[1]]),)
+
+
+def ref_minf(
+    cache: MLIRCache, op: gc_mlir.ir.OpView, var: Dict[str, torch.Tensor]
+) -> Tuple[torch.Tensor, ...]:
+    return (torch.min(var[cache.opr[0]], var[cache.opr[1]]),)
+
+
+def ref_muli(
+    cache: MLIRCache, op: gc_mlir.ir.OpView, var: Dict[str, torch.Tensor]
+) -> Tuple[torch.Tensor, ...]:
+    return (var[cache.opr[0]] * var[cache.opr[1]],)
+
+
+def ref_addi(
+    cache: MLIRCache, op: gc_mlir.ir.OpView, var: Dict[str, torch.Tensor]
+) -> Tuple[torch.Tensor, ...]:
+    return (var[cache.opr[0]] + var[cache.opr[1]],)
+
+
+def ref_maxsi(
+    cache: MLIRCache, op: gc_mlir.ir.OpView, var: Dict[str, torch.Tensor]
+) -> Tuple[torch.Tensor, ...]:
+    return (torch.max(var[cache.opr[0]], var[cache.opr[1]]),)
+
+
+def ref_minsi(
+    cache: MLIRCache, op: gc_mlir.ir.OpView, var: Dict[str, torch.Tensor]
+) -> Tuple[torch.Tensor, ...]:
+    return (torch.min(var[cache.opr[0]], var[cache.opr[1]]),)
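Each ref_* handler evaluates one arith op against the torch tensors in var, keyed by the SSA value names recorded in the MLIRCache while walking the module. A minimal illustration of the calling convention (the stub cache here is hypothetical; the real one is built by the runner):

import torch
from types import SimpleNamespace

# Stub standing in for MLIRCache: opr holds the operands' SSA value names.
cache = SimpleNamespace(opr=["%lhs", "%rhs"])
var = {"%lhs": torch.tensor([1.0, 5.0]), "%rhs": torch.tensor([3.0, 2.0])}

# Same computation as ref_maxf: elementwise max of the two operands.
result = (torch.max(var[cache.opr[0]], var[cache.opr[1]]),)
print(result[0])  # tensor([3., 5.])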

test/benchgc/src/benchgc/linalg/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -41,6 +41,7 @@
     "softmax",
     "conv",
     "pool",
+    "reduce",
 ]:
     mod = importlib.import_module(f"benchgc.linalg.{dri}")
     for key in mod.__dict__:

test/benchgc/src/benchgc/linalg/generic.py

Lines changed: 0 additions & 94 deletions

@@ -140,97 +140,3 @@ def ref_generic(
     return result_tensors
 
 
-def reduce_loop(
-    cache: MLIRCache,
-    op: gc_mlir.ir.OpView,
-    depth: int,
-    in_shape: List[int],
-    var: Dict[str, torch.Tensor],
-    in_idx: List[int],
-    out_idx: List[int],
-    reduced_axis: int,
-    result_tensor: torch.Tensor,
-):
-    if depth == len(in_shape):
-        # we need to execute the block here
-        # we will need to read the block argument name and save it into the cache
-
-        block: gc_mlir.ir.Block = op.regions[0].blocks[0]
-
-        if len(cache.next) == 0:
-            # region cache
-            cache.next.append(MLIRCache())
-        if len(cache.next[0].next) == 0:
-            # region->block cache
-            cache.next[0].next.append(MLIRCache())
-            for arg in block.arguments:
-                cache.next[0].next[0].arg.append(arg.get_name())
-
-        block_arg: Dict[str, torch.Tensor] = {
-            # set input
-            cache.next[0].next[0].arg[0]: var[cache.opr[0]][tuple(in_idx)],
-            # set output
-            cache.next[0].next[0].arg[1]: result_tensor[tuple(out_idx)],
-        }
-
-        res: Tuple[torch.Tensor, ...] = benchgc.runner.dfs_block(
-            cache.next[0].next[0], op.regions[0].blocks[0], var | block_arg
-        )
-
-        # perform the yield operation
-        result_tensor[tuple(out_idx)] = res[0]
-    else:
-        dimensions: gc_mlir.ir.DenseI64ArrayAttr = op.attributes["dimensions"]
-        reduce_axis: bool = depth in list(dimensions)
-
-        for i in range(in_shape[depth]):
-            if reduce_axis:
-                in_idx[depth] = i
-                reduce_loop(
-                    cache,
-                    op,
-                    depth + 1,
-                    in_shape,
-                    var,
-                    in_idx,
-                    out_idx,
-                    reduced_axis + 1,
-                    result_tensor,
-                )
-            else:
-                in_idx[depth] = i
-                out_idx[depth - reduced_axis] = i
-                reduce_loop(
-                    cache,
-                    op,
-                    depth + 1,
-                    in_shape,
-                    var,
-                    in_idx,
-                    out_idx,
-                    reduced_axis,
-                    result_tensor,
-                )
-
-
-def ref_reduce(
-    cache: MLIRCache, op: gc_mlir.ir.OpView, tensors: Dict[str, torch.Tensor]
-) -> Tuple[torch.Tensor, ...]:
-    # create the buffer for result tensors
-    tensors[cache.res[0]] = tensors[cache.opr[-1]].clone()
-    in_shape: List[int] = list(op.operands[0].type.shape)
-    out_shape: List[int] = list(op.result.type.shape)
-
-    result_tensor: torch.Tensor = tensors[cache.opr[-1]].clone()
-    reduce_loop(
-        cache,
-        op,
-        0,
-        in_shape,
-        tensors,
-        [0] * len(in_shape),
-        [0] * len(out_shape),
-        0,
-        result_tensor,
-    )
-    return (result_tensor,)
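This drops the element-by-element recursive reference for linalg.reduce, which executed the op's region once per input element. Per the commit title it is superseded by a fast implementation (presumably the new benchgc/linalg/reduce.py among the 16 changed files, not shown in this excerpt). A sketch of the vectorized shape such a replacement would plausibly take, using torch's built-in dim-wise reductions instead of a Python loop:

import torch
from typing import List

def fast_reduce(src: torch.Tensor, dims: List[int], variant: str) -> torch.Tensor:
    # One torch call per variant; names follow the CLI cases (reduce.<variant>).
    if variant == "add":
        return src.sum(dim=dims)
    if variant == "mul":
        out = src
        for d in sorted(dims, reverse=True):  # prod only accepts a single dim
            out = out.prod(dim=d)
        return out
    if variant == "max":
        return src.amax(dim=dims)
    if variant == "min":
        return src.amin(dim=dims)
    if variant == "l1":
        return src.abs().sum(dim=dims)
    if variant == "l2_square":
        return src.square().sum(dim=dims)
    raise ValueError(f"unsupported reduce variant: {variant}")

x = torch.rand(128, 64, 8)
assert fast_reduce(x, [1, 2], "add").shape == (128,)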
