
[BenchGC] attach DLTI for mlir module #312


Merged: 56 commits, Sep 10, 2024

Commits
4772f9a
introduce benchgc for correctness check
WangJialei-A Aug 16, 2024
2124dc2
Merge branch 'main' into xurui/merge_bench_new
xurui1995 Aug 26, 2024
1f5b6ba
merge code
xurui1995 Aug 26, 2024
2cccd04
introduce benchgc for correctness check
WangJialei-A Aug 16, 2024
1cabc2c
remove print
xurui1995 Aug 27, 2024
c3c5441
merge code
xurui1995 Aug 27, 2024
e316a98
fix
xurui1995 Aug 27, 2024
841e81f
simplify
xurui1995 Aug 27, 2024
42a50c2
merge main
xurui1995 Aug 27, 2024
b16a9b6
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Aug 27, 2024
1e8b074
fix format
xurui1995 Aug 27, 2024
1c20184
fix format
xurui1995 Aug 27, 2024
69f2e94
reorg the pattern dir
xurui1995 Aug 27, 2024
8d0953c
improve
xurui1995 Aug 27, 2024
e05d5f0
fix format
xurui1995 Aug 27, 2024
e96d310
fix
xurui1995 Aug 27, 2024
9d03541
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Aug 27, 2024
44d591d
add example
xurui1995 Aug 27, 2024
bae0e8b
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Aug 27, 2024
bc7262d
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Aug 27, 2024
420e3de
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Aug 28, 2024
7923184
fix some comments
xurui1995 Aug 29, 2024
8e85b80
fix
xurui1995 Aug 29, 2024
56f2de6
fix
xurui1995 Aug 29, 2024
b87b2d4
add readme
xurui1995 Aug 29, 2024
8f09ed0
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Aug 29, 2024
4726c81
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Aug 30, 2024
b2597b9
add mlp filling
xurui1995 Sep 2, 2024
248dd12
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Sep 2, 2024
4392974
fix mlp
xurui1995 Sep 2, 2024
3566b83
add case
xurui1995 Sep 2, 2024
8deb44c
remove old bench code
xurui1995 Sep 2, 2024
a0641e9
update readme
xurui1995 Sep 2, 2024
5372bf0
Merge branch 'main' into xurui/merge_into_benchgc
xurui1995 Sep 2, 2024
2448b76
add attch dlti
xurui1995 Sep 2, 2024
d614c40
skip attach dlti when it was already added
xurui1995 Sep 2, 2024
9db3237
update readme
xurui1995 Sep 2, 2024
d714cf0
update readme
xurui1995 Sep 2, 2024
6f34f0f
fix ci
xurui1995 Sep 2, 2024
8cb976f
fix
xurui1995 Sep 2, 2024
15d14f4
fix
xurui1995 Sep 2, 2024
82521bb
fix env name
xurui1995 Sep 3, 2024
fa59611
add test print
xurui1995 Sep 3, 2024
b4aecff
add test print2
xurui1995 Sep 3, 2024
20a0e28
test ci
xurui1995 Sep 3, 2024
67d155c
test ci cpu
xurui1995 Sep 3, 2024
1e7c835
add new cpuinfo
xurui1995 Sep 5, 2024
ed60ede
merge main
xurui1995 Sep 5, 2024
21860b4
fix
xurui1995 Sep 5, 2024
6db244e
fix
xurui1995 Sep 5, 2024
2d8e5ed
Merge branch 'main' into xurui/add_dlti
xurui1995 Sep 5, 2024
1e09121
Merge branch 'main' into xurui/add_dlti
xurui1995 Sep 9, 2024
2ab9a48
fix
xurui1995 Sep 9, 2024
072eed6
Merge branch 'main' into xurui/add_dlti
xurui1995 Sep 9, 2024
e7bfb70
Merge branch 'main' into xurui/add_dlti
xurui1995 Sep 9, 2024
9f5cb89
Merge branch 'main' into xurui/add_dlti
xurui1995 Sep 10, 2024
9 changes: 9 additions & 0 deletions python/CMakeLists.txt
@@ -49,6 +49,8 @@ declare_mlir_python_sources(GcPythonSources.Common
__init__.py
graph_compiler.py
dialects/__init__.py
tools/__init__.py
tools/cpuinfo.py
# init hooks
_mlir_libs/_site_initialize_0.py
)
@@ -86,6 +88,13 @@ declare_mlir_python_extension(GcPythonSources.Extension
GcCAPI
)

declare_mlir_python_extension(GcPythonSources.CpuInfoExtension
MODULE_NAME _cpuinfo
ADD_TO_PARENT GcPythonSources
SOURCES
CPUInfo.cpp
)

################################################################################
# Common CAPI
################################################################################
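Once built, the `_cpuinfo` extension lands under the package's `_mlir_libs` directory. A minimal sketch of importing it directly (values shown are illustrative; the memoized wrappers in `gc_mlir.tools.cpuinfo` further down are the intended entry point):

```python
# Minimal sketch: query the _cpuinfo extension directly once it is built
# and gc_mlir is importable; printed values are illustrative.
from gc_mlir._mlir_libs import _cpuinfo

print(_cpuinfo.get_cache_sizes())       # e.g. [49152, 2097152, 110100480]
print(_cpuinfo.get_max_vector_width())  # e.g. 512 on an AVX-512 machine
```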
87 changes: 87 additions & 0 deletions python/CPUInfo.cpp
@@ -0,0 +1,87 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/

#include "mlir/Bindings/Python/PybindAdaptors.h"

#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86)
// x86 or x86_64 specific code
void cpuid(int info[4], int leaf, int subleaf) {
__asm__ __volatile__("cpuid"
: "=a"(info[0]), "=b"(info[1]), "=c"(info[2]),
"=d"(info[3])
: "a"(leaf), "c"(subleaf));
}

std::vector<int> getCacheSizes() {
int info[4];
cpuid(info, 0, 0);
int nIds = info[0];
int caches[3] = {};
for (int i = 0; i <= nIds; ++i) {
cpuid(info, 4, i);
int cacheType = info[0] & 0x1F;
if (cacheType == 0) {
break;
}
if (cacheType == 2) {
// skip instruction cache
continue;
}
int cacheLevel = (info[0] >> 5) & 0x7;
int cacheLinesPerTag = ((info[1] >> 0) & 0xFFF) + 1;
int cacheAssociativity = ((info[1] >> 12) & 0x3FF) + 1;
int cachePartitions = ((info[1] >> 22) & 0x3FF) + 1;
int cacheSets = info[2] + 1;
int cacheSize =
cacheLinesPerTag * cacheAssociativity * cachePartitions * cacheSets;
if (cacheLevel >= 1 && cacheLevel <= 3) {
caches[cacheLevel - 1] = cacheSize;
}
}
return std::vector<int>(std::begin(caches), std::end(caches));
}

bool isFeatureSupported(int function_id, int register_idx, int bit) {
int info[4];
cpuid(info, function_id, 0);
return (info[register_idx] & (1 << bit)) != 0;
}

int getMaxVectorWidth() {
if (isFeatureSupported(7, 1, 16)) { // Check for AVX-512F support
return 512;
} else if (isFeatureSupported(1, 2, 28)) { // Check for AVX support
return 256;
} else if (isFeatureSupported(1, 3, 25)) { // Check for SSE support
return 128;
}
return 64; // Default to 64 if none of the above features are supported
}
#else
std::vector<int> getCacheSizes() { return {}; }

int getMaxVectorWidth() { return 0; }
#endif

PYBIND11_MODULE(_cpuinfo, m) {
m.doc() = "Graph-compiler MLIR Python binding";
m.def("get_cache_sizes", &getCacheSizes, "Get CPU L1,L2,L3 cache size");
m.def("get_max_vector_width", &getMaxVectorWidth,
"Get CPU supported max vector width");
}
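As a sanity check on the leaf-4 decoding above: each field is stored minus one in the registers, and the cache size is the product of the four decoded fields. (The `cacheAssociativity`/`cachePartitions` names appear swapped relative to the Intel SDM field order, which is harmless here since only their product is used.) A hedged worked example with illustrative register values for a 48 KiB, 12-way L1 data cache:

```python
# Hedged worked example of the CPUID leaf-4 size formula; the raw field
# values are illustrative, per the Intel SDM layout.
line_size  = 0x3F + 1   # EBX[11:0]  -> 64-byte lines
partitions = 0x0 + 1    # EBX[21:12] -> 1 partition
ways       = 0xB + 1    # EBX[31:22] -> 12-way associative
sets       = 0x3F + 1   # ECX        -> 64 sets

assert line_size * partitions * ways * sets == 48 * 1024  # 49152 bytes
```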
7 changes: 7 additions & 0 deletions python/gc_mlir/tools/__init__.py
@@ -0,0 +1,7 @@
# ===-- __init__.py - init ------------------------------------*- Python -*-===#
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#
26 changes: 26 additions & 0 deletions python/gc_mlir/tools/cpuinfo.py
@@ -0,0 +1,26 @@
# ===-- cpuinfo.py - Getting the CPU info ---------------------*- Python -*-===#
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

from .._mlir_libs import _cpuinfo

_cache_sizes = []
_max_vector_width = None


def get_cache_sizes():
global _cache_sizes
if not _cache_sizes:
_cache_sizes = _cpuinfo.get_cache_sizes()
return _cache_sizes


def get_max_vector_width():
global _max_vector_width
if _max_vector_width is None:
_max_vector_width = _cpuinfo.get_max_vector_width()
return _max_vector_width
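A short usage sketch of these memoized wrappers (values are illustrative; on non-x86 builds `get_cache_sizes()` returns an empty list per the C++ fallback above):

```python
# Hedged usage sketch: results are computed once and reused on later calls.
from gc_mlir.tools import cpuinfo

sizes = cpuinfo.get_cache_sizes()       # [L1, L2, L3] in bytes on x86
width = cpuinfo.get_max_vector_width()  # e.g. 512, 256, or 128 (bits)
if len(sizes) == 3:
    l1, l2, l3 = sizes
    print(f"L1={l1} L2={l2} L3={l3} max_vector_width={width}")
```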
8 changes: 8 additions & 0 deletions test/benchgc/README.md
@@ -8,6 +8,8 @@ Benchgc is a tool used to verify the correctness and performance of the graph compiler
* python >= 3.10
* torch >= 2.2
* Enable the MLIR Python binding; refer to [`python/README.md`](../../python/README.md) for details
* Set the environment variables:
* OMP_NUM_THREADS [int]: the `num_threads` value for the DLTI attr, default = 1

## Build
There are two ways to use benchgc
@@ -107,6 +109,12 @@ module {
| Pytorch tensor dump | F | dump filename |
| Benchdnn driver | D | driver_name[:driver filling parameter]* |

### --cpu_cache_sizes, --max_vector_width
* BenchGC automatically obtains the target info and attaches the DLTI attr to the IR
* If the system info obtained by BenchGC is not accurate, you can specify the relevant attributes through these options
* --cpu_cache_sizes: CPU cache sizes in bytes, format: L1:L2:L3, example: `--cpu_cache_sizes 49152:2097152:110100480`
* --max_vector_width: the maximum vector register width in bits available on the CPU, example: `--max_vector_width 512`

#### Benchdnn driver filling

| driver_name | driver filling parameter |
2 changes: 1 addition & 1 deletion test/benchgc/setup.py
@@ -26,5 +26,5 @@
packages=setuptools.find_packages("src")
+ setuptools.find_namespace_packages("../../python_packages/gc_mlir_core"),
package_data={"gc_mlir": ["_mlir_libs/*.so"]},
install_requires=["torch", "numpy", "ml_dtypes"],
install_requires=["torch", "numpy"],
)
16 changes: 16 additions & 0 deletions test/benchgc/src/benchgc/__main__.py
@@ -124,6 +124,20 @@ def add_common_options(parser: argparse.ArgumentParser):
help="if we need print the ir during the pass-pipeline",
)

parser.add_argument(
"--cpu_cache_sizes",
required=False,
help="set the cpu cache sizes, format: L1:L2:L3",
type=str,
)

parser.add_argument(
"--max_vector_width",
required=False,
help="set the cpu max_vector_width",
type=int,
)

if parser.parse_known_args()[0].driver == "linalg":
parser.add_argument(
"--cast",
@@ -269,6 +283,8 @@ def get_module_and_args(flags: argparse.Namespace):
for arg in args:
arg.print_verbose(flags.verbose)

benchgc.mlir.util.attach_dlti(flags, module)

if flags.verbose >= benchgc.util.MODULE_VERBOSE:
print(module)
return module, args
49 changes: 49 additions & 0 deletions test/benchgc/src/benchgc/mlir/util.py
@@ -14,12 +14,15 @@
# limitations under the License.
################################################################################

import argparse
import ctypes
import os
from typing import Any, List

import torch
from gc_mlir import ir
from gc_mlir.dialects import arith, func, memref
from gc_mlir.tools import cpuinfo


# calling python binding consumes a lot of time e.g. get_name()
@@ -152,3 +155,49 @@ def get_kernel_func_from_module(
if type(f) is func.FuncOp and str(f.name).strip('"') == func_name:
return f
raise ValueError("can not find the entry function")


def attach_dlti(flags: argparse.Namespace, module: ir.Module):
# the module already has a DLTI attr
if "dlti.target_system_spec" in module.operation.attributes:
return
if flags.cpu_cache_sizes:
caches_sizes = [int(x) for x in flags.cpu_cache_sizes.strip().split(":")]
else:
caches_sizes = cpuinfo.get_cache_sizes()
if not caches_sizes or len(caches_sizes) != 3:
print(
"Failed to get CPU cache sizes, please add them manually via --cpu_cache_sizes"
)
return
if flags.max_vector_width:
max_vector_width = flags.max_vector_width
else:
max_vector_width = cpuinfo.get_max_vector_width()
if not max_vector_width:
print(
"Failed to get CPU max vector width, please add it manually via --max_vector_width"
)
return
l1_data_cache_size, l2_cache_size, l3_cache_size = caches_sizes
if "OMP_NUM_THREADS" not in os.environ:
print("OMP_NUM_THREADS is not found, using 1 as default")
num_threads = os.environ.get("OMP_NUM_THREADS", 1)

dlti_template = f"""
module attributes {{
dlti.target_system_spec = #dlti.target_system_spec<
"CPU": #dlti.target_device_spec<
#dlti.dl_entry<"L1_cache_size_in_bytes", {l1_data_cache_size} : ui32>,
#dlti.dl_entry<"L2_cache_size_in_bytes", {l2_cache_size} : ui64>,
#dlti.dl_entry<"L3_cache_size_in_bytes", {l3_cache_size} : ui64>,
#dlti.dl_entry<"num_threads", {num_threads} : i32>,
#dlti.dl_entry<"max_vector_width", {max_vector_width} : i64>>
>}} {{}}
"""
print(dlti_template)
with module.context:
template_module = ir.Module.parse(dlti_template)
module.operation.attributes["dlti.target_system_spec"] = (
template_module.operation.attributes["dlti.target_system_spec"]
)
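A hedged sketch of driving `attach_dlti` directly, mirroring the `__main__.py` call site; it assumes the gc_mlir bindings register the DLTI dialect (so the parse inside the helper succeeds), and the flag values are illustrative overrides:

```python
# Hedged usage sketch; assumes the gc_mlir bindings load the DLTI dialect
# and that OMP_NUM_THREADS is set (otherwise the helper defaults to 1).
import argparse

import benchgc.mlir.util
from gc_mlir import ir

flags = argparse.Namespace(
    cpu_cache_sizes="49152:2097152:110100480",  # L1:L2:L3 override
    max_vector_width=512,                       # skip auto-detection
)
with ir.Context():
    module = ir.Module.parse("module {}")
    benchgc.mlir.util.attach_dlti(flags, module)
    assert "dlti.target_system_spec" in module.operation.attributes
```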