
Commit 0956de2

merge bench code into benchgc (#199)
1 parent 3f04dc9 commit 0956de2

37 files changed, +1082 -1108 lines changed

python/config.py.in

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ llvm_obj_root = "@LLVM_BINARY_DIR@"
 llvm_lib_dir = "@LLVM_LIBRARY_DIR@"
 shlib_ext = "@LTDL_SHLIB_EXT@"
 gc_lib_dir = "@LLVM_LIBRARY_OUTPUT_INTDIR@"
-
+GC_ENABLE_DNNL_API ="@GC_ENABLE_DNNL_API@" in ["ON", "1"]
 
 if sys.platform.startswith("win32"):
     mlir_runner_utils_dir = os.path.normpath(os.path.join(llvm_obj_root, "bin"))
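
For context, once CMake configures this template the `@GC_ENABLE_DNNL_API@` placeholder is replaced by the cached value, so the membership test yields a plain boolean. A minimal, hypothetical illustration (the ON value is assumed, not taken from any particular build):

```
# Hypothetical configured output when the cache variable is ON;
# "OFF" or an empty substitution would make the flag False.
GC_ENABLE_DNNL_API = "ON" in ["ON", "1"]   # True
```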

python/gc_mlir/_mlir_libs/_site_initialize_0.py

Lines changed: 2 additions & 3 deletions
@@ -5,6 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 # ===-----------------------------------------------------------------------===#
+from gc_mlir.config import GC_ENABLE_DNNL_API
 
 
 def context_init_hook(context):
@@ -13,11 +14,9 @@ def context_init_hook(context):
 
     register_cpuruntime_dialect(context)
 
-    try:
+    if GC_ENABLE_DNNL_API:
         from ._gc_mlir.onednn_graph import (
             register_dialect as register_onednn_graph_dialect,
         )
 
         register_onednn_graph_dialect(context)
-    except ModuleNotFoundError:
-        print("onednn_graph dialect not found")

scripts/correctness.sh

Lines changed: 3 additions & 0 deletions
@@ -102,5 +102,8 @@ python3 -m benchgc --verbose 0 --driver mlir --case ${CASE_DIR}/reduce.mlir || F
 # mlir
 # python3 -m benchgc --verbose 0 --driver mlir --case ${CASE_DIR}/llama2.mlir || FAIL=1
 
+#mlp
+python3 -m benchgc --verbose 1 --driver pattern --case mlp --batch_size=32 --hidden_size_list=32x16x64 --has_bias=1x1 --act_type=noop --dtype=f32
+
 set +e
 exit $FAIL

test/benchgc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -39,3 +39,4 @@ add_subdirectory("src/benchgc/mlir")
 add_subdirectory("src/benchgc/linalg")
 add_subdirectory("src/benchgc/tensor")
 add_subdirectory("src/benchgc/arith")
+add_subdirectory("src/benchgc/pattern")

test/benchgc/README.md

Lines changed: 182 additions & 10 deletions
@@ -2,32 +2,47 @@
 
 ## Description
 
-Benchgc is a tool used to verify the correctness and performance of graph compiler. Benchgc accepts MLIR files based on the OneDNN graph dialect as test cases and prepares test data for them. For correctness verification, Benchgc will use PyTorch as a reference for comparison.
+Benchgc is a tool used to verify the correctness and performance of the graph compiler. Benchgc accepts MLIR files as test cases and prepares test data for them. For correctness verification, Benchgc will use PyTorch as a reference for comparison.
 
 ## Prerequisite
 * python >= 3.10
 * torch >= 2.2
-* pybind11
+* Enable the MLIR python binding; refer to [`python/README.md`](../../python/README.md) for details
 
-## Build and install
+## Build
+There are two ways to use benchgc:
+
+* Build the `.whl` package and install benchgc
 ```
 # Please execute at the top level of the project
 
-mkdir -p build
-cd build
-
+mkdir build && cd build
 cmake .. -DMLIR_DIR=$MLIR_PATH -DGC_TEST_ENABLE=ON -DGC_ENABLE_BINDINGS_PYTHON=ON -DGC_BENCH_ENABLE=ON
 make -j benchgc
-
 python -m pip install test/benchgc/dist/benchgc-*.whl
 
 ```
 
+* Run benchgc from source code
+
+```
+# Please execute at the top level of the project
+
+mkdir build && cd build
+cmake .. -DMLIR_DIR=$MLIR_PATH -DGC_TEST_ENABLE=ON -DGC_ENABLE_BINDINGS_PYTHON=ON -DGC_BENCH_ENABLE=ON
+make -j GcPythonModules
+export PYTHONPATH=$(pwd)/python_packages/gc_mlir_core/:$(pwd)/../test/benchgc/src/
+```
+
 ## Synopsis
 ```
-python -m benchgc [OPTIONS] --driver [DRIVER] --case [CASE]
+python -m benchgc [OPTIONS] --mode [MODE] --driver [DRIVER] --case [CASE]
 ```
-## Flags
+## Common Options
+### --mode [str]
+* C : correctness testing (by default)
+* P : performance testing
+
 ### --driver [str]
 * linalg: test the single op in linalg dialect
 * mlir: upload a mlir file and run
@@ -38,11 +53,25 @@ python -m benchgc [OPTIONS] --driver [DRIVER] --case [CASE]
 * if driver=pattern, please provide the pre-defined pattern name, such as mlp here
 * if driver is a dialect name, please provide the detail op name to start a single op test
 
+### --entry [str]
+* default : "entry"
+* the entry name of the kernel of the input mlir or the generated mlir
+
 ### --seed [int]
 * set the seed to generate the test data and reproduce the test
 
 ### --verbose [int]
-* set the verbose level
+* set the verbose level, default : 0
+* 0 : NO_VERBOSE
+* 1 : MODULE_VERBOSE, print the module to be executed
+* 2 : ARG_VERBOSE, + print arg information
+* 3 : COMPARE_VERBOSE, + print the threshold for comparison
+* 4 : ERROR_OUTPUT_VERBOSE, + print all error data points if failed
+* 5 : OUTPUT_VERBOSE, + print all results including passed tensors
+* 6 : INPUT_VERBOSE, + print input torch tensors
+
+### --ir_printing (action=store_true)
+* Print the IR during the pass pipeline
 
 ### --md index:SHAPExTYPE
 * Describe the shape and data type for argument
@@ -97,7 +126,28 @@ module {
 | Norm check | N | threshold |
 | Benchdnn driver | D | driver_name:dtype:case |
 
+## Bench Options
+### --bench_kind [str]
+* py : use the MLIR Python API to invoke the kernel and use Python to calculate the time cost
+* wrapper : modify the MLIR by wrapping the kernel into a new method and calling the `nanoTime()` method before and after the kernel call; the difference is reported as the time cost
+
+### --warm_up [int]
+* warm-up times of the execution
+
+### --repeat [int]
+* repeat times of the execution
+
+## Pattern Options
+Each pattern has its own unique options.
+### mlp
+* `--batch_size`: the input batch size
+* `--hidden_size_list`: hidden sizes of the mlp, example: 32x16x64
+* `--has_bias`: whether each matmul op has a bias, example: 1x0
+* `--act_type`: choices=["noop", "relu"]
+* `--dtype`: choices=["bf16", "f32"]
+
 ## Example
+### Correctness testing example
 ```
 # single add op test
 # using the same data filling / compare strategy as the benchdnn primitive driver if not set
@@ -254,4 +304,126 @@ p2p check: threshold: 0.0000000
 (1, 0): ref: 25.1690636 res: 25.1690636 abs_diff: 0.0000000 rel_diff: 0.0000000
 (1, 1): ref: -7.8600063 res: -7.8600044 abs_diff: 0.0000019 rel_diff: 0.0000002
 FAIL: linalg.matmul_transpose_b
+```
+
+### Perf testing example
+* single op example
+```
+python3 -m benchgc --verbose 1 --mode P --driver linalg --case add --md 0:4x5xf32 --md 1:4x5xf32 --md 2:4x5xf32
+
+module {
+  func.func @entry(%arg0: tensor<4x5xf32>, %arg1: tensor<4x5xf32>) -> tensor<4x5xf32> attributes {llvm.emit_c_interface} {
+    %cst = arith.constant 0.000000e+00 : f32
+    %0 = tensor.empty() : tensor<4x5xf32>
+    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4x5xf32>) -> tensor<4x5xf32>
+    %2 = linalg.add ins(%arg0, %arg1 : tensor<4x5xf32>, tensor<4x5xf32>) outs(%1 : tensor<4x5xf32>) -> tensor<4x5xf32>
+    return %2 : tensor<4x5xf32>
+  }
+}
+
+===========bench result===========
+{
+  "args": {
+    "mode": "P",
+    "driver": "linalg",
+    "case": "add",
+    "md": [
+      "0:4x5xf32",
+      "1:4x5xf32",
+      "2:4x5xf32"
+    ],
+    "fill": [],
+    "cmp": [],
+    "seed": 0,
+    "verbose": 1,
+    "entry": "entry",
+    "ir_printing": false,
+    "cast": "cast_signed",
+    "dimension": null,
+    "dimensions": null,
+    "dilations": null,
+    "strides": null,
+    "bench_kind": "py",
+    "warm_up": 100,
+    "repeat": 100
+  },
+  "compile_cost(ms)": 37.72595152258873,
+  "execute_cost(ms)": 0.00022314488887786865
+}
+```
+
+* mlir example
+```
+python3 -m benchgc --mode P --verbose 1 --driver mlir --case=./test.mlir --bench_kind wrapper --warm_up 50 --repeat 200
+module {
+  func.func @entry(%arg0: tensor<512x128xf32>) -> tensor<512x128xf32> attributes {llvm.emit_c_interface} {
+    %cst = arith.constant 0.000000e+00 : f32
+    %0 = tensor.empty() : tensor<512x128xf32>
+    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<512x128xf32>) -> tensor<512x128xf32>
+    %2 = linalg.abs ins(%arg0 : tensor<512x128xf32>) outs(%1 : tensor<512x128xf32>) -> tensor<512x128xf32>
+    return %2 : tensor<512x128xf32>
+  }
+}
+
+===========bench result===========
+{
+  "args": {
+    "mode": "P",
+    "driver": "mlir",
+    "case": "/home/xurui/gc_v2/test.mlir",
+    "md": [],
+    "fill": [],
+    "cmp": [],
+    "seed": 0,
+    "verbose": 1,
+    "entry": "entry",
+    "ir_printing": false,
+    "bench_kind": "wrapper",
+    "warm_up": 50,
+    "repeat": 200
+  },
+  "compile_cost(ms)": 70.6995539367199,
+  "execute_cost(ms)": 0.029325044999999984
+}
+```
+* mlp example
+```
+python3 -m benchgc --verbose 1 --mode P --driver pattern --case mlp --batch_size=32 --hidden_size_list=32x16x64 --has_bias=0x0 --act_type=noop --dtype=f32
+
+module {
+  func.func @entry(%arg0: tensor<32x32xf32>, %arg1: tensor<32x16xf32>, %arg2: tensor<16x64xf32>) -> tensor<32x64xf32> attributes {llvm.emit_c_interface} {
+    %0 = tensor.empty() : tensor<32x16xf32>
+    %1 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%arg0, %arg1 : tensor<32x32xf32>, tensor<32x16xf32>) outs(%0 : tensor<32x16xf32>) -> tensor<32x16xf32>
+    %2 = tensor.empty() : tensor<32x64xf32>
+    %3 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1, %arg2 : tensor<32x16xf32>, tensor<16x64xf32>) outs(%2 : tensor<32x64xf32>) -> tensor<32x64xf32>
+    return %3 : tensor<32x64xf32>
+  }
+}
+
+===========bench result===========
+{
+  "args": {
+    "mode": "P",
+    "driver": "pattern",
+    "case": "mlp",
+    "md": [],
+    "fill": [],
+    "cmp": [],
+    "seed": 0,
+    "verbose": 1,
+    "entry": "entry",
+    "ir_printing": false,
+    "bench_kind": "py",
+    "warm_up": 100,
+    "repeat": 100,
+    "batch_size": 32,
+    "hidden_size_list": "32x16x64",
+    "has_bias": "0x0",
+    "act_type": "noop",
+    "dtype": "f32"
+  },
+  "compile_cost(ms)": 109.86808314919472,
+  "execute_cost(ms)": 0.02944003790616989
+}
+
 ```
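
The README above distinguishes the `py` and `wrapper` bench kinds and the `--warm_up`/`--repeat` knobs. As a rough, hypothetical sketch of the `py`-style measurement only (the helper name `run_kernel` and the averaging are assumptions, not benchgc's actual implementation):

```
import time

def bench_py(run_kernel, warm_up=100, repeat=100):
    # Untimed warm-up executions, mirroring --warm_up.
    for _ in range(warm_up):
        run_kernel()
    # Timed executions, mirroring --repeat; report the average in milliseconds,
    # comparable in spirit to the "execute_cost(ms)" field above.
    start = time.perf_counter()
    for _ in range(repeat):
        run_kernel()
    return (time.perf_counter() - start) / repeat * 1000.0
```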
