
Commit 1731525

replace the sysDesc in fusion pass

committed · 1 parent b4a3351 · commit 1731525

File tree

2 files changed: +21 -83 lines

include/gc/Transforms/Passes.td

Lines changed: 18 additions & 26 deletions
@@ -49,43 +49,35 @@ def LinalgToXeGPU : Pass<"linalg-to-xegpu", "func::FuncOp"> {
 }
 #endif

-def IterativeTilingAndFusion
-    : Pass<"iterative-tiling-and-fusion", "func::FuncOp"> {
+def IterativeTilingAndFusion : Pass<"iterative-tiling-and-fusion",
+                                    "func::FuncOp"> {
   let summary = "Iterative tiling and fusion for any tilable operation";
   let description = [{
-    The pass tries to fuse any MLIR operation which can be tiled.Moreover,
-    this pass aims to
-    support: 1. Matmul fusion with element -
-    wise / reduce / broadcast ops.2. Pre - op and post -
-    op fusion.3. Multi - consumer and multi -
-    producer support.4. Multiple level of nest loops and candidates
-    .5. Flexible option to control the boundary of iterative
-    process.6. Cost -
-    model to determine whether to fuse or not.
+    The pass tries to fuse any MLIR operation which can be tiled. Moreover, this pass aims to support:
+    1. Matmul fusion with element-wise/reduce/broadcast ops.
+    2. Pre-op and post-op fusion.
+    3. Multi-consumer and multi-producer support.
+    4. Multiple level of nest loops and candidates.
+    5. Flexible option to control the boundary of iterative process.
+    6. Cost-model to determine whether to fuse or not.

-    It intends to control the granularity of fusion by `fusion -
-    level`,
-    E.g.* `0`: disable any fusion.* `1`:
-    [Default] enable both producer and consumer fusion,
-    covering any tilable operation including tensor.pack / tensor.fill /
-    linalg.reduce etc but excluding branches forked by multiple
-    uses.* `2`: `LEVEL 1` +
-    extend to any topology including branches.
+    It intends to control the granularity of fusion by `fusion-level`, E.g.
+    * `0`: disable any fusion.
+    * `1`:[Default] enable both producer and consumer fusion, covering any tilable operation including tensor.pack/tensor.fill/linalg.reduce etc but excluding branches forked by multiple uses.
+    * `2`: `LEVEL 1` + extend to any topology including branches.
   }];
-  let dependentDialects = [
-      "func::FuncDialect", "linalg::LinalgDialect", "scf::SCFDialect",
-      "tensor::TensorDialect"
-  ];
+  let dependentDialects = ["func::FuncDialect", "linalg::LinalgDialect", "scf::SCFDialect",
+                           "tensor::TensorDialect"];

   let options = [
     Option<"fusionLevel", "fusion-level", "int64_t",
-           /*default=*/"1", "Control the granularity of fusion.">,
+           /*default=*/"1",
+           "Control the granularity of fusion.">,
     Option<"useCostModel", "use-cost-model", "bool",
            /*default=*/"false",
            "Decide if enable cost model to control iterative fusion.">,
     ListOption<"defaultTileSize", "default-tile-size", "std::string",
-               "Set default TileSize for the certain type of op, saying "
-               "matmul:{32,32}">,
+               "Set default TileSize for the certain type of op, saying matmul:{32,32}">,
   ];
 }
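For context, a minimal sketch of how the options defined above might be set when the pass is added to a pipeline in C++. The factory and options-struct names (mlir::gc::createIterativeTilingAndFusion, IterativeTilingAndFusionOptions) follow MLIR's TableGen-generated naming conventions and are assumptions, not verified against this repository; the field names fusionLevel, useCostModel and defaultTileSize come from the Option/ListOption declarations above.

// Sketch under the naming assumptions stated above; the real factory may differ.
#include "gc/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/PassManager.h"

static void buildTilingAndFusionPipeline(mlir::PassManager &pm) {
  // Field names mirror the Option/ListOption declarations in Passes.td.
  mlir::gc::IterativeTilingAndFusionOptions options;
  options.fusionLevel = 2;      // LEVEL 1 plus branched topologies
  options.useCostModel = true;  // let the cost model veto unprofitable fusion
  options.defaultTileSize.push_back("matmul:{32,32}");
  // The pass is anchored on func::FuncOp, so nest it under the module.
  pm.addNestedPass<mlir::func::FuncOp>(
      mlir::gc::createIterativeTilingAndFusion(options));
}

From an mlir-opt-style driver, the same knobs are reachable through the registered pass name, e.g. --iterative-tiling-and-fusion="fusion-level=2 use-cost-model=true default-tile-size=matmul:{32,32}"; which opt binary hosts the pass is repo-specific.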

lib/gc/Transforms/IterativeTilingAndFusion.cpp

Lines changed: 3 additions & 57 deletions
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

+#include "gc/Analysis/TargetDescriptionAnalysis.h"
 #include "gc/Transforms/Passes.h"
 #include "mlir/Analysis/TopologicalSortUtils.h"
 #include "mlir/Dialect/DLTI/Traits.h"
@@ -579,62 +580,6 @@ static LogicalResult isSelfTiledOp(Operation *targetOp) {
   return success(walkResult.wasInterrupted());
 }

-struct SystemDesc {
-  // get runtime OMP_NUM_THREADS
-  uint32_t getNumThreads() {
-    std::optional<Attribute> numThreads = layout.getDevicePropertyValue(
-        Builder(ctx).getStringAttr("CPU" /* device ID*/),
-        Builder(ctx).getStringAttr("num_threads"));
-    if (numThreads && isa<IntegerAttr>(*numThreads)) {
-      return dyn_cast<IntegerAttr>(*numThreads).getInt();
-    }
-    return 1;
-  }
-  // get cache size by cacheLevel
-  size_t getCacheSize(uint8_t cacheLevel) {
-    if (cacheLevel == 1) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L1_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    } else if (cacheLevel == 2) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L2_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    } else if (cacheLevel == 3) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L3_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    }
-    return 0;
-  }
-
-  // get the maximum vector length in bits
-  size_t getMaxVectorLength() {
-    std::optional<Attribute> maxVectorLength = layout.getDevicePropertyValue(
-        Builder(ctx).getStringAttr("CPU" /* device ID*/),
-        Builder(ctx).getStringAttr("max_vector_width"));
-    if (maxVectorLength && isa<IntegerAttr>(*maxVectorLength)) {
-      return dyn_cast<IntegerAttr>(*maxVectorLength).getInt();
-    }
-    return 512;
-  }
-
-  SystemDesc(ModuleOp m) : layout(m), ctx(m->getContext()) {}
-
-private:
-  DataLayout layout;
-  MLIRContext *ctx;
-};
-
 using OpTileSizeMap = std::unordered_map<std::string, SmallVector<int64_t>>;

 template <typename OpTy>
@@ -806,7 +751,8 @@ struct IterativeTilingAndFusion
     // Get funcOp
     func::FuncOp func = getOperation();
     // Get system descriptor
-    SystemDesc sysDesc(func->getParentOfType<ModuleOp>());
+    CPUTargetDescriptionAnalysis sysDesc =
+        getAnalysis<CPUTargetDescriptionAnalysis>();
     // Flexible options to control which candidate slice would be selected from
     // the view of both validity and performance.
     CandidateSliceOptions sliceOptions;
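For context, a sketch of how the analysis obtained above might be consumed by the fusion cost model. The accessor names getNumThreads and getCacheSize are assumed to mirror the removed SystemDesc interface and are not verified; the authoritative declaration lives in gc/Analysis/TargetDescriptionAnalysis.h, and fitsInL2/workingSetBytes are purely illustrative names.

// Illustrative helper under the assumptions stated above; assumes the analysis
// class is visible in the current namespace.
#include "gc/Analysis/TargetDescriptionAnalysis.h"

// Hypothetical check: does a fusion candidate's working set fit in the L2
// budget reported by the target description?
static bool fitsInL2(CPUTargetDescriptionAnalysis &sysDesc,
                     size_t workingSetBytes) {
  // cacheLevel follows the removed SystemDesc convention: 1 = L1, 2 = L2, 3 = L3.
  size_t l2Bytes = sysDesc.getCacheSize(/*cacheLevel=*/2); // bytes, 0 if unknown
  return l2Bytes != 0 && workingSetBytes <= l2Bytes;
}

Because the descriptor is now requested with getAnalysis<CPUTargetDescriptionAnalysis>(), MLIR's analysis manager owns and caches it, instead of each pass rebuilding a private SystemDesc from the module's DLTI attributes.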
