Skip to content

Commit 3597a30

Browse files
Implemented tiling and fusion path for GPU
1 parent 672edc9 commit 3597a30

File tree

9 files changed

+852
-14
lines changed

9 files changed

+852
-14
lines changed

include/gc/Transforms/Passes.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,36 @@ def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
120120
"Call finish() after each kernel launch.">
121121
];
122122
}
123+
124+
def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
125+
let summary = "GPU tiling and fusion path.";
126+
let description = [{
127+
This pass tiles linalg operations and creates two nested scf.forall loops. When converting to gpu.launch,
128+
the inner loop is mapped to the block sizes and the outer one to the grid sizes. The tile size calculation is based
129+
on the GPU device properties, retrieved from the DLTI attributes. If the DLTI attributes are not specified,
130+
the values of the pass options are used instead.
131+
}];
132+
let options = [
133+
Option<"numEus", "num-eus", "size_t",
134+
/*default=*/"448",
135+
"Number of Execution Units.">,
136+
Option<"numEusPerSlice", "num-eus-per-slice", "size_t",
137+
/*default=*/"8",
138+
"Number of Execution Units per slice.">,
139+
Option<"numThreadsPerEu", "num-threads-per-eu", "size_t",
140+
/*default=*/"8",
141+
"Number of threads per Execution Unit.">,
142+
Option<"cacheSize", "cache-size", "size_t",
143+
/*default=*/"131072",
144+
"Execution Unit cache size.">,
145+
Option<"vectorWidth", "vector-width", "size_t",
146+
/*default=*/"512",
147+
"The maximum width of EU's vector registers.">,
148+
Option<"workGroupSize", "work-group-size", "size_t",
149+
/*default=*/"64",
150+
"The maximum workgroup size.">
151+
];
152+
}
123153
#endif // GC_USE_IMEX
124154

125155
def IterativeTilingAndFusion : Pass<"iterative-tiling-and-fusion",

lib/gc/Transforms/GPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set_property(GLOBAL APPEND PROPERTY IMEX_LIBS ${IMEX_LIBS})
1313
gc_add_mlir_library(GcGpuPasses
1414
AddContextArg.cpp
1515
AllocsToSLM.cpp
16+
GpuTilingAndFusion.cpp
1617
GpuToGpuOcl.cpp
1718
LinalgToXeGPU.cpp
1819
Pipeline.cpp

0 commit comments

Comments
 (0)