@@ -124,29 +124,40 @@ def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
124
124
def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
125
125
let summary = "GPU tiling and fusion pass.";
126
126
let description = [{
127
- This path tiles linalg operations and wraps into foreach loops.
128
- The tiles calculation is based on the Execution Unit cache size and the number of threads per EU.
127
+ This pass tiles linalg operations and creates an inner loop that is mapped to the block sizes, when converting
128
+ to gpu.launch. The tiles calculation is based on the GPU device properties, retrieved from the DLTI attributes.
129
+ If the DLTI attributes are not specified, defaults to the pass options.
129
130
}];
130
131
let options = [
131
- Option<"euMem", "eu-mem", "size_t",
132
+ Option<"numEus", "num-eus", "size_t",
133
+ /*default=*/"448",
134
+ "Number of Execution Units.">,
135
+ Option<"numEusPerSlice", "num-eus-per-slice", "size_t",
136
+ /*default=*/"8",
137
+ "Number of Execution Units per slice.">,
138
+ Option<"numThreadsPerEu", "num-threads-per-eu", "size_t",
139
+ /*default=*/"8",
140
+ "Number of threads per Execution Unit.">,
141
+ Option<"cacheSize", "cache-size", "size_t",
132
142
/*default=*/"131072",
133
143
"Execution Unit cache size.">,
134
- Option<"euThreads", "eu-threads", "size_t",
135
- /*default=*/"8",
136
- "Number of threads per EU.">
144
+ Option<"vectorWidth", "vector-width", "size_t",
145
+ /*default=*/"512",
146
+ "The maximum width of EU's vector registers.">
137
147
];
138
148
}
139
149
140
150
def GpuLoopTiling : Pass<"gpu-loop-tiling", "func::FuncOp"> {
141
151
let summary = "Create nested parallel loops to be mapped to GPU.";
142
152
let description = [{
143
- This path tiles the loops created by the GpuTilingAndFusion pass and converted to parallel loops.
144
- Each tile of the outer loop is divided by the number of threads per EU.
153
+ This pass tiles the loops created by the GpuTilingAndFusion pass and converted to parallel loops. The tiles
154
+ calculation is based on the max_work_group_size DLTI attribute. If the attribute is not specified,
155
+ defaults to the pass options.
145
156
}];
146
157
let options = [
147
- Option<"euThreads", "eu-threads", "size_t",
148
- /*default=*/"8",
149
- "Number of threads per Execution Unit.">
158
+ Option<"workGroupSize", "work-group-size", "size_t",
159
+ /*default=*/"64",
160
+ "The maximum workgroup size.">
150
161
];
151
162
}
152
163
#endif // GC_USE_IMEX
0 commit comments