Skip to content

Commit 7387f33

Browse files
committed
[Polly] Hide IslScheduleOptimizer implementation from header. NFC.
These are implementation details of the IslScheduleOptimizer pass implementation and not used anywhere else. Hence, we can move them to the cpp file and into an anonymous namespace. Only getPartialTilePrefixes is, aside from the pass itself, used externally (by the ScheduleOptimizerTest) and moved into the polly namespace.
1 parent 5f022ad commit 7387f33

File tree

3 files changed

+319
-328
lines changed

3 files changed

+319
-328
lines changed

polly/include/polly/ScheduleOptimizer.h

Lines changed: 6 additions & 320 deletions
Original file line numberDiff line numberDiff line change
@@ -10,66 +10,13 @@
1010
#define POLLY_SCHEDULEOPTIMIZER_H
1111

1212
#include "polly/ScopPass.h"
13-
#include "llvm/ADT/ArrayRef.h"
14-
#include "isl/isl-noexceptions.h"
1513

1614
namespace llvm {
1715
class Pass;
1816
class PassRegistry;
19-
class TargetTransformInfo;
2017
} // namespace llvm
2118

22-
struct isl_schedule_node;
23-
24-
/// Parameters of the micro kernel.
25-
///
26-
/// Parameters, which determine sizes of rank-1 (i.e., outer product) update
27-
/// used in the optimized matrix multiplication.
28-
struct MicroKernelParamsTy {
29-
int Mr;
30-
int Nr;
31-
};
32-
33-
/// Parameters of the macro kernel.
34-
///
35-
/// Parameters, which determine sizes of blocks of partitioned matrices
36-
/// used in the optimized matrix multiplication.
37-
struct MacroKernelParamsTy {
38-
int Mc;
39-
int Nc;
40-
int Kc;
41-
};
42-
4319
namespace polly {
44-
struct Dependences;
45-
class MemoryAccess;
46-
class Scop;
47-
48-
/// Additional parameters of the schedule optimizer.
49-
///
50-
/// Target Transform Info and the SCoP dependencies used by the schedule
51-
/// optimizer.
52-
struct OptimizerAdditionalInfoTy {
53-
const llvm::TargetTransformInfo *TTI;
54-
const Dependences *D;
55-
};
56-
57-
/// Parameters of the matrix multiplication operands.
58-
///
59-
/// Parameters, which describe access relations that represent operands of the
60-
/// matrix multiplication.
61-
struct MatMulInfoTy {
62-
MemoryAccess *A = nullptr;
63-
MemoryAccess *B = nullptr;
64-
MemoryAccess *ReadFromC = nullptr;
65-
MemoryAccess *WriteToC = nullptr;
66-
int i = -1;
67-
int j = -1;
68-
int k = -1;
69-
};
70-
71-
extern bool DisablePollyTiling;
72-
7320
llvm::Pass *createIslScheduleOptimizerWrapperPass();
7421

7522
struct IslScheduleOptimizerPass
@@ -91,273 +38,6 @@ struct IslScheduleOptimizerPrinterPass
9138
llvm::raw_ostream &OS;
9239
};
9340

94-
} // namespace polly
95-
96-
namespace llvm {
97-
void initializeIslScheduleOptimizerWrapperPassPass(llvm::PassRegistry &);
98-
}
99-
100-
class ScheduleTreeOptimizer {
101-
public:
102-
/// Apply schedule tree transformations.
103-
///
104-
/// This function takes a (possibly already optimized) schedule tree and
105-
/// applies a set of additional optimizations on the schedule tree. The
106-
/// transformations applied include:
107-
///
108-
/// - Tiling
109-
/// - Prevectorization
110-
///
111-
/// @param Schedule The schedule object the transformations will be applied
112-
/// to.
113-
/// @param OAI Target Transform Info and the SCoP dependencies.
114-
/// @returns The transformed schedule.
115-
static isl::schedule
116-
optimizeSchedule(isl::schedule Schedule,
117-
const polly::OptimizerAdditionalInfoTy *OAI = nullptr);
118-
119-
/// Apply schedule tree transformations.
120-
///
121-
/// This function takes a node in a (possibly already optimized) schedule
122-
/// tree and applies a set of additional optimizations on this schedule tree
123-
/// node and its descendants. The transformations applied include:
124-
///
125-
/// - Tiling
126-
/// - Prevectorization
127-
///
128-
/// @param Node The schedule object post-transformations will be applied to.
129-
/// @param OAI Target Transform Info and the SCoP dependencies.
130-
/// @returns The transformed schedule.
131-
static isl::schedule_node
132-
optimizeScheduleNode(isl::schedule_node Node,
133-
const polly::OptimizerAdditionalInfoTy *OAI = nullptr);
134-
135-
/// Decide if the @p NewSchedule is profitable for @p S.
136-
///
137-
/// @param S The SCoP we optimize.
138-
/// @param NewSchedule The new schedule we computed.
139-
///
140-
/// @return True, if we believe @p NewSchedule is an improvement for @p S.
141-
static bool isProfitableSchedule(polly::Scop &S, isl::schedule NewSchedule);
142-
143-
/// Isolate a set of partial tile prefixes.
144-
///
145-
/// This set should ensure that it contains only partial tile prefixes that
146-
/// have exactly VectorWidth iterations.
147-
///
148-
/// @param Node A schedule node band, which is a parent of a band node,
149-
/// that contains a vector loop.
150-
/// @return Modified isl_schedule_node.
151-
static isl::schedule_node isolateFullPartialTiles(isl::schedule_node Node,
152-
int VectorWidth);
153-
154-
private:
155-
/// Tile a schedule node.
156-
///
157-
/// @param Node The node to tile.
158-
/// @param Identifier A name that identifies this kind of tiling and
159-
/// that is used to mark the tiled loops in the
160-
/// generated AST.
161-
/// @param TileSizes A vector of tile sizes that should be used for
162-
/// tiling.
163-
/// @param DefaultTileSize A default tile size that is used for dimensions
164-
/// that are not covered by the TileSizes vector.
165-
static isl::schedule_node tileNode(isl::schedule_node Node,
166-
const char *Identifier,
167-
llvm::ArrayRef<int> TileSizes,
168-
int DefaultTileSize);
169-
170-
/// Tile a schedule node and unroll point loops.
171-
///
172-
/// @param Node The node to register tile.
173-
/// @param TileSizes A vector of tile sizes that should be used for
174-
/// tiling.
175-
/// @param DefaultTileSize A default tile size that is used for dimensions
176-
static isl::schedule_node applyRegisterTiling(isl::schedule_node Node,
177-
llvm::ArrayRef<int> TileSizes,
178-
int DefaultTileSize);
179-
180-
/// Apply the BLIS matmul optimization pattern.
181-
///
182-
/// Make the loops containing the matrix multiplication be the innermost
183-
/// loops and apply the BLIS matmul optimization pattern. BLIS implements
184-
/// gemm as three nested loops around a macro-kernel, plus two packing
185-
/// routines. The macro-kernel is implemented in terms of two additional
186-
/// loops around a micro-kernel. The micro-kernel is a loop around a rank-1
187-
/// (i.e., outer product) update.
188-
///
189-
/// For a detailed description please see [1].
190-
///
191-
/// The order of the loops defines the data reused in the BLIS implementation
192-
/// of gemm ([1]). In particular, elements of the matrix B, the second
193-
/// operand of matrix multiplication, are reused between iterations of the
194-
/// innermost loop. To keep the reused data in cache, only elements of matrix
195-
/// A, the first operand of matrix multiplication, should be evicted during
196-
/// an iteration of the innermost loop. To provide such a cache replacement
197-
/// policy, elements of the matrix A can, in particular, be loaded first and,
198-
/// consequently, be least-recently-used.
199-
///
200-
/// In our case matrices are stored in row-major order instead of
201-
/// column-major order used in the BLIS implementation ([1]). It affects only
202-
/// the form of the BLIS micro kernel and the computation of its
203-
/// parameters. In particular, reused elements of the matrix B are
204-
/// successively multiplied by specific elements of the matrix A.
205-
///
206-
/// Refs.:
207-
/// [1] - Analytical Modeling is Enough for High Performance BLIS
208-
/// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti
209-
/// Technical Report, 2014
210-
/// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf
211-
///
212-
/// @see ScheduleTreeOptimizer::createMicroKernel
213-
/// @see ScheduleTreeOptimizer::createMacroKernel
214-
/// @see getMicroKernelParams
215-
/// @see getMacroKernelParams
216-
///
217-
/// TODO: Implement the packing transformation.
218-
///
219-
/// @param Node The node that contains a band to be optimized. The node
220-
/// is required to successfully pass
221-
/// ScheduleTreeOptimizer::isMatrMultPattern.
222-
/// @param TTI Target Transform Info.
223-
/// @param MMI Parameters of the matrix multiplication operands.
224-
/// @returns The transformed schedule.
225-
static isl::schedule_node
226-
optimizeMatMulPattern(isl::schedule_node Node,
227-
const llvm::TargetTransformInfo *TTI,
228-
polly::MatMulInfoTy &MMI);
229-
230-
/// Check if this node is a band node we want to tile.
231-
///
232-
/// We look for innermost band nodes where individual dimensions are marked as
233-
/// permutable.
234-
///
235-
/// @param Node The node to check.
236-
static bool isTileableBandNode(isl::schedule_node Node);
237-
238-
/// Pre-vectorizes one scheduling dimension of a schedule band.
239-
///
240-
/// prevectSchedBand splits out the dimension DimToVectorize, tiles it and
241-
/// sinks the resulting point loop.
242-
///
243-
/// Example (DimToVectorize=0, VectorWidth=4):
244-
///
245-
/// | Before transformation:
246-
/// |
247-
/// | A[i,j] -> [i,j]
248-
/// |
249-
/// | for (i = 0; i < 128; i++)
250-
/// | for (j = 0; j < 128; j++)
251-
/// | A(i,j);
252-
///
253-
/// | After transformation:
254-
/// |
255-
/// | for (it = 0; it < 32; it+=1)
256-
/// | for (j = 0; j < 128; j++)
257-
/// | for (ip = 0; ip <= 3; ip++)
258-
/// | A(4 * it + ip,j);
259-
///
260-
/// The goal of this transformation is to create a trivially vectorizable
261-
/// loop. This means a parallel loop at the innermost level that has a
262-
/// constant number of iterations corresponding to the target vector width.
263-
///
264-
/// This transformation creates a loop at the innermost level. The loop has
265-
/// a constant number of iterations, if the number of loop iterations at
266-
/// DimToVectorize can be divided by VectorWidth. The default VectorWidth is
267-
/// currently constant and not yet target specific. This function does not
268-
/// reason about parallelism.
269-
static isl::schedule_node prevectSchedBand(isl::schedule_node Node,
270-
unsigned DimToVectorize,
271-
int VectorWidth);
272-
273-
/// Apply additional optimizations on the bands in the schedule tree.
274-
///
275-
/// We are looking for an innermost band node and apply the following
276-
/// transformations:
277-
///
278-
/// - Tile the band
279-
/// - if the band is tileable
280-
/// - if the band has more than one loop dimension
281-
///
282-
/// - Prevectorize the schedule of the band (or the point loop in case of
283-
/// tiling).
284-
/// - if vectorization is enabled
285-
///
286-
/// @param Node The schedule node to (possibly) optimize.
287-
/// @param User A pointer to forward some use information
288-
/// (currently unused).
289-
static isl_schedule_node *optimizeBand(isl_schedule_node *Node, void *User);
290-
291-
/// Apply additional optimizations on the bands in the schedule tree.
292-
///
293-
/// We apply the following
294-
/// transformations:
295-
///
296-
/// - Tile the band
297-
/// - Prevectorize the schedule of the band (or the point loop in case of
298-
/// tiling).
299-
/// - if vectorization is enabled
300-
///
301-
/// @param Node The schedule node to (possibly) optimize.
302-
/// @param User A pointer to forward some use information
303-
/// (currently unused).
304-
static isl::schedule_node standardBandOpts(isl::schedule_node Node,
305-
void *User);
306-
307-
/// Check if this node contains a partial schedule that could
308-
/// probably be optimized with analytical modeling.
309-
///
310-
/// isMatrMultPattern tries to determine whether the following conditions
311-
/// are true:
312-
/// 1. the partial schedule contains only one statement.
313-
/// 2. there are exactly three input dimensions.
314-
/// 3. all memory accesses of the statement will have stride 0 or 1, if we
315-
/// interchange loops (switch the variable used in the inner loop to
316-
/// the outer loop).
317-
/// 4. all memory accesses of the statement, except for the last one, are
318-
/// read memory accesses and the last one is a write memory access.
319-
/// 5. all subscripts of the last memory access of the statement don't
320-
/// contain the variable used in the inner loop.
321-
/// If this is the case, we could try to use an approach that is similar to
322-
/// the one used to get close-to-peak performance of matrix multiplications.
323-
///
324-
/// @param Node The node to check.
325-
/// @param D The SCoP dependencies.
326-
/// @param MMI Parameters of the matrix multiplication operands.
327-
static bool isMatrMultPattern(isl::schedule_node Node,
328-
const polly::Dependences *D,
329-
polly::MatMulInfoTy &MMI);
330-
331-
/// Create the BLIS macro-kernel.
332-
///
333-
/// We create the BLIS macro-kernel by applying a combination of tiling
334-
/// of dimensions of the band node and interchanging of two innermost
335-
/// modified dimensions. The values of MacroKernelParams's fields are used
336-
/// as tile sizes.
337-
///
338-
/// @param Node The schedule node to be modified.
339-
/// @param MacroKernelParams Parameters of the macro kernel
340-
/// to be used as tile sizes.
341-
static isl::schedule_node
342-
createMacroKernel(isl::schedule_node Node,
343-
MacroKernelParamsTy MacroKernelParams);
344-
345-
/// Create the BLIS micro-kernel.
346-
///
347-
/// We create the BLIS micro-kernel by applying a combination of tiling
348-
/// of dimensions of the band node and interchanging of two innermost
349-
/// modified dimensions. The values passed in MicroKernelParams are used
350-
/// as tile sizes.
351-
///
352-
/// @param Node The schedule node to be modified.
353-
/// @param MicroKernelParams Parameters of the micro kernel
354-
/// to be used as tile sizes.
355-
/// @see MicroKernelParamsTy
356-
static isl::schedule_node
357-
createMicroKernel(isl::schedule_node Node,
358-
MicroKernelParamsTy MicroKernelParams);
359-
};
360-
36141
/// Build the desired set of partial tile prefixes.
36242
///
36343
/// We build a set of partial tile prefixes, which are prefixes of the vector
@@ -377,4 +57,10 @@ class ScheduleTreeOptimizer {
37757
/// @param ScheduleRange A range of a map, which describes a prefix schedule
37858
/// relation.
37959
isl::set getPartialTilePrefixes(isl::set ScheduleRange, int VectorWidth);
60+
} // namespace polly
61+
62+
namespace llvm {
63+
void initializeIslScheduleOptimizerWrapperPassPass(llvm::PassRegistry &);
64+
}
65+
38066
#endif // POLLY_SCHEDULEOPTIMIZER_H

0 commit comments

Comments
 (0)